diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision')
28 files changed, 13036 insertions, 13581 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h index 872f039506..27835bb747 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h @@ -5,14 +5,13 @@  struct b3BvhInfo  { -	b3Vector3	m_aabbMin; -	b3Vector3	m_aabbMax; -	b3Vector3	m_quantization; -	int			m_numNodes; -	int			m_numSubTrees; -	int			m_nodeOffset; -	int			m_subTreeOffset; - +	b3Vector3 m_aabbMin; +	b3Vector3 m_aabbMax; +	b3Vector3 m_quantization; +	int m_numNodes; +	int m_numSubTrees; +	int m_nodeOffset; +	int m_subTreeOffset;  }; -#endif //B3_BVH_INFO_H
\ No newline at end of file +#endif  //B3_BVH_INFO_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp index cb30ee939b..4db717f8c3 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp @@ -15,7 +15,6 @@ subject to the following restrictions:  3. This notice may not be removed or altered from any source distribution.  */ -  #include "b3ContactCache.h"  #include "Bullet3Common/b3Transform.h" @@ -69,7 +68,7 @@ int b3ContactCache::sortCachedPoints(const b3Vector3& pt)  				maxPenetration = m_pointCache[i].getDistance();  			}  		} -#endif //KEEP_DEEPEST_POINT +#endif  //KEEP_DEEPEST_POINT  		b3Scalar res0(b3Scalar(0.)),res1(b3Scalar(0.)),res2(b3Scalar(0.)),res3(b3Scalar(0.)); @@ -251,8 +250,4 @@ void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transfo  } - - - -  #endif diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h index d6c9b0a07e..a15fd0b2a9 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h @@ -17,17 +17,13 @@ subject to the following restrictions:  #ifndef B3_CONTACT_CACHE_H  #define B3_CONTACT_CACHE_H -  #include "Bullet3Common/b3Vector3.h"  #include "Bullet3Common/b3Transform.h"  #include "Bullet3Common/b3AlignedAllocator.h" -  ///maximum contact breaking and merging threshold  extern b3Scalar gContactBreakingThreshold; - -  #define MANIFOLD_CACHE_SIZE 4  ///b3ContactCache is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase. @@ -37,24 +33,16 @@ extern b3Scalar gContactBreakingThreshold;  ///reduces the cache to 4 points, when more then 4 points are added, using following rules:  ///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points  ///note that some pairs of objects might have more then one contact manifold. -B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache +B3_ATTRIBUTE_ALIGNED16(class) +b3ContactCache  { - -	 - -	  	/// sort cached points so most isolated points come first -	int	sortCachedPoints(const b3Vector3& pt); - -	 +	int sortCachedPoints(const b3Vector3& pt);  public: -  	B3_DECLARE_ALIGNED_ALLOCATOR(); -	 -	 -	int addManifoldPoint( const b3Vector3& newPoint); +	int addManifoldPoint(const b3Vector3& newPoint);  	/*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex)  	{ @@ -63,18 +51,12 @@ public:  	}  	*/ - -	  	static bool validContactDistance(const b3Vector3& pt); -	 -	/// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin -	static void	refreshContactPoints(  const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache); -	static void removeContactPoint(struct b3Contact4Data& newContactCache,int i); -	 +	/// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin +	static void refreshContactPoints(const b3Transform& trA, const b3Transform& trB, struct b3Contact4Data& newContactCache); +	static void removeContactPoint(struct b3Contact4Data & newContactCache, int i);  }; - - -#endif //B3_CONTACT_CACHE_H +#endif  //B3_CONTACT_CACHE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index fb435aa7fd..54a104c5c8 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -16,19 +16,18 @@ subject to the following restrictions:  bool findSeparatingAxisOnGpu = true;  bool splitSearchSepAxisConcave = false;  bool splitSearchSepAxisConvex = true; -bool useMprGpu = true;//use mpr for edge-edge  (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... +bool useMprGpu = true;  //use mpr for edge-edge  (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling...  bool bvhTraversalKernelGPU = true;  bool findConcaveSeparatingAxisKernelGPU = true; -bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; -bool clipConvexFacesAndFindContactsCPU = false;//false;//true; -bool reduceConcaveContactsOnGPU = true;//false; -bool reduceConvexContactsOnGPU = true;//false; +bool clipConcaveFacesAndFindContactsCPU = false;  //false;//true; +bool clipConvexFacesAndFindContactsCPU = false;   //false;//true; +bool reduceConcaveContactsOnGPU = true;           //false; +bool reduceConvexContactsOnGPU = true;            //false;  bool findConvexClippingFacesGPU = true; -bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false -bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false +bool useGjk = false;          ///option for CPU/host testing, when findSeparatingAxisOnGpu = false +bool useGjkContacts = false;  //////option for CPU/host testing when findSeparatingAxisOnGpu = false - -static int myframecount=0;///for testing +static int myframecount = 0;  ///for testing  ///This file was written by Erwin Coumans  ///Separating axis rest based on work from Pierre Terdiman, see @@ -42,10 +41,10 @@ static int myframecount=0;///for testing  //#define PERSISTENT_CONTACTS_HOST  #endif -int b3g_actualSATPairTests=0; +int b3g_actualSATPairTests = 0;  #include "b3ConvexHullContact.h" -#include <string.h>//memcpy +#include <string.h>  //memcpy  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" @@ -54,8 +53,7 @@ int b3g_actualSATPairTests=0;  typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; - -#include <float.h> //for FLT_MAX +#include <float.h>  //for FLT_MAX  #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"  #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"  //#include "AdlQuaternion.h" @@ -69,7 +67,6 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;  #include "kernels/bvhTraversal.h"  #include "kernels/primitiveContacts.h" -  #include "Bullet3Geometry/b3AabbUtil.h"  #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" @@ -77,12 +74,10 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;  #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" -  #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl"  #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl"  #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" -  #ifndef __global  #define __global  #endif @@ -91,204 +86,184 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray;  #define __kernel  #endif -  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" - -  #define dot3F4 b3Dot -GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue  q ) -:m_context(ctx), -m_device(device), -m_queue(q), +GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q) +	: m_context(ctx), +	  m_device(device), +	  m_queue(q), -m_findSeparatingAxisKernel(0), -m_findSeparatingAxisVertexFaceKernel(0), -m_findSeparatingAxisEdgeEdgeKernel(0), -m_unitSphereDirections(m_context,m_queue), +	  m_findSeparatingAxisKernel(0), +	  m_findSeparatingAxisVertexFaceKernel(0), +	  m_findSeparatingAxisEdgeEdgeKernel(0), +	  m_unitSphereDirections(m_context, m_queue), -m_totalContactsOut(m_context, m_queue), -m_sepNormals(m_context, m_queue), -m_dmins(m_context,m_queue), +	  m_totalContactsOut(m_context, m_queue), +	  m_sepNormals(m_context, m_queue), +	  m_dmins(m_context, m_queue), -m_hasSeparatingNormals(m_context, m_queue), -m_concaveSepNormals(m_context, m_queue), -m_concaveHasSeparatingNormals(m_context,m_queue), -m_numConcavePairsOut(m_context, m_queue), +	  m_hasSeparatingNormals(m_context, m_queue), +	  m_concaveSepNormals(m_context, m_queue), +	  m_concaveHasSeparatingNormals(m_context, m_queue), +	  m_numConcavePairsOut(m_context, m_queue), +	  m_gpuCompoundPairs(m_context, m_queue), -m_gpuCompoundPairs(m_context, m_queue), +	  m_gpuCompoundSepNormals(m_context, m_queue), +	  m_gpuHasCompoundSepNormals(m_context, m_queue), - -m_gpuCompoundSepNormals(m_context, m_queue), -m_gpuHasCompoundSepNormals(m_context, m_queue), - -m_numCompoundPairsOut(m_context, m_queue) +	  m_numCompoundPairsOut(m_context, m_queue)  {  	m_totalContactsOut.push_back(0); -	 -	cl_int errNum=0; + +	cl_int errNum = 0;  	if (1)  	{  		const char* mprSrc = mprKernelsCL; -		 +  		const char* srcConcave = satConcaveKernelsCL; -		char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); -//#endif -		m_mprPenetrationKernel  = 0; +		char flags[1024] = {0}; +		//#ifdef CL_PLATFORM_INTEL +		//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); +		//#endif +		m_mprPenetrationKernel = 0;  		m_findSeparatingAxisUnitSphereKernel = 0;  		if (useMprGpu)  		{ -			cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); -			b3Assert(errNum==CL_SUCCESS); -		 -			m_mprPenetrationKernel  = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); +			cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH); +			b3Assert(errNum == CL_SUCCESS); + +			m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg);  			b3Assert(m_mprPenetrationKernel); -			b3Assert(errNum==CL_SUCCESS); +			b3Assert(errNum == CL_SUCCESS); -			m_findSeparatingAxisUnitSphereKernel =  b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); +			m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg);  			b3Assert(m_findSeparatingAxisUnitSphereKernel); -            b3Assert(errNum==CL_SUCCESS); +			b3Assert(errNum == CL_SUCCESS); - -			int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); +			int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3);  			m_unitSphereDirections.resize(numDirections); -			m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); - - +			m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true);  		} +		cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH); +		b3Assert(errNum == CL_SUCCESS); -		cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); -		b3Assert(errNum==CL_SUCCESS); - -		cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); -		b3Assert(errNum==CL_SUCCESS); +		cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH); +		b3Assert(errNum == CL_SUCCESS); -		m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); +		m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg);  		b3Assert(m_findSeparatingAxisKernel); -		b3Assert(errNum==CL_SUCCESS); +		b3Assert(errNum == CL_SUCCESS); - -		m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); +		m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg);  		b3Assert(m_findSeparatingAxisVertexFaceKernel); -		m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); +		m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg);  		b3Assert(m_findSeparatingAxisVertexFaceKernel); - -		m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); +		m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg);  		b3Assert(m_findConcaveSeparatingAxisKernel); -		b3Assert(errNum==CL_SUCCESS); -         -        m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); +		b3Assert(errNum == CL_SUCCESS); + +		m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg);  		b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); -		b3Assert(errNum==CL_SUCCESS); -         -        m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); +		b3Assert(errNum == CL_SUCCESS); + +		m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg);  		b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); -		b3Assert(errNum==CL_SUCCESS); -         -      -         -		 -		m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); +		b3Assert(errNum == CL_SUCCESS); + +		m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg);  		b3Assert(m_findCompoundPairsKernel); -		b3Assert(errNum==CL_SUCCESS); -		m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); +		b3Assert(errNum == CL_SUCCESS); +		m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg);  		b3Assert(m_processCompoundPairsKernel); -		b3Assert(errNum==CL_SUCCESS); +		b3Assert(errNum == CL_SUCCESS);  	}  	if (1)  	{  		const char* srcClip = satClipKernelsCL; -		char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); -//#endif +		char flags[1024] = {0}; +		//#ifdef CL_PLATFORM_INTEL +		//		sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); +		//#endif -		cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); -		b3Assert(errNum==CL_SUCCESS); +		cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH); +		b3Assert(errNum == CL_SUCCESS); -		m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS); +		m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS); -		m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS); -		 +		m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS); -        m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS); +		m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS); -        m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS);         +		m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS); -		m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS); +		m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS); -//		m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); -	//	b3Assert(errNum==CL_SUCCESS); +		//		m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); +		//	b3Assert(errNum==CL_SUCCESS); -        m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, -                            "newContactReductionKernel",&errNum,satClipContactsProg); -		b3Assert(errNum==CL_SUCCESS); +		m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, +																			   "newContactReductionKernel", &errNum, satClipContactsProg); +		b3Assert(errNum == CL_SUCCESS);  	} -   else +	else  	{ -		m_clipHullHullKernel=0; +		m_clipHullHullKernel = 0;  		m_clipCompoundsHullHullKernel = 0; -        m_findClippingFacesKernel = 0; -        m_newContactReductionKernel=0; -        m_clipFacesAndFindContacts = 0; +		m_findClippingFacesKernel = 0; +		m_newContactReductionKernel = 0; +		m_clipFacesAndFindContacts = 0;  		m_clipHullHullConcaveConvexKernel = 0; -//		m_extractManifoldAndAddContactKernel = 0; +		//		m_extractManifoldAndAddContactKernel = 0;  	} -	 if (1) +	if (1)  	{  		const char* srcBvh = bvhTraversalKernelCL; -		cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); -		b3Assert(errNum==CL_SUCCESS); - -		m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); -		b3Assert(errNum==CL_SUCCESS); +		cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH); +		b3Assert(errNum == CL_SUCCESS); +		m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, ""); +		b3Assert(errNum == CL_SUCCESS);  	} -         -	 { -		 const char* primitiveContactsSrc = primitiveContactsKernelsCL; -		cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); -		b3Assert(errNum==CL_SUCCESS); -		m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); -		b3Assert(errNum==CL_SUCCESS); +	{ +		const char* primitiveContactsSrc = primitiveContactsKernelsCL; +		cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); +		b3Assert(errNum == CL_SUCCESS); + +		m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, ""); +		b3Assert(errNum == CL_SUCCESS); -		m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); -		b3Assert(errNum==CL_SUCCESS); +		m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg); +		b3Assert(errNum == CL_SUCCESS);  		b3Assert(m_findConcaveSphereContactsKernel); -		m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); -		b3Assert(errNum==CL_SUCCESS); +		m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, ""); +		b3Assert(errNum == CL_SUCCESS);  		b3Assert(m_processCompoundPairsPrimitivesKernel); -		  -	 } -	 - +	}  }  GpuSatCollision::~GpuSatCollision()  { -	  	if (m_findSeparatingAxisVertexFaceKernel)  		clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); @@ -301,17 +276,15 @@ GpuSatCollision::~GpuSatCollision()  	if (m_mprPenetrationKernel)  		clReleaseKernel(m_mprPenetrationKernel); -  	if (m_findSeparatingAxisKernel)  		clReleaseKernel(m_findSeparatingAxisKernel); -    if (m_findConcaveSeparatingAxisVertexFaceKernel) -        clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); +	if (m_findConcaveSeparatingAxisVertexFaceKernel) +		clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); + +	if (m_findConcaveSeparatingAxisEdgeEdgeKernel) +		clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); -     -    if (m_findConcaveSeparatingAxisEdgeEdgeKernel) -        clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); -      	if (m_findConcaveSeparatingAxisKernel)  		clReleaseKernel(m_findConcaveSeparatingAxisKernel); @@ -320,17 +293,17 @@ GpuSatCollision::~GpuSatCollision()  	if (m_processCompoundPairsKernel)  		clReleaseKernel(m_processCompoundPairsKernel); -     -    if (m_findClippingFacesKernel) -        clReleaseKernel(m_findClippingFacesKernel); -    -    if (m_clipFacesAndFindContacts) -        clReleaseKernel(m_clipFacesAndFindContacts); -    if (m_newContactReductionKernel) -        clReleaseKernel(m_newContactReductionKernel); + +	if (m_findClippingFacesKernel) +		clReleaseKernel(m_findClippingFacesKernel); + +	if (m_clipFacesAndFindContacts) +		clReleaseKernel(m_clipFacesAndFindContacts); +	if (m_newContactReductionKernel) +		clReleaseKernel(m_newContactReductionKernel);  	if (m_primitiveContactsKernel)  		clReleaseKernel(m_primitiveContactsKernel); -     +  	if (m_findConcaveSphereContactsKernel)  		clReleaseKernel(m_findConcaveSphereContactsKernel); @@ -344,12 +317,11 @@ GpuSatCollision::~GpuSatCollision()  	if (m_clipHullHullConcaveConvexKernel)  		clReleaseKernel(m_clipHullHullConcaveConvexKernel); -//	if (m_extractManifoldAndAddContactKernel) +	//	if (m_extractManifoldAndAddContactKernel)  	//	clReleaseKernel(m_extractManifoldAndAddContactKernel);  	if (m_bvhTraversalKernel)  		clReleaseKernel(m_bvhTraversalKernel); -  }  struct MyTriangleCallback : public b3NodeOverlapCallback @@ -359,14 +331,13 @@ struct MyTriangleCallback : public b3NodeOverlapCallback  	virtual void processNode(int subPart, int triangleIndex)  	{ -		printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); +		printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB);  		printf("triangleIndex %d\n", triangleIndex);  	}  }; -  #define float4 b3Vector3 -#define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) +#define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w)  float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace)  { @@ -377,9 +348,7 @@ float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn  	return dist;  } - - -#define cross3(a,b) (a.cross(b)) +#define cross3(a, b) (a.cross(b))  b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn)  {  	b3Transform tr; @@ -390,184 +359,170 @@ b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion  	return res;  } - -inline bool IsPointInPolygon(const float4& p,  -							const b3GpuFace* face, +inline bool IsPointInPolygon(const float4& p, +							 const b3GpuFace* face,  							 const float4* baseVertex, -							const  int* convexIndices, -							float4* out) +							 const int* convexIndices, +							 float4* out)  { -    float4 a; -    float4 b; -    float4 ab; -    float4 ap; -    float4 v; +	float4 a; +	float4 b; +	float4 ab; +	float4 ap; +	float4 v; -	float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); -	 -	if (face->m_numIndices<2) +	float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f); + +	if (face->m_numIndices < 2)  		return false; -	 -	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; +	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]];  	b = v0; -    for(unsigned i=0; i != face->m_numIndices; ++i) -    { +	for (unsigned i = 0; i != face->m_numIndices; ++i) +	{  		a = b;  		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];  		b = vi; -        ab = b-a; -        ap = p-a; -        v = cross3(ab,plane); - -        if (b3Dot(ap, v) > 0.f) -        { -            float ab_m2 = b3Dot(ab, ab); -            float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; -            if (rt <= 0.f) -            { -                *out = a; -            } -            else if (rt >= 1.f)  -            { -                *out = b; -            } -            else -            { -            	float s = 1.f - rt; +		ab = b - a; +		ap = p - a; +		v = cross3(ab, plane); + +		if (b3Dot(ap, v) > 0.f) +		{ +			float ab_m2 = b3Dot(ab, ab); +			float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; +			if (rt <= 0.f) +			{ +				*out = a; +			} +			else if (rt >= 1.f) +			{ +				*out = b; +			} +			else +			{ +				float s = 1.f - rt;  				out[0].x = s * a.x + rt * b.x;  				out[0].y = s * a.y + rt * b.y;  				out[0].z = s * a.z + rt * b.z; -            } -            return false; -        } -    } -    return true; +			} +			return false; +		} +	} +	return true;  }  #define normalize3(a) (a.normalize()) - -int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx)  { -	if( nPoints == 0 ) -        return 0; -     -    if (nPoints <=4) -        return nPoints; -     -     -    if (nPoints >64) -        nPoints = 64; -     -	float4 center = b3MakeVector3(0,0,0,0); +	if (nPoints == 0) +		return 0; + +	if (nPoints <= 4) +		return nPoints; + +	if (nPoints > 64) +		nPoints = 64; + +	float4 center = b3MakeVector3(0, 0, 0, 0);  	{ -		 -		for (int i=0;i<nPoints;i++) +		for (int i = 0; i < nPoints; i++)  			center += p[i];  		center /= (float)nPoints;  	} -     -	 -     +  	//	sample 4 directions -     -    float4 aVector = p[0] - center; -    float4 u = cross3( nearNormal, aVector ); -    float4 v = cross3( nearNormal, u ); -    u = normalize3( u ); -    v = normalize3( v ); -     -     -    //keep point with deepest penetration -    float minW= FLT_MAX; -     -    int minIndex=-1; -     -    float4 maxDots; -    maxDots.x = FLT_MIN; -    maxDots.y = FLT_MIN; -    maxDots.z = FLT_MIN; -    maxDots.w = FLT_MIN; -     -    //	idx, distance -    for(int ie = 0; ie<nPoints; ie++ ) -    { -        if (p[ie].w<minW) -        { -            minW = p[ie].w; -            minIndex=ie; -        } -        float f; -        float4 r = p[ie]-center; -        f = dot3F4( u, r ); -        if (f<maxDots.x) -        { -            maxDots.x = f; -            contactIdx[0].x = ie; -        } -         -        f = dot3F4( -u, r ); -        if (f<maxDots.y) -        { -            maxDots.y = f; -            contactIdx[0].y = ie; -        } -         -         -        f = dot3F4( v, r ); -        if (f<maxDots.z) -        { -            maxDots.z = f; -            contactIdx[0].z = ie; -        } -         -        f = dot3F4( -v, r ); -        if (f<maxDots.w) -        { -            maxDots.w = f; -            contactIdx[0].w = ie; -        } -         -    } -     -    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) -    { -        //replace the first contact with minimum (todo: replace contact with least penetration) -        contactIdx[0].x = minIndex; -    } -     -    return 4; -     -} +	float4 aVector = p[0] - center; +	float4 u = cross3(nearNormal, aVector); +	float4 v = cross3(nearNormal, u); +	u = normalize3(u); +	v = normalize3(v); +	//keep point with deepest penetration +	float minW = FLT_MAX; -#define MAX_VERTS 1024 +	int minIndex = -1; + +	float4 maxDots; +	maxDots.x = FLT_MIN; +	maxDots.y = FLT_MIN; +	maxDots.z = FLT_MIN; +	maxDots.w = FLT_MIN; + +	//	idx, distance +	for (int ie = 0; ie < nPoints; ie++) +	{ +		if (p[ie].w < minW) +		{ +			minW = p[ie].w; +			minIndex = ie; +		} +		float f; +		float4 r = p[ie] - center; +		f = dot3F4(u, r); +		if (f < maxDots.x) +		{ +			maxDots.x = f; +			contactIdx[0].x = ie; +		} + +		f = dot3F4(-u, r); +		if (f < maxDots.y) +		{ +			maxDots.y = f; +			contactIdx[0].y = ie; +		} + +		f = dot3F4(v, r); +		if (f < maxDots.z) +		{ +			maxDots.z = f; +			contactIdx[0].z = ie; +		} + +		f = dot3F4(-v, r); +		if (f < maxDots.w) +		{ +			maxDots.w = f; +			contactIdx[0].w = ie; +		} +	} + +	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) +	{ +		//replace the first contact with minimum (todo: replace contact with least penetration) +		contactIdx[0].x = minIndex; +	} + +	return 4; +} +#define MAX_VERTS 1024 -inline void project(const b3ConvexPolyhedronData& hull,  const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) +inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max)  {  	min = FLT_MAX;  	max = -FLT_MAX;  	int numVerts = hull.m_numVertices; -	const float4 localDir = b3QuatRotate(orn.inverse(),dir); +	const float4 localDir = b3QuatRotate(orn.inverse(), dir); -	b3Scalar offset = dot3F4(pos,dir); +	b3Scalar offset = dot3F4(pos, dir); -	for(int i=0;i<numVerts;i++) +	for (int i = 0; i < numVerts; i++)  	{  		//b3Vector3 pt = trans * vertices[m_vertexOffset+i];  		//b3Scalar dp = pt.dot(dir);  		//b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; -		b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset+i],localDir); +		b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir);  		//b3Assert(dp==dpL); -		if(dp < min)	min = dp; -		if(dp > max)	max = dp; +		if (dp < min) min = dp; +		if (dp > max) max = dp;  	} -	if(min>max) +	if (min > max)  	{  		b3Scalar tmp = min;  		min = max; @@ -577,50 +532,48 @@ inline void project(const b3ConvexPolyhedronData& hull,  const float4& pos, cons  	max += offset;  } - -static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,  -	const float4& posA,const b3Quaternion& ornA, -	const float4& posB,const b3Quaternion& ornB, -	const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth) +static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, +						const float4& posA, const b3Quaternion& ornA, +						const float4& posB, const b3Quaternion& ornB, +						const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth)  { -	b3Scalar Min0,Max0; -	b3Scalar Min1,Max1; -	project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); -	project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); +	b3Scalar Min0, Max0; +	b3Scalar Min1, Max1; +	project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0); +	project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1); -	if(Max0<Min1 || Max1<Min0) +	if (Max0 < Min1 || Max1 < Min0)  		return false;  	b3Scalar d0 = Max0 - Min1; -	assert(d0>=0.0f); +	assert(d0 >= 0.0f);  	b3Scalar d1 = Max1 - Min0; -	assert(d1>=0.0f); -	depth = d0<d1 ? d0:d1; +	assert(d1 >= 0.0f); +	depth = d0 < d1 ? d0 : d1;  	return true;  }  inline bool IsAlmostZero(const b3Vector3& v)  { -	if(fabsf(v.x)>1e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6)	return false; +	if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false;  	return true;  } - -static bool findSeparatingAxis(	const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,  -	const float4& posA1, -	const b3Quaternion& ornA, -	const float4& posB1, -	const b3Quaternion& ornB, -	const b3AlignedObjectArray<b3Vector3>& verticesA, -	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,  -	const b3AlignedObjectArray<b3GpuFace>& facesA, -	const b3AlignedObjectArray<int>& indicesA, -	const b3AlignedObjectArray<b3Vector3>& verticesB,  -	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB,  -	const b3AlignedObjectArray<b3GpuFace>& facesB, -	const b3AlignedObjectArray<int>& indicesB, - -	b3Vector3& sep) +static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, +							   const float4& posA1, +							   const b3Quaternion& ornA, +							   const float4& posB1, +							   const b3Quaternion& ornB, +							   const b3AlignedObjectArray<b3Vector3>& verticesA, +							   const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, +							   const b3AlignedObjectArray<b3GpuFace>& facesA, +							   const b3AlignedObjectArray<int>& indicesA, +							   const b3AlignedObjectArray<b3Vector3>& verticesB, +							   const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, +							   const b3AlignedObjectArray<b3GpuFace>& facesB, +							   const b3AlignedObjectArray<int>& indicesB, + +							   b3Vector3& sep)  {  	B3_PROFILE("findSeparatingAxis"); @@ -629,41 +582,40 @@ static bool findSeparatingAxis(	const b3ConvexPolyhedronData& hullA, const b3Con  	posA.w = 0.f;  	float4 posB = posB1;  	posB.w = 0.f; -//#ifdef TEST_INTERNAL_OBJECTS +	//#ifdef TEST_INTERNAL_OBJECTS  	float4 c0local = (float4&)hullA.m_localCenter;  	float4 c0 = transform(&c0local, &posA, &ornA);  	float4 c1local = (float4&)hullB.m_localCenter; -	float4 c1 = transform(&c1local,&posB,&ornB); +	float4 c1 = transform(&c1local, &posB, &ornB);  	const float4 deltaC2 = c0 - c1; -//#endif +	//#endif  	b3Scalar dmin = FLT_MAX; -	int curPlaneTests=0; +	int curPlaneTests = 0;  	int numFacesA = hullA.m_numFaces;  	// Test normals from hullA -	for(int i=0;i<numFacesA;i++) +	for (int i = 0; i < numFacesA; i++)  	{ -		const float4& normal = (float4&)facesA[hullA.m_faceOffset+i].m_plane; -		float4 faceANormalWS = b3QuatRotate(ornA,normal); +		const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane; +		float4 faceANormalWS = b3QuatRotate(ornA, normal); -		if (dot3F4(deltaC2,faceANormalWS)<0) -			faceANormalWS*=-1.f; +		if (dot3F4(deltaC2, faceANormalWS) < 0) +			faceANormalWS *= -1.f;  		curPlaneTests++;  #ifdef TEST_INTERNAL_OBJECTS  		gExpectedNbTests++; -		if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) +		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin))  			continue;  		gActualNbTests++;  #endif -		  		b3Scalar d; -		if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d)) +		if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d))  			return false; -		if(d<dmin) +		if (d < dmin)  		{  			dmin = d;  			sep = (b3Vector3&)faceANormalWS; @@ -672,28 +624,28 @@ static bool findSeparatingAxis(	const b3ConvexPolyhedronData& hullA, const b3Con  	int numFacesB = hullB.m_numFaces;  	// Test normals from hullB -	for(int i=0;i<numFacesB;i++) +	for (int i = 0; i < numFacesB; i++)  	{ -		float4 normal = (float4&)facesB[hullB.m_faceOffset+i].m_plane; +		float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane;  		float4 WorldNormal = b3QuatRotate(ornB, normal); -		if (dot3F4(deltaC2,WorldNormal)<0) +		if (dot3F4(deltaC2, WorldNormal) < 0)  		{ -			WorldNormal*=-1.f; +			WorldNormal *= -1.f;  		}  		curPlaneTests++;  #ifdef TEST_INTERNAL_OBJECTS  		gExpectedNbTests++; -		if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin)) +		if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin))  			continue;  		gActualNbTests++;  #endif  		b3Scalar d; -		if(!TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d)) +		if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d))  			return false; -		if(d<dmin) +		if (d < dmin)  		{  			dmin = d;  			sep = (b3Vector3&)WorldNormal; @@ -702,70 +654,65 @@ static bool findSeparatingAxis(	const b3ConvexPolyhedronData& hullA, const b3Con  	int curEdgeEdge = 0;  	// Test edges -	for(int e0=0;e0<hullA.m_numUniqueEdges;e0++) +	for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++)  	{ -		const float4& edge0 = (float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0]; -		float4 edge0World = b3QuatRotate(ornA,(float4&)edge0); +		const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0]; +		float4 edge0World = b3QuatRotate(ornA, (float4&)edge0); -		for(int e1=0;e1<hullB.m_numUniqueEdges;e1++) +		for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++)  		{ -			const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1]; -			float4 edge1World = b3QuatRotate(ornB,(float4&)edge1); - +			const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1]; +			float4 edge1World = b3QuatRotate(ornB, (float4&)edge1); -			float4 crossje = cross3(edge0World,edge1World); +			float4 crossje = cross3(edge0World, edge1World);  			curEdgeEdge++; -			if(!IsAlmostZero((b3Vector3&)crossje)) +			if (!IsAlmostZero((b3Vector3&)crossje))  			{  				crossje = normalize3(crossje); -				if (dot3F4(deltaC2,crossje)<0) -					crossje*=-1.f; - +				if (dot3F4(deltaC2, crossje) < 0) +					crossje *= -1.f;  #ifdef TEST_INTERNAL_OBJECTS  				gExpectedNbTests++; -				if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin)) +				if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin))  					continue;  				gActualNbTests++;  #endif  				b3Scalar dist; -				if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist)) +				if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist))  					return false; -				if(dist<dmin) +				if (dist < dmin)  				{  					dmin = dist;  					sep = (b3Vector3&)crossje;  				}  			}  		} -  	} -	 -	if((dot3F4(-deltaC2,(float4&)sep))>0.0f) +	if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f)  		sep = -sep;  	return true;  } - -bool findSeparatingAxisEdgeEdge(	__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB,  -	const b3Float4& posA1, -	const b3Quat& ornA, -	const b3Float4& posB1, -	const b3Quat& ornB, -	const b3Float4& DeltaC2, -	__global const b3AlignedObjectArray<float4>& vertices,  -	__global const b3AlignedObjectArray<float4>& uniqueEdges,  -	__global const b3AlignedObjectArray<b3GpuFace>& faces, -	__global const b3AlignedObjectArray<int>&  indices, -	float4* sep, -	float* dmin) +bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, +								const b3Float4& posA1, +								const b3Quat& ornA, +								const b3Float4& posB1, +								const b3Quat& ornB, +								const b3Float4& DeltaC2, +								__global const b3AlignedObjectArray<float4>& vertices, +								__global const b3AlignedObjectArray<float4>& uniqueEdges, +								__global const b3AlignedObjectArray<b3GpuFace>& faces, +								__global const b3AlignedObjectArray<int>& indices, +								float4* sep, +								float* dmin)  { -//	int i = get_global_id(0); +	//	int i = get_global_id(0);  	float4 posA = posA1;  	posA.w = 0.f; @@ -776,97 +723,89 @@ bool findSeparatingAxisEdgeEdge(	__global const b3ConvexPolyhedronData* hullA, _  	int curEdgeEdge = 0;  	// Test edges -	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) +	for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++)  	{ -		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; -		float4 edge0World = b3QuatRotate(ornA,edge0); +		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0]; +		float4 edge0World = b3QuatRotate(ornA, edge0); -		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) +		for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++)  		{ -			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; -			float4 edge1World = b3QuatRotate(ornB,edge1); +			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1]; +			float4 edge1World = b3QuatRotate(ornB, edge1); - -			float4 crossje = cross3(edge0World,edge1World); +			float4 crossje = cross3(edge0World, edge1World);  			curEdgeEdge++; -			if(!IsAlmostZero(crossje)) +			if (!IsAlmostZero(crossje))  			{  				crossje = normalize3(crossje); -				if (dot3F4(DeltaC2,crossje)<0) -					crossje*=-1.f; -					 +				if (dot3F4(DeltaC2, crossje) < 0) +					crossje *= -1.f; +  				float dist;  				bool result = true;  				{ -					float Min0,Max0; -					float Min1,Max1; -					project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); -					project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); -				 -					if(Max0<Min1 || Max1<Min0) +					float Min0, Max0; +					float Min1, Max1; +					project(*hullA, posA, ornA, crossje, vertices, Min0, Max0); +					project(*hullB, posB, ornB, crossje, vertices, Min1, Max1); + +					if (Max0 < Min1 || Max1 < Min0)  						result = false; -				 +  					float d0 = Max0 - Min1;  					float d1 = Max1 - Min0; -					dist = d0<d1 ? d0:d1; +					dist = d0 < d1 ? d0 : d1;  					result = true; -  				} -				 -				if(dist<*dmin) +				if (dist < *dmin)  				{  					*dmin = dist;  					*sep = crossje;  				}  			}  		} -  	} -	 -	if((dot3F4(-DeltaC2,*sep))>0.0f) +	if ((dot3F4(-DeltaC2, *sep)) > 0.0f)  	{  		*sep = -(*sep);  	}  	return true;  } - -__inline float4 lerp3(const float4& a,const float4& b, float  t) +__inline float4 lerp3(const float4& a, const float4& b, float t)  { -	return b3MakeVector3(	a.x + (b.x - a.x) * t, -						a.y + (b.y - a.y) * t, -						a.z + (b.z - a.z) * t, -						0.f); +	return b3MakeVector3(a.x + (b.x - a.x) * t, +						 a.y + (b.y - a.y) * t, +						 a.z + (b.z - a.z) * t, +						 0.f);  } -  // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) +int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut)  { -	  	int ve;  	float ds, de;  	int numVertsOut = 0;  	if (numVertsIn < 2)  		return 0; -	float4 firstVertex=pVtxIn[numVertsIn-1]; +	float4 firstVertex = pVtxIn[numVertsIn - 1];  	float4 endVertex = pVtxIn[0]; -	 -	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + +	ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS;  	for (ve = 0; ve < numVertsIn; ve++)  	{ -		endVertex=pVtxIn[ve]; +		endVertex = pVtxIn[ve]; -		de = dot3F4(planeNormalWS,endVertex)+planeEqWS; +		de = dot3F4(planeNormalWS, endVertex) + planeEqWS; -		if (ds<0) +		if (ds < 0)  		{ -			if (de<0) +			if (de < 0)  			{  				// Start < 0, end < 0, so output endVertex  				ppVtxOut[numVertsOut++] = endVertex; @@ -874,15 +813,15 @@ int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float p  			else  			{  				// Start < 0, end >= 0, so output intersection -				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); +				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));  			}  		}  		else  		{ -			if (de<0) +			if (de < 0)  			{  				// Start >= 0, end < 0 so output intersection and end -				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); +				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de)));  				ppVtxOut[numVertsOut++] = endVertex;  			}  		} @@ -892,36 +831,35 @@ int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float p  	return numVertsOut;  } - -int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA,   -	const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, -	float4* worldVertsB2, int capacityWorldVertsB2, -	const float minDist, float maxDist, -	const b3AlignedObjectArray<float4>& verticesA,	const b3AlignedObjectArray<b3GpuFace>& facesA,	const b3AlignedObjectArray<int>& indicesA, -	//const float4* verticesB,	const b3GpuFace* facesB,	const int* indicesB, -	float4* contactsOut, -	int contactCapacity) +int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, +						const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, +						float4* worldVertsB2, int capacityWorldVertsB2, +						const float minDist, float maxDist, +						const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, +						//const float4* verticesB,	const b3GpuFace* facesB,	const int* indicesB, +						float4* contactsOut, +						int contactCapacity)  {  	int numContactsOut = 0;  	float4* pVtxIn = worldVertsB1;  	float4* pVtxOut = worldVertsB2; -	 +  	int numVertsIn = numWorldVertsB1;  	int numVertsOut = 0; -	int closestFaceA=-1; +	int closestFaceA = -1;  	{  		float dmin = FLT_MAX; -		for(int face=0;face<hullA->m_numFaces;face++) +		for (int face = 0; face < hullA->m_numFaces; face++)  		{  			const float4 Normal = b3MakeVector3( -				facesA[hullA->m_faceOffset+face].m_plane.x,  -				facesA[hullA->m_faceOffset+face].m_plane.y,  -				facesA[hullA->m_faceOffset+face].m_plane.z,0.f); -			const float4 faceANormalWS = b3QuatRotate(ornA,Normal); -		 -			float d = dot3F4(faceANormalWS,separatingNormal); +				facesA[hullA->m_faceOffset + face].m_plane.x, +				facesA[hullA->m_faceOffset + face].m_plane.y, +				facesA[hullA->m_faceOffset + face].m_plane.z, 0.f); +			const float4 faceANormalWS = b3QuatRotate(ornA, Normal); + +			float d = dot3F4(faceANormalWS, separatingNormal);  			if (d < dmin)  			{  				dmin = d; @@ -929,33 +867,33 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron  			}  		}  	} -	if (closestFaceA<0) +	if (closestFaceA < 0)  		return numContactsOut; -	b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; +	b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA];  	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face -//	int numContacts = numWorldVertsB1; +	//	int numContacts = numWorldVertsB1;  	int numVerticesA = polyA.m_numIndices; -	for(int e0=0;e0<numVerticesA;e0++) +	for (int e0 = 0; e0 < numVerticesA; e0++)  	{ -		const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; -		const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; +		const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]]; +		const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]];  		const float4 edge0 = a - b; -		const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); -		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); -		float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); +		const float4 WorldEdge0 = b3QuatRotate(ornA, edge0); +		float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); +		float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA); + +		float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1); +		float4 worldA1 = transform(&a, &posA, &ornA); +		float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1); -		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); -		float4 worldA1 = transform(&a,&posA,&ornA); -		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); -		  		float4 planeNormalWS = planeNormalWS1; -		float planeEqWS=planeEqWS1; -		 +		float planeEqWS = planeEqWS1; +  		//clip face  		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); -		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); +		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut);  		//btSwap(pVtxIn,pVtxOut);  		float4* tmp = pVtxOut; @@ -965,32 +903,32 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron  		numVertsOut = 0;  	} -	  	// only keep points that are behind the witness face  	{ -		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); +		float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f);  		float localPlaneEq = polyA.m_plane.w; -		float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); -		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); -		for (int i=0;i<numVertsIn;i++) +		float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal); +		float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA); +		for (int i = 0; i < numVertsIn; i++)  		{ -			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; -			if (depth <=minDist) +			float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS; +			if (depth <= minDist)  			{  				depth = minDist;  			} -			if (numContactsOut<contactCapacity) +			if (numContactsOut < contactCapacity)  			{ -				if (depth <=maxDist) +				if (depth <= maxDist)  				{  					float4 pointInWorld = pVtxIn[i];  					//resultOut.addContactPoint(separatingNormal,point,depth); -					contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); +					contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth);  					//printf("depth=%f\n",depth);  				} -			} else +			} +			else  			{ -				b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut,contactCapacity); +				b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity);  			}  		}  	} @@ -998,62 +936,60 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron  	return numContactsOut;  } +static int clipHullAgainstHull(const float4& separatingNormal, +							   const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, +							   const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB, +							   float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, +							   const float minDist, float maxDist, +							   const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, +							   const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, - -static int	clipHullAgainstHull(const float4& separatingNormal,  -	const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB,  -	const float4& posA, const b3Quaternion& ornA,const float4& posB, const b3Quaternion& ornB,  -	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, -	const float minDist, float maxDist, -	const b3AlignedObjectArray<float4>& verticesA,	const b3AlignedObjectArray<b3GpuFace>& facesA,	const b3AlignedObjectArray<int>& indicesA, -	const b3AlignedObjectArray<float4>& verticesB,	const b3AlignedObjectArray<b3GpuFace>& facesB,	const b3AlignedObjectArray<int>& indicesB, - -	float4*	contactsOut, -	int contactCapacity) +							   float4* contactsOut, +							   int contactCapacity)  {  	int numContactsOut = 0; -	int numWorldVertsB1= 0; -	 +	int numWorldVertsB1 = 0; +  	B3_PROFILE("clipHullAgainstHull"); -//	float curMaxDist=maxDist; -	int closestFaceB=-1; +	//	float curMaxDist=maxDist; +	int closestFaceB = -1;  	float dmax = -FLT_MAX;  	{  		//B3_PROFILE("closestFaceB"); -		if (hullB.m_numFaces!=1) +		if (hullB.m_numFaces != 1)  		{  			//printf("wtf\n");  		}  		static bool once = true;  		//printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); -		 -		for(int face=0;face<hullB.m_numFaces;face++) + +		for (int face = 0; face < hullB.m_numFaces; face++)  		{  #ifdef BT_DEBUG_SAT_FACE  			if (once) -				printf("face %d\n",face); -			const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face]; +				printf("face %d\n", face); +			const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face];  			if (once)  			{ -				for (int i=0;i<faceB->m_numIndices;i++) +				for (int i = 0; i < faceB->m_numIndices; i++)  				{ -					float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; -					printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); +					float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]]; +					printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z);  				}  			} -#endif //BT_DEBUG_SAT_FACE -			//if (facesB[hullB.m_faceOffset+face].m_numIndices>2) +#endif  //BT_DEBUG_SAT_FACE \ +	//if (facesB[hullB.m_faceOffset+face].m_numIndices>2)  			{ -				const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x,  -					facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); +				const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x, +													facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f);  				const float4 WorldNormal = b3QuatRotate(ornB, Normal);  #ifdef BT_DEBUG_SAT_FACE  				if (once) -					printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); +					printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z);  #endif -				float d = dot3F4(WorldNormal,separatingNormal); +				float d = dot3F4(WorldNormal, separatingNormal);  				if (d > dmax)  				{  					dmax = d; @@ -1064,184 +1000,176 @@ static int	clipHullAgainstHull(const float4& separatingNormal,  		once = false;  	} -	 -	b3Assert(closestFaceB>=0); +	b3Assert(closestFaceB >= 0);  	{  		//B3_PROFILE("worldVertsB1"); -		const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; +		const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB];  		const int numVertices = polyB.m_numIndices; -		for(int e0=0;e0<numVertices;e0++) +		for (int e0 = 0; e0 < numVertices; e0++)  		{ -			const float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; -			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); +			const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; +			worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB);  		}  	} -	if (closestFaceB>=0) +	if (closestFaceB >= 0)  	{  		//B3_PROFILE("clipFaceAgainstHull"); -		numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA,  -				posA,ornA, -				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, -				verticesA,				facesA,				indicesA, -				contactsOut,contactCapacity); +		numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, +											 posA, ornA, +											 worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist, +											 verticesA, facesA, indicesA, +											 contactsOut, contactCapacity);  	}  	return numContactsOut;  } +#define PARALLEL_SUM(v, n) \ +	for (int j = 1; j < n; j++) v[0] += v[j]; +#define PARALLEL_DO(execution, n)  \ +	for (int ie = 0; ie < n; ie++) \ +	{                              \ +		execution;                 \ +	} +#define REDUCE_MAX(v, n)                                                                                     \ +	{                                                                                                        \ +		int i = 0;                                                                                           \ +		for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \ +	} +#define REDUCE_MIN(v, n)                                                                                     \ +	{                                                                                                        \ +		int i = 0;                                                                                           \ +		for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \ +	} +int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +{ +	if (nPoints == 0) +		return 0; +	if (nPoints <= 4) +		return nPoints; +	if (nPoints > 64) +		nPoints = 64; - -#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} -#define REDUCE_MAX(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } - -int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ -	if( nPoints == 0 ) -        return 0; -     -    if (nPoints <=4) -        return nPoints; -     -     -    if (nPoints >64) -        nPoints = 64; -     -	float4 center = make_float4(0,0,0,0); +	float4 center = make_float4(0, 0, 0, 0);  	{ -		 -		for (int i=0;i<nPoints;i++) +		for (int i = 0; i < nPoints; i++)  			center += p[i];  		center /= (float)nPoints;  	} -     -	 -     +  	//	sample 4 directions -     -    float4 aVector = p[0] - center; -    float4 u = cross3( nearNormal, aVector ); -    float4 v = cross3( nearNormal, u ); -    u = normalize3( u ); -    v = normalize3( v ); -     -     -    //keep point with deepest penetration -    float minW= FLT_MAX; -     -    int minIndex=-1; -     -    float4 maxDots; -    maxDots.x = FLT_MIN; -    maxDots.y = FLT_MIN; -    maxDots.z = FLT_MIN; -    maxDots.w = FLT_MIN; -     -    //	idx, distance -    for(int ie = 0; ie<nPoints; ie++ ) -    { -        if (p[ie].w<minW) -        { -            minW = p[ie].w; -            minIndex=ie; -        } -        float f; -        float4 r = p[ie]-center; -        f = dot3F4( u, r ); -        if (f<maxDots.x) -        { -            maxDots.x = f; -            contactIdx[0].x = ie; -        } -         -        f = dot3F4( -u, r ); -        if (f<maxDots.y) -        { -            maxDots.y = f; -            contactIdx[0].y = ie; -        } -         -         -        f = dot3F4( v, r ); -        if (f<maxDots.z) -        { -            maxDots.z = f; -            contactIdx[0].z = ie; -        } -         -        f = dot3F4( -v, r ); -        if (f<maxDots.w) -        { -            maxDots.w = f; -            contactIdx[0].w = ie; -        } -         -    } -     -    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) -    { -        //replace the first contact with minimum (todo: replace contact with least penetration) -        contactIdx[0].x = minIndex; -    } -     -    return 4; -     -} +	float4 aVector = p[0] - center; +	float4 u = cross3(nearNormal, aVector); +	float4 v = cross3(nearNormal, u); +	u = normalize3(u); +	v = normalize3(v); + +	//keep point with deepest penetration +	float minW = FLT_MAX; +	int minIndex = -1; +	float4 maxDots; +	maxDots.x = FLT_MIN; +	maxDots.y = FLT_MIN; +	maxDots.z = FLT_MIN; +	maxDots.w = FLT_MIN; + +	//	idx, distance +	for (int ie = 0; ie < nPoints; ie++) +	{ +		if (p[ie].w < minW) +		{ +			minW = p[ie].w; +			minIndex = ie; +		} +		float f; +		float4 r = p[ie] - center; +		f = dot3F4(u, r); +		if (f < maxDots.x) +		{ +			maxDots.x = f; +			contactIdx[0].x = ie; +		} + +		f = dot3F4(-u, r); +		if (f < maxDots.y) +		{ +			maxDots.y = f; +			contactIdx[0].y = ie; +		} + +		f = dot3F4(v, r); +		if (f < maxDots.z) +		{ +			maxDots.z = f; +			contactIdx[0].z = ie; +		} + +		f = dot3F4(-v, r); +		if (f < maxDots.w) +		{ +			maxDots.w = f; +			contactIdx[0].w = ie; +		} +	} + +	if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) +	{ +		//replace the first contact with minimum (todo: replace contact with least penetration) +		contactIdx[0].x = minIndex; +	} + +	return 4; +}  int clipHullHullSingle( -			int bodyIndexA, int bodyIndexB, -										 const float4& posA, -										 const b3Quaternion& ornA, -										 const float4& posB, -										 const b3Quaternion& ornB, +	int bodyIndexA, int bodyIndexB, +	const float4& posA, +	const b3Quaternion& ornA, +	const float4& posB, +	const b3Quaternion& ornB, -			int collidableIndexA, int collidableIndexB, +	int collidableIndexA, int collidableIndexB, -			const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf,  -			b3AlignedObjectArray<b3Contact4>* globalContactOut,  -			int& nContacts, -			 -			const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, -			const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, -	 -			const b3AlignedObjectArray<b3Vector3>& verticesA,  -			const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA,  -			const b3AlignedObjectArray<b3GpuFace>& facesA, -			const b3AlignedObjectArray<int>& indicesA, -	 -			const b3AlignedObjectArray<b3Vector3>& verticesB, -			const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, -			const b3AlignedObjectArray<b3GpuFace>& facesB, -			const b3AlignedObjectArray<int>& indicesB, - -			const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, -			const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, -			const b3Vector3& sepNormalWorldSpace, -			int maxContactCapacity			) +	const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, +	b3AlignedObjectArray<b3Contact4>* globalContactOut, +	int& nContacts, + +	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, +	const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, + +	const b3AlignedObjectArray<b3Vector3>& verticesA, +	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, +	const b3AlignedObjectArray<b3GpuFace>& facesA, +	const b3AlignedObjectArray<int>& indicesA, + +	const b3AlignedObjectArray<b3Vector3>& verticesB, +	const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, +	const b3AlignedObjectArray<b3GpuFace>& facesB, +	const b3AlignedObjectArray<int>& indicesB, + +	const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, +	const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, +	const b3Vector3& sepNormalWorldSpace, +	int maxContactCapacity)  {  	int contactIndex = -1;  	b3ConvexPolyhedronData hullA, hullB; -     -    b3Collidable colA = hostCollidablesA[collidableIndexA]; -    hullA = hostConvexDataA[colA.m_shapeIndex]; -    //printf("numvertsA = %d\n",hullA.m_numVertices); -     -     -    b3Collidable colB = hostCollidablesB[collidableIndexB]; -    hullB = hostConvexDataB[colB.m_shapeIndex]; -    //printf("numvertsB = %d\n",hullB.m_numVertices); -     -	 + +	b3Collidable colA = hostCollidablesA[collidableIndexA]; +	hullA = hostConvexDataA[colA.m_shapeIndex]; +	//printf("numvertsA = %d\n",hullA.m_numVertices); + +	b3Collidable colB = hostCollidablesB[collidableIndexB]; +	hullB = hostConvexDataB[colB.m_shapeIndex]; +	//printf("numvertsB = %d\n",hullB.m_numVertices); +  	float4 contactsOut[MAX_VERTS];  	int localContactCapacity = MAX_VERTS; @@ -1249,133 +1177,125 @@ int clipHullHullSingle(  	b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x));  	b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x));  #endif -	 -	 +  	{ -		  		float4 worldVertsB1[MAX_VERTS];  		float4 worldVertsB2[MAX_VERTS];  		int capacityWorldVerts = MAX_VERTS; -		float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); +		float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f);  		int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex;  		int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex;  		b3Scalar minDist = -1;  		b3Scalar maxDist = 0.; -		         - -		b3Transform trA,trB; +		b3Transform trA, trB;  		{ -		//B3_PROFILE("transform computation"); -		//trA.setIdentity(); -		trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); -		trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); -				 -		//trB.setIdentity(); -		trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); -		trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); +			//B3_PROFILE("transform computation"); +			//trA.setIdentity(); +			trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z)); +			trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w)); + +			//trB.setIdentity(); +			trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z)); +			trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w));  		}  		b3Quaternion trAorn = trA.getRotation(); -        b3Quaternion trBorn = trB.getRotation(); -         -		int numContactsOut = clipHullAgainstHull(hostNormal,  -						hostConvexDataA.at(shapeA),  -						hostConvexDataB.at(shapeB), -								(float4&)trA.getOrigin(), (b3Quaternion&)trAorn, -								(float4&)trB.getOrigin(), (b3Quaternion&)trBorn, -								worldVertsB1,worldVertsB2,capacityWorldVerts, -								minDist, maxDist, -								verticesA,	facesA,indicesA, -								verticesB,	facesB,indicesB, -								 -								contactsOut,localContactCapacity); - -		if (numContactsOut>0) +		b3Quaternion trBorn = trB.getRotation(); + +		int numContactsOut = clipHullAgainstHull(hostNormal, +												 hostConvexDataA.at(shapeA), +												 hostConvexDataB.at(shapeB), +												 (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, +												 (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, +												 worldVertsB1, worldVertsB2, capacityWorldVerts, +												 minDist, maxDist, +												 verticesA, facesA, indicesA, +												 verticesB, facesB, indicesB, + +												 contactsOut, localContactCapacity); + +		if (numContactsOut > 0)  		{  			B3_PROFILE("overlap");  			float4 normalOnSurfaceB = (float4&)hostNormal; -			 +  			b3Int4 contactIdx;  			contactIdx.x = 0;  			contactIdx.y = 1;  			contactIdx.z = 2;  			contactIdx.w = 3; -			 +  			int numPoints = 0; -					 +  			{ -			//	B3_PROFILE("extractManifold"); -				numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB,  &contactIdx); +				//	B3_PROFILE("extractManifold"); +				numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx);  			} -					 +  			b3Assert(numPoints); -					 -			if (nContacts<maxContactCapacity) + +			if (nContacts < maxContactCapacity)  			{  				contactIndex = nContacts;  				globalContactOut->expand();  				b3Contact4& contact = globalContactOut->at(nContacts); -				contact.m_batchIdx = 0;//i; -				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; -				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; +				contact.m_batchIdx = 0;  //i; +				contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; +				contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB;  				contact.m_frictionCoeffCmp = 45874;  				contact.m_restituitionCoeffCmp = 0; -					 -	//			float distance = 0.f; -				for (int p=0;p<numPoints;p++) + +				//			float distance = 0.f; +				for (int p = 0; p < numPoints; p++)  				{ -					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B -					contact.m_worldNormalOnB = normalOnSurfaceB;  +					contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];  //check if it is actually on B +					contact.m_worldNormalOnB = normalOnSurfaceB;  				}  				//printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints);  				contact.m_worldNormalOnB.w = (b3Scalar)numPoints;  				nContacts++; -			} else +			} +			else  			{ -				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity); +				b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity);  			}  		}  	}  	return contactIndex;  } -	 - - -  void computeContactPlaneConvex(int pairIndex, -																int bodyIndexA, int bodyIndexB,  -																int collidableIndexA, int collidableIndexB,  -																const b3RigidBodyData* rigidBodies,  -																const b3Collidable* collidables, -																const b3ConvexPolyhedronData* convexShapes, -																const b3Vector3* convexVertices, -																const int* convexIndices, -																const b3GpuFace* faces, -																b3Contact4* globalContactsOut, -																int& nGlobalContactsOut, -																int maxContactCapacity) +							   int bodyIndexA, int bodyIndexB, +							   int collidableIndexA, int collidableIndexB, +							   const b3RigidBodyData* rigidBodies, +							   const b3Collidable* collidables, +							   const b3ConvexPolyhedronData* convexShapes, +							   const b3Vector3* convexVertices, +							   const int* convexIndices, +							   const b3GpuFace* faces, +							   b3Contact4* globalContactsOut, +							   int& nGlobalContactsOut, +							   int maxContactCapacity)  { - -		int shapeIndex = collidables[collidableIndexB].m_shapeIndex; +	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;  	const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; -	 +  	b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;  	b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat;  	b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;  	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; -//	int numContactsOut = 0; -//	int numWorldVertsB1= 0; +	//	int numContactsOut = 0; +	//	int numWorldVertsB1= 0;  	b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; -	b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); -	b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); +	b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); +	b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);  	float planeConstant = planeEq.w;  	b3Transform convexWorldTransform;  	convexWorldTransform.setIdentity(); @@ -1387,13 +1307,13 @@ void computeContactPlaneConvex(int pairIndex,  	planeTransform.setRotation(ornA);  	b3Transform planeInConvex; -	planeInConvex= convexWorldTransform.inverse() * planeTransform; +	planeInConvex = convexWorldTransform.inverse() * planeTransform;  	b3Transform convexInPlane;  	convexInPlane = planeTransform.inverse() * convexWorldTransform; -	 -	b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + +	b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;  	float maxDot = -1e30; -	int hitVertex=-1; +	int hitVertex = -1;  	b3Vector3 hitVtx;  #define MAX_PLANE_CONVEX_POINTS 64 @@ -1406,54 +1326,52 @@ void computeContactPlaneConvex(int pairIndex,  	contactIdx.s[1] = 1;  	contactIdx.s[2] = 2;  	contactIdx.s[3] = 3; -	 -	for (int i=0;i<hullB->m_numVertices;i++) + +	for (int i = 0; i < hullB->m_numVertices; i++)  	{ -		b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; +		b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];  		float curDot = vtx.dot(planeNormalInConvex); - -		if (curDot>maxDot) +		if (curDot > maxDot)  		{ -			hitVertex=i; -			maxDot=curDot; +			hitVertex = i; +			maxDot = curDot;  			hitVtx = vtx;  			//make sure the deepest points is always included -			if (numPoints==MAX_PLANE_CONVEX_POINTS) +			if (numPoints == MAX_PLANE_CONVEX_POINTS)  				numPoints--;  		} -		if (numPoints<MAX_PLANE_CONVEX_POINTS) +		if (numPoints < MAX_PLANE_CONVEX_POINTS)  		{ -			b3Vector3 vtxWorld = convexWorldTransform*vtx; -			b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; -			float dist = planeNormal.dot(vtxInPlane)-planeConstant; -			if (dist<0.f) +			b3Vector3 vtxWorld = convexWorldTransform * vtx; +			b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; +			float dist = planeNormal.dot(vtxInPlane) - planeConstant; +			if (dist < 0.f)  			{  				vtxWorld.w = dist;  				contactPoints[numPoints] = vtxWorld;  				numPoints++;  			}  		} -  	} -	int numReducedPoints  = 0; +	int numReducedPoints = 0;  	numReducedPoints = numPoints; -	 -	if (numPoints>4) + +	if (numPoints > 4)  	{ -		numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); +		numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);  	}  	int dstIdx; -//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); -		 -	if (numReducedPoints>0) +	//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + +	if (numReducedPoints > 0)  	{  		if (nGlobalContactsOut < maxContactCapacity)  		{ -			dstIdx=nGlobalContactsOut; +			dstIdx = nGlobalContactsOut;  			nGlobalContactsOut++;  			b3Contact4* c = &globalContactsOut[dstIdx]; @@ -1462,38 +1380,33 @@ void computeContactPlaneConvex(int pairIndex,  			c->setRestituitionCoeff(0.f);  			c->m_batchIdx = pairIndex; -			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; -			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; -			for (int i=0;i<numReducedPoints;i++) +			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; +			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; +			for (int i = 0; i < numReducedPoints; i++)  			{  				b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];  				c->m_worldPosB[i] = pOnB1;  			}  			c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; -		}//if (dstIdx < numPairs) -	}	 -		 - +		}  //if (dstIdx < numPairs) +	} -//	printf("computeContactPlaneConvex\n"); +	//	printf("computeContactPlaneConvex\n");  } - - -B3_FORCE_INLINE b3Vector3	MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) -	{ -			b3Vector3	vecOut; -			vecOut.setValue( -			(b3Scalar)(vecIn[0]) / (quantization.x), -			(b3Scalar)(vecIn[1]) / (quantization.y), -			(b3Scalar)(vecIn[2]) / (quantization.z)); -			vecOut += bvhAabbMin; -			return vecOut; -	} +B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) +{ +	b3Vector3 vecOut; +	vecOut.setValue( +		(b3Scalar)(vecIn[0]) / (quantization.x), +		(b3Scalar)(vecIn[1]) / (quantization.y), +		(b3Scalar)(vecIn[2]) / (quantization.z)); +	vecOut += bvhAabbMin; +	return vecOut; +}  void traverseTreeTree()  { -  }  #include "Bullet3Common/shared/b3Mat3x3.h" @@ -1503,44 +1416,40 @@ int maxNumAabbChecks = 0;  int maxDepth = 0;  // work-in-progress -__kernel void   findCompoundPairsKernel(  +__kernel void findCompoundPairsKernel(  	int pairIndex,  	int bodyIndexA,  	int bodyIndexB,  	int collidableIndexA,  	int collidableIndexB, -	__global const b3RigidBodyData* rigidBodies,  +	__global const b3RigidBodyData* rigidBodies,  	__global const b3Collidable* collidables, -	__global const b3ConvexPolyhedronData* convexShapes,  +	__global const b3ConvexPolyhedronData* convexShapes,  	__global const b3AlignedObjectArray<b3Float4>& vertices,  	__global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace,  	__global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace,  	__global const b3GpuChildShape* gpuChildShapes,  	__global b3Int4* gpuCompoundPairsOut, -	__global  int* numCompoundPairsOut, +	__global int* numCompoundPairsOut,  	int maxNumCompoundPairsCapacity, -	b3AlignedObjectArray<b3QuantizedBvhNode>&	treeNodesCPU, -	b3AlignedObjectArray<b3BvhSubtreeInfo>&	subTreesCPU, -	b3AlignedObjectArray<b3BvhInfo>&	bvhInfoCPU -	) +	b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, +	b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, +	b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)  { -	numAabbChecks=0; -	maxNumAabbChecks=0; -//	int i = pairIndex; +	numAabbChecks = 0; +	maxNumAabbChecks = 0; +	//	int i = pairIndex;  	{ -		 -  		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;  		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; -  		//once the broadphase avoids static-static pairs, we can remove this test -		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) +		if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))  		{  			return;  		} -		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) +		if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))  		{  			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;  			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; @@ -1548,9 +1457,8 @@ __kernel void   findCompoundPairsKernel(  			int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset;  			int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; -  			int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; -			 +  			float4 posA = rigidBodies[bodyIndexA].m_pos;  			b3Quat ornA = rigidBodies[bodyIndexA].m_quat; @@ -1567,41 +1475,37 @@ __kernel void   findCompoundPairsKernel(  			transB.setOrigin(posB);  			transB.setRotation(ornB); - - -			for (int p=0;p<numSubTreesA;p++) +			for (int p = 0; p < numSubTreesA; p++)  			{ -				b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p]; +				b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p];  				//bvhInfoCPU[bvhA].m_quantization -				b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); -				b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); +				b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); +				b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); -				b3Vector3 aabbAMinOut,aabbAMaxOut; -				float margin=0.f; -				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); +				b3Vector3 aabbAMinOut, aabbAMaxOut; +				float margin = 0.f; +				b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); -				for (int q=0;q<numSubTreesB;q++) +				for (int q = 0; q < numSubTreesB; q++)  				{ -					b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q]; +					b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q]; -					b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); -					b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); +					b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); +					b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); -					b3Vector3 aabbBMinOut,aabbBMaxOut; -					float margin=0.f; -					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); +					b3Vector3 aabbBMinOut, aabbBMaxOut; +					float margin = 0.f; +					b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); -					 -					numAabbChecks=0; -					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); +					numAabbChecks = 0; +					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);  					if (aabbOverlap)  					{ -						 -						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfoCPU[bvhA].m_nodeOffset; -		//				int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; +						int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset; +						//				int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; -						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfoCPU[bvhB].m_nodeOffset; -		//				int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; +						int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset; +						//				int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;  						b3AlignedObjectArray<b3Int2> nodeStack;  						b3Int2 node0; @@ -1610,33 +1514,33 @@ __kernel void   findCompoundPairsKernel(  						int maxStackDepth = 1024;  						nodeStack.resize(maxStackDepth); -						int depth=0; -						nodeStack[depth++]=node0; +						int depth = 0; +						nodeStack[depth++] = node0;  						do  						{  							if (depth > maxDepth)  							{ -								maxDepth=depth; -								printf("maxDepth=%d\n",maxDepth); +								maxDepth = depth; +								printf("maxDepth=%d\n", maxDepth);  							}  							b3Int2 node = nodeStack[--depth]; -							 -							b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); -							b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); -							b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); -							b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); +							b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); +							b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); -							float margin=0.f; -							b3Vector3 aabbAMinOut,aabbAMaxOut; -							b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); +							b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); +							b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); -							b3Vector3 aabbBMinOut,aabbBMaxOut; -							b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); +							float margin = 0.f; +							b3Vector3 aabbAMinOut, aabbAMaxOut; +							b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); + +							b3Vector3 aabbBMinOut, aabbBMaxOut; +							b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);  							numAabbChecks++; -							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); +							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);  							if (nodeOverlap)  							{  								bool isLeafA = treeNodesCPU[node.x].isLeafNode(); @@ -1645,23 +1549,23 @@ __kernel void   findCompoundPairsKernel(  								bool isInternalB = !isLeafB;  								//fail, even though it might hit two leaf nodes -								if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) +								if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB))  								{  									b3Error("Error: traversal exceeded maxStackDepth\n");  									continue;  								} -								if(isInternalA) +								if (isInternalA)  								{ -									int nodeAleftChild = node.x+1; -									bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); -									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); +									int nodeAleftChild = node.x + 1; +									bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode(); +									int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex(); -									if(isInternalB) -									{					 -										int nodeBleftChild = node.y+1; -										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); -										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); +									if (isInternalB) +									{ +										int nodeBleftChild = node.y + 1; +										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); +										int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex();  										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);  										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); @@ -1670,90 +1574,83 @@ __kernel void   findCompoundPairsKernel(  									}  									else  									{ -										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); -										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); +										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y); +										nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y);  									}  								}  								else  								{ -									if(isInternalB) +									if (isInternalB)  									{ -										int nodeBleftChild = node.y+1; -										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); -										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); -										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); -										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); +										int nodeBleftChild = node.y + 1; +										bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); +										int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); +										nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild); +										nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild);  									}  									else  									{  										int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); -										if (compoundPairIdx<maxNumCompoundPairsCapacity) +										if (compoundPairIdx < maxNumCompoundPairsCapacity)  										{  											int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex();  											int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); -											gpuCompoundPairsOut[compoundPairIdx]  = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); +											gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);  										}  									}  								}  							}  						} while (depth); -						maxNumAabbChecks = b3Max(numAabbChecks,maxNumAabbChecks); +						maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks);  					}  				}  			} -			 +  			return;  		} -		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) +		if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))  		{ - -			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)  +			if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)  			{ -  				int numChildrenA = collidables[collidableIndexA].m_numChildShapes; -				for (int c=0;c<numChildrenA;c++) +				for (int c = 0; c < numChildrenA; c++)  				{ -					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; +					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c;  					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;  					float4 posA = rigidBodies[bodyIndexA].m_pos;  					b3Quat ornA = rigidBodies[bodyIndexA].m_quat;  					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;  					b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; -					float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; -					b3Quat newOrnA = b3QuatMul(ornA,childOrnA); -					 +					float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; +					b3Quat newOrnA = b3QuatMul(ornA, childOrnA); -					  					b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; -					  					b3Transform transA;  					transA.setIdentity();  					transA.setOrigin(newPosA);  					transA.setRotation(newOrnA); -					b3Scalar margin=0.0f; +					b3Scalar margin = 0.0f; -					b3Vector3 aabbAMinOut,aabbAMaxOut; +					b3Vector3 aabbAMinOut, aabbAMaxOut; -					b3TransformAabb2((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); +					b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); -					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) +					if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)  					{  						int numChildrenB = collidables[collidableIndexB].m_numChildShapes; -						for (int b=0;b<numChildrenB;b++) +						for (int b = 0; b < numChildrenB; b++)  						{ -							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; +							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;  							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;  							b3Quat ornB = rigidBodies[bodyIndexB].m_quat;  							float4 posB = rigidBodies[bodyIndexB].m_pos;  							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;  							b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; -							float4 newPosB = transform(&childPosB,&posB,&ornB); -							b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - -							 +							float4 newPosB = transform(&childPosB, &posB, &ornB); +							b3Quat newOrnB = b3QuatMul(ornB, childOrnB);  							b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; @@ -1762,11 +1659,11 @@ __kernel void   findCompoundPairsKernel(  							transB.setOrigin(newPosB);  							transB.setRotation(newOrnB); -							b3Vector3 aabbBMinOut,aabbBMaxOut; -							b3TransformAabb2((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); +							b3Vector3 aabbBMinOut, aabbBMaxOut; +							b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut);  							numAabbChecks++; -							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); +							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut);  							if (aabbOverlap)  							{  								/* @@ -1784,22 +1681,22 @@ __kernel void   findCompoundPairsKernel(  								float4 c1 = transform(&c1local,&posB,&ornB);  								const float4 DeltaC2 = c0 - c1;  								*/ -								{// +								{  //  									int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); -									if (compoundPairIdx<maxNumCompoundPairsCapacity) +									if (compoundPairIdx < maxNumCompoundPairsCapacity)  									{ -										gpuCompoundPairsOut[compoundPairIdx]  = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); +										gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB);  									} -								}// -							}//fi(1) -						} //for (int b=0 -					}//if (collidables[collidableIndexB]. -					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) +								}  // +							}      //fi(1) +						}          //for (int b=0 +					}              //if (collidables[collidableIndexB]. +					else           //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)  					{  						if (1)  						{ -						//	int numFacesA = convexShapes[shapeIndexA].m_numFaces; -						//	float dmin = FLT_MAX; +							//	int numFacesA = convexShapes[shapeIndexA].m_numFaces; +							//	float dmin = FLT_MAX;  							float4 posA = newPosA;  							posA.w = 0.f;  							float4 posB = rigidBodies[bodyIndexB].m_pos; @@ -1811,45 +1708,43 @@ __kernel void   findCompoundPairsKernel(  							float4 c1local = convexShapes[shapeIndexB].m_localCenter;  							b3Quat ornB = rigidBodies[bodyIndexB].m_quat;  							float4 c1; -							c1 = transform(&c1local,&posB,&ornB); -						//	const float4 DeltaC2 = c0 - c1; +							c1 = transform(&c1local, &posB, &ornB); +							//	const float4 DeltaC2 = c0 - c1;  							{  								int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); -								if (compoundPairIdx<maxNumCompoundPairsCapacity) +								if (compoundPairIdx < maxNumCompoundPairsCapacity)  								{ -									gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,-1); -								}//if (compoundPairIdx<maxNumCompoundPairsCapacity) -							}// -						}//fi (1) -					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) -				}//for (int b=0;b<numChildrenB;b++)	 +									gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1); +								}  //if (compoundPairIdx<maxNumCompoundPairsCapacity) +							}      // +						}          //fi (1) +					}              //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) +				}                  //for (int b=0;b<numChildrenB;b++)  				return; -			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) -			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)  -				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) +			}  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) +			if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS))  			{  				int numChildrenB = collidables[collidableIndexB].m_numChildShapes; -				for (int b=0;b<numChildrenB;b++) +				for (int b = 0; b < numChildrenB; b++)  				{ -					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; +					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;  					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;  					b3Quat ornB = rigidBodies[bodyIndexB].m_quat;  					float4 posB = rigidBodies[bodyIndexB].m_pos;  					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;  					b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; -					float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; -					b3Quat newOrnB = b3QuatMul(ornB,childOrnB); +					float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; +					b3Quat newOrnB = b3QuatMul(ornB, childOrnB);  					int shapeIndexB = collidables[childColIndexB].m_shapeIndex; -  					//////////////////////////////////////  					if (1)  					{ -					//	int numFacesA = convexShapes[shapeIndexA].m_numFaces; -					//	float dmin = FLT_MAX; +						//	int numFacesA = convexShapes[shapeIndexA].m_numFaces; +						//	float dmin = FLT_MAX;  						float4 posA = rigidBodies[bodyIndexA].m_pos;  						posA.w = 0.f;  						float4 posB = newPosB; @@ -1859,99 +1754,96 @@ __kernel void   findCompoundPairsKernel(  						float4 c0;  						c0 = transform(&c0local, &posA, &ornA);  						float4 c1local = convexShapes[shapeIndexB].m_localCenter; -						b3Quat ornB =newOrnB; +						b3Quat ornB = newOrnB;  						float4 c1; -						c1 = transform(&c1local,&posB,&ornB); -					//	const float4 DeltaC2 = c0 - c1; -						{// +						c1 = transform(&c1local, &posB, &ornB); +						//	const float4 DeltaC2 = c0 - c1; +						{  //  							int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); -							if (compoundPairIdx<maxNumCompoundPairsCapacity) +							if (compoundPairIdx < maxNumCompoundPairsCapacity)  							{ -								gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,-1,childShapeIndexB); -							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity) -						}// -					}//fi (1)	 -				}//for (int b=0;b<numChildrenB;b++) +								gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB); +							}  //fi (compoundPairIdx<maxNumCompoundPairsCapacity) +						}      // +					}          //fi (1) +				}              //for (int b=0;b<numChildrenB;b++)  				return; -			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) +			}  //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)  			return; -		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) -	}//i<numPairs +		}  //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) +	}      //i<numPairs  } - - -__kernel void   processCompoundPairsKernel( __global const b3Int4* gpuCompoundPairs, -										__global const b3RigidBodyData* rigidBodies,  -										__global const b3Collidable* collidables, -										__global const b3ConvexPolyhedronData* convexShapes,  -										__global const b3AlignedObjectArray<b3Float4>& vertices, -										__global const b3AlignedObjectArray<b3Float4>& uniqueEdges, -										__global const b3AlignedObjectArray<b3GpuFace>& faces, -										__global const b3AlignedObjectArray<int>& indices, -										__global b3Aabb* aabbs, -										__global const b3GpuChildShape* gpuChildShapes, -										__global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, -										__global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, -										int numCompoundPairs, -										int i -										) +__kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs, +										 __global const b3RigidBodyData* rigidBodies, +										 __global const b3Collidable* collidables, +										 __global const b3ConvexPolyhedronData* convexShapes, +										 __global const b3AlignedObjectArray<b3Float4>& vertices, +										 __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, +										 __global const b3AlignedObjectArray<b3GpuFace>& faces, +										 __global const b3AlignedObjectArray<int>& indices, +										 __global b3Aabb* aabbs, +										 __global const b3GpuChildShape* gpuChildShapes, +										 __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, +										 __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, +										 int numCompoundPairs, +										 int i)  { - -//	int i = get_global_id(0); -	if (i<numCompoundPairs) +	//	int i = get_global_id(0); +	if (i < numCompoundPairs)  	{  		int bodyIndexA = gpuCompoundPairs[i].x;  		int bodyIndexB = gpuCompoundPairs[i].y;  		int childShapeIndexA = gpuCompoundPairs[i].z;  		int childShapeIndexB = gpuCompoundPairs[i].w; -		 +  		int collidableIndexA = -1;  		int collidableIndexB = -1; -		 +  		b3Quat ornA = rigidBodies[bodyIndexA].m_quat;  		float4 posA = rigidBodies[bodyIndexA].m_pos; -		 +  		b3Quat ornB = rigidBodies[bodyIndexB].m_quat;  		float4 posB = rigidBodies[bodyIndexB].m_pos; -							 +  		if (childShapeIndexA >= 0)  		{  			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;  			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; -			b3Quat	childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; -			float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; -			b3Quat newOrnA = b3QuatMul(ornA,childOrnA); +			b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; +			float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; +			b3Quat newOrnA = b3QuatMul(ornA, childOrnA);  			posA = newPosA;  			ornA = newOrnA; -		} else +		} +		else  		{  			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;  		} -		 -		if (childShapeIndexB>=0) + +		if (childShapeIndexB >= 0)  		{  			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;  			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;  			b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; -			float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; -			b3Quat newOrnB = b3QuatMul(ornB,childOrnB); +			float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; +			b3Quat newOrnB = b3QuatMul(ornB, childOrnB);  			posB = newPosB;  			ornB = newOrnB; -		} else +		} +		else  		{ -			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	 +			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;  		} -	 +  		gpuHasCompoundSepNormalsOut[i] = 0; -	 +  		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;  		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; -	 +  		int shapeTypeA = collidables[collidableIndexA].m_shapeType;  		int shapeTypeB = collidables[collidableIndexB].m_shapeType; -	  		if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))  		{ @@ -1959,145 +1851,142 @@ __kernel void   processCompoundPairsKernel( __global const b3Int4* gpuCompoundPa  		}  		int hasSeparatingAxis = 5; -							 -	//	int numFacesA = convexShapes[shapeIndexA].m_numFaces; + +		//	int numFacesA = convexShapes[shapeIndexA].m_numFaces;  		float dmin = FLT_MAX;  		posA.w = 0.f;  		posB.w = 0.f;  		float4 c0local = convexShapes[shapeIndexA].m_localCenter;  		float4 c0 = transform(&c0local, &posA, &ornA);  		float4 c1local = convexShapes[shapeIndexB].m_localCenter; -		float4 c1 = transform(&c1local,&posB,&ornB); +		float4 c1 = transform(&c1local, &posB, &ornB);  		const float4 DeltaC2 = c0 - c1; -		float4 sepNormal = make_float4(1,0,0,0); -//		bool sepA = findSeparatingAxis(	convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); -		bool sepA = findSeparatingAxis(	convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); -	 +		float4 sepNormal = make_float4(1, 0, 0, 0); +		//		bool sepA = findSeparatingAxis(	convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); +		bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin); +  		hasSeparatingAxis = 4;  		if (!sepA)  		{  			hasSeparatingAxis = 0; -		} else +		} +		else  		{ -			bool sepB = findSeparatingAxis(	convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); +			bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal);  //,&dmin);  			if (!sepB)  			{  				hasSeparatingAxis = 0; -			} else//(!sepB) +			} +			else  //(!sepB)  			{ -				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); +				bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin);  				if (sepEE)  				{ -						gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); -						gpuHasCompoundSepNormalsOut[i] = 1; -				}//sepEE -			}//(!sepB) -		}//(!sepA) -		 -		 +					gpuCompoundSepNormalsOut[i] = sepNormal;  //fastNormalize4(sepNormal); +					gpuHasCompoundSepNormalsOut[i] = 1; +				}  //sepEE +			}      //(!sepB) +		}          //(!sepA)  	} -		  } - -__kernel void   clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs,  -																					__global const b3RigidBodyData* rigidBodies,  -																					__global const b3Collidable* collidables, -																					__global const b3ConvexPolyhedronData* convexShapes,  -																					__global const b3AlignedObjectArray<b3Float4>& vertices, -																					__global const b3AlignedObjectArray<b3Float4>& uniqueEdges, -																					__global const b3AlignedObjectArray<b3GpuFace>& faces, -																					__global const b3AlignedObjectArray<int>& indices, -																					__global const b3GpuChildShape* gpuChildShapes, -																					__global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, -																					__global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, -																					__global struct b3Contact4Data* globalContactsOut, -																					int* nGlobalContactsOut, -																					int numCompoundPairs, int maxContactCapacity, int i) +__kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs, +										  __global const b3RigidBodyData* rigidBodies, +										  __global const b3Collidable* collidables, +										  __global const b3ConvexPolyhedronData* convexShapes, +										  __global const b3AlignedObjectArray<b3Float4>& vertices, +										  __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, +										  __global const b3AlignedObjectArray<b3GpuFace>& faces, +										  __global const b3AlignedObjectArray<int>& indices, +										  __global const b3GpuChildShape* gpuChildShapes, +										  __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, +										  __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, +										  __global struct b3Contact4Data* globalContactsOut, +										  int* nGlobalContactsOut, +										  int numCompoundPairs, int maxContactCapacity, int i)  { - -//	int i = get_global_id(0); +	//	int i = get_global_id(0);  	int pairIndex = i; -	 +  	float4 worldVertsB1[64];  	float4 worldVertsB2[64]; -	int capacityWorldVerts = 64;	 +	int capacityWorldVerts = 64;  	float4 localContactsOut[64]; -	int localContactCapacity=64; -	 +	int localContactCapacity = 64; +  	float minDist = -1e30f;  	float maxDist = 0.0f; -	if (i<numCompoundPairs) +	if (i < numCompoundPairs)  	{ -  		if (gpuHasCompoundSepNormalsOut[i])  		{ -  			int bodyIndexA = gpuCompoundPairs[i].x;  			int bodyIndexB = gpuCompoundPairs[i].y; -			 +  			int childShapeIndexA = gpuCompoundPairs[i].z;  			int childShapeIndexB = gpuCompoundPairs[i].w; -			 +  			int collidableIndexA = -1;  			int collidableIndexB = -1; -			 +  			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;  			float4 posA = rigidBodies[bodyIndexA].m_pos; -			 +  			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;  			float4 posB = rigidBodies[bodyIndexB].m_pos; -								 +  			if (childShapeIndexA >= 0)  			{  				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;  				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;  				b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; -				float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; -				b3Quat newOrnA = b3QuatMul(ornA,childOrnA); +				float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; +				b3Quat newOrnA = b3QuatMul(ornA, childOrnA);  				posA = newPosA;  				ornA = newOrnA; -			} else +			} +			else  			{  				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;  			} -			 -			if (childShapeIndexB>=0) + +			if (childShapeIndexB >= 0)  			{  				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;  				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; -				b3Quat  childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; -				float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; -				b3Quat  newOrnB = b3QuatMul(ornB,childOrnB); +				b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; +				float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; +				b3Quat newOrnB = b3QuatMul(ornB, childOrnB);  				posB = newPosB;  				ornB = newOrnB; -			} else +			} +			else  			{ -				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	 +				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;  			} -			 +  			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;  			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; -		 +  			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], -														convexShapes[shapeIndexA], convexShapes[shapeIndexB], -														posA,ornA, -													  posB,ornB, -													  worldVertsB1,worldVertsB2,capacityWorldVerts, -														minDist, maxDist, -														vertices,faces,indices, -														vertices,faces,indices, -														localContactsOut,localContactCapacity); -												 -		if (numLocalContactsOut>0) -		{ +														  convexShapes[shapeIndexA], convexShapes[shapeIndexB], +														  posA, ornA, +														  posB, ornB, +														  worldVertsB1, worldVertsB2, capacityWorldVerts, +														  minDist, maxDist, +														  vertices, faces, indices, +														  vertices, faces, indices, +														  localContactsOut, localContactCapacity); + +			if (numLocalContactsOut > 0) +			{  				float4 normal = -gpuCompoundSepNormalsOut[i];  				int nPoints = numLocalContactsOut;  				float4* pointsIn = localContactsOut; -				b3Int4 contactIdx;// = {-1,-1,-1,-1}; +				b3Int4 contactIdx;  // = {-1,-1,-1,-1};  				contactIdx.s[0] = 0;  				contactIdx.s[1] = 1; @@ -2105,111 +1994,106 @@ __kernel void   clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundP  				contactIdx.s[3] = 3;  				int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); -		 +  				int dstIdx; -				dstIdx = b3AtomicInc( nGlobalContactsOut); -				if ((dstIdx+nReducedContacts) < maxContactCapacity) +				dstIdx = b3AtomicInc(nGlobalContactsOut); +				if ((dstIdx + nReducedContacts) < maxContactCapacity)  				{ -					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx; +					__global struct b3Contact4Data* c = globalContactsOut + dstIdx;  					c->m_worldNormalOnB = -normal; -					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); +					c->m_restituitionCoeffCmp = (0.f * 0xffff); +					c->m_frictionCoeffCmp = (0.7f * 0xffff);  					c->m_batchIdx = pairIndex;  					int bodyA = gpuCompoundPairs[pairIndex].x;  					int bodyB = gpuCompoundPairs[pairIndex].y; -					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; -					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; +					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA; +					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB;  					c->m_childIndexA = childShapeIndexA;  					c->m_childIndexB = childShapeIndexB; -					for (int i=0;i<nReducedContacts;i++) +					for (int i = 0; i < nReducedContacts; i++)  					{  						c->m_worldPosB[i] = pointsIn[contactIdx.s[i]];  					} -					b3Contact4Data_setNumPoints(c,nReducedContacts); +					b3Contact4Data_setNumPoints(c, nReducedContacts);  				} -				 -			}//		if (numContactsOut>0) -		}//		if (gpuHasCompoundSepNormalsOut[i]) -	}//	if (i<numCompoundPairs) +			}  //		if (numContactsOut>0) +		}      //		if (gpuHasCompoundSepNormalsOut[i]) +	}          //	if (i<numCompoundPairs)  } -  void computeContactCompoundCompound(int pairIndex, -																int bodyIndexA, int bodyIndexB,  -																int collidableIndexA, int collidableIndexB,  -																const b3RigidBodyData* rigidBodies,  -																const b3Collidable* collidables, -																const b3ConvexPolyhedronData* convexShapes, -																const b3GpuChildShape* cpuChildShapes, -																const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, -																const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, - -																const b3AlignedObjectArray<b3Vector3>& convexVertices, -																const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, -																const b3AlignedObjectArray<int>& convexIndices, -																const b3AlignedObjectArray<b3GpuFace>& faces, -																 -																b3Contact4* globalContactsOut, -																int& nGlobalContactsOut, -																int maxContactCapacity, -																b3AlignedObjectArray<b3QuantizedBvhNode>&	treeNodesCPU, -																b3AlignedObjectArray<b3BvhSubtreeInfo>&	subTreesCPU, -																b3AlignedObjectArray<b3BvhInfo>&	bvhInfoCPU -																) +									int bodyIndexA, int bodyIndexB, +									int collidableIndexA, int collidableIndexB, +									const b3RigidBodyData* rigidBodies, +									const b3Collidable* collidables, +									const b3ConvexPolyhedronData* convexShapes, +									const b3GpuChildShape* cpuChildShapes, +									const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, +									const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, + +									const b3AlignedObjectArray<b3Vector3>& convexVertices, +									const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, +									const b3AlignedObjectArray<int>& convexIndices, +									const b3AlignedObjectArray<b3GpuFace>& faces, + +									b3Contact4* globalContactsOut, +									int& nGlobalContactsOut, +									int maxContactCapacity, +									b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, +									b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, +									b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU)  { -  	int shapeTypeB = collidables[collidableIndexB].m_shapeType;  	b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);  	b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; -	int numCompoundPairsOut=0; -	int maxNumCompoundPairsCapacity = 8192;//1024; +	int numCompoundPairsOut = 0; +	int maxNumCompoundPairsCapacity = 8192;  //1024;  	cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity);  	// work-in-progress -	findCompoundPairsKernel(  -							pairIndex, -							bodyIndexA,bodyIndexB, -							collidableIndexA,collidableIndexB, -							rigidBodies,  -							collidables, -							convexShapes,  -							convexVertices, -							hostAabbsWorldSpace, -							hostAabbsLocalSpace, -							cpuChildShapes, -							&cpuCompoundPairsOut[0], -							&numCompoundPairsOut, -							maxNumCompoundPairsCapacity	, -							treeNodesCPU, -							subTreesCPU, -							bvhInfoCPU -							); - -	printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); -	if (numCompoundPairsOut>maxNumCompoundPairsCapacity) +	findCompoundPairsKernel( +		pairIndex, +		bodyIndexA, bodyIndexB, +		collidableIndexA, collidableIndexB, +		rigidBodies, +		collidables, +		convexShapes, +		convexVertices, +		hostAabbsWorldSpace, +		hostAabbsLocalSpace, +		cpuChildShapes, +		&cpuCompoundPairsOut[0], +		&numCompoundPairsOut, +		maxNumCompoundPairsCapacity, +		treeNodesCPU, +		subTreesCPU, +		bvhInfoCPU); + +	printf("maxNumAabbChecks=%d\n", maxNumAabbChecks); +	if (numCompoundPairsOut > maxNumCompoundPairsCapacity)  	{ -		b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); -		numCompoundPairsOut=maxNumCompoundPairsCapacity; +		b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity); +		numCompoundPairsOut = maxNumCompoundPairsCapacity;  	}  	b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut;  	b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut;  	cpuCompoundSepNormalsOut.resize(numCompoundPairsOut);  	cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); -	for (int i=0;i<numCompoundPairsOut;i++) +	for (int i = 0; i < numCompoundPairsOut; i++)  	{ - -		processCompoundPairsKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,0,cpuChildShapes, -			cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,numCompoundPairsOut,i); +		processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes, +								   cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i);  	} -	for (int i=0;i<numCompoundPairsOut;i++) +	for (int i = 0; i < numCompoundPairsOut; i++)  	{ -		clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,cpuChildShapes, -			cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,globalContactsOut,&nGlobalContactsOut,numCompoundPairsOut,maxContactCapacity,i); +		clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes, +									cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i);  	} -		/* +	/*  		int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;  					float4 posA = rigidBodies[bodyIndexA].m_pos; @@ -2235,7 +2119,6 @@ void computeContactCompoundCompound(int pairIndex,  							);  							*/ -	  	/*  	if (foundSepAxis)  	{ @@ -2271,8 +2154,8 @@ void computeContactCompoundCompound(int pairIndex,  	}  	*/ -//	return contactIndex; -	 +	//	return contactIndex; +  	/*  	int numChildrenB = collidables[collidableIndexB].m_numChildShapes; @@ -2294,56 +2177,52 @@ void computeContactCompoundCompound(int pairIndex,  	}  	*/ -  }  void computeContactPlaneCompound(int pairIndex, -																int bodyIndexA, int bodyIndexB,  -																int collidableIndexA, int collidableIndexB,  -																const b3RigidBodyData* rigidBodies,  -																const b3Collidable* collidables, -																const b3ConvexPolyhedronData* convexShapes, -																const b3GpuChildShape* cpuChildShapes, -																const b3Vector3* convexVertices, -																const int* convexIndices, -																const b3GpuFace* faces, -																 -																b3Contact4* globalContactsOut, -																int& nGlobalContactsOut, -																int maxContactCapacity) +								 int bodyIndexA, int bodyIndexB, +								 int collidableIndexA, int collidableIndexB, +								 const b3RigidBodyData* rigidBodies, +								 const b3Collidable* collidables, +								 const b3ConvexPolyhedronData* convexShapes, +								 const b3GpuChildShape* cpuChildShapes, +								 const b3Vector3* convexVertices, +								 const int* convexIndices, +								 const b3GpuFace* faces, + +								 b3Contact4* globalContactsOut, +								 int& nGlobalContactsOut, +								 int maxContactCapacity)  { -  	int shapeTypeB = collidables[collidableIndexB].m_shapeType;  	b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); -  	int numChildrenB = collidables[collidableIndexB].m_numChildShapes; -	for (int c=0;c<numChildrenB;c++) +	for (int c = 0; c < numChildrenB; c++)  	{ -		int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; +		int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c;  		int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex;  		float4 rootPosB = rigidBodies[bodyIndexB].m_pos;  		b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat;  		b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition;  		b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; -		float4  posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; -		b3Quaternion ornB = rootOrnB*childOrnB;//b3QuatMul(ornB,childOrnB); +		float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB; +		b3Quaternion ornB = rootOrnB * childOrnB;  //b3QuatMul(ornB,childOrnB);  		int shapeIndexB = collidables[childColIndexB].m_shapeIndex;  		const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; -	 -		 +  		b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;  		b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; -	//	int numContactsOut = 0; -	//	int numWorldVertsB1= 0; +		//	int numContactsOut = 0; +		//	int numWorldVertsB1= 0;  		b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; -		b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); -		b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); +		b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); +		b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal);  		float planeConstant = planeEq.w;  		b3Transform convexWorldTransform;  		convexWorldTransform.setIdentity(); @@ -2355,16 +2234,16 @@ void computeContactPlaneCompound(int pairIndex,  		planeTransform.setRotation(ornA);  		b3Transform planeInConvex; -		planeInConvex= convexWorldTransform.inverse() * planeTransform; +		planeInConvex = convexWorldTransform.inverse() * planeTransform;  		b3Transform convexInPlane;  		convexInPlane = planeTransform.inverse() * convexWorldTransform; -	 -		b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + +		b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal;  		float maxDot = -1e30; -		int hitVertex=-1; +		int hitVertex = -1;  		b3Vector3 hitVtx; -	#define MAX_PLANE_CONVEX_POINTS 64 +#define MAX_PLANE_CONVEX_POINTS 64  		b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS];  		int numPoints = 0; @@ -2374,54 +2253,52 @@ void computeContactPlaneCompound(int pairIndex,  		contactIdx.s[1] = 1;  		contactIdx.s[2] = 2;  		contactIdx.s[3] = 3; -	 -		for (int i=0;i<hullB->m_numVertices;i++) + +		for (int i = 0; i < hullB->m_numVertices; i++)  		{ -			b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; +			b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i];  			float curDot = vtx.dot(planeNormalInConvex); - -			if (curDot>maxDot) +			if (curDot > maxDot)  			{ -				hitVertex=i; -				maxDot=curDot; +				hitVertex = i; +				maxDot = curDot;  				hitVtx = vtx;  				//make sure the deepest points is always included -				if (numPoints==MAX_PLANE_CONVEX_POINTS) +				if (numPoints == MAX_PLANE_CONVEX_POINTS)  					numPoints--;  			} -			if (numPoints<MAX_PLANE_CONVEX_POINTS) +			if (numPoints < MAX_PLANE_CONVEX_POINTS)  			{ -				b3Vector3 vtxWorld = convexWorldTransform*vtx; -				b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; -				float dist = planeNormal.dot(vtxInPlane)-planeConstant; -				if (dist<0.f) +				b3Vector3 vtxWorld = convexWorldTransform * vtx; +				b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; +				float dist = planeNormal.dot(vtxInPlane) - planeConstant; +				if (dist < 0.f)  				{  					vtxWorld.w = dist;  					contactPoints[numPoints] = vtxWorld;  					numPoints++;  				}  			} -  		} -		int numReducedPoints  = 0; +		int numReducedPoints = 0;  		numReducedPoints = numPoints; -	 -		if (numPoints>4) + +		if (numPoints > 4)  		{ -			numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); +			numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx);  		}  		int dstIdx; -	//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); -		 -		if (numReducedPoints>0) +		//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + +		if (numReducedPoints > 0)  		{  			if (nGlobalContactsOut < maxContactCapacity)  			{ -				dstIdx=nGlobalContactsOut; +				dstIdx = nGlobalContactsOut;  				nGlobalContactsOut++;  				b3Contact4* c = &globalContactsOut[dstIdx]; @@ -2430,48 +2307,37 @@ void computeContactPlaneCompound(int pairIndex,  				c->setRestituitionCoeff(0.f);  				c->m_batchIdx = pairIndex; -				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; -				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; -				for (int i=0;i<numReducedPoints;i++) +				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; +				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; +				for (int i = 0; i < numReducedPoints; i++)  				{  					b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]];  					c->m_worldPosB[i] = pOnB1;  				}  				c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; -			}//if (dstIdx < numPairs) -		}	 -		 +			}  //if (dstIdx < numPairs) +		}  	} - -	  } - - - - -void	computeContactSphereConvex(int pairIndex, -																int bodyIndexA, int bodyIndexB,  -																int collidableIndexA, int collidableIndexB,  -																const b3RigidBodyData* rigidBodies,  -																const b3Collidable* collidables, -																const b3ConvexPolyhedronData* convexShapes, -																const b3Vector3* convexVertices, -																const int* convexIndices, -																const b3GpuFace* faces, -																b3Contact4* globalContactsOut, -																int& nGlobalContactsOut, -																int maxContactCapacity) +void computeContactSphereConvex(int pairIndex, +								int bodyIndexA, int bodyIndexB, +								int collidableIndexA, int collidableIndexB, +								const b3RigidBodyData* rigidBodies, +								const b3Collidable* collidables, +								const b3ConvexPolyhedronData* convexShapes, +								const b3Vector3* convexVertices, +								const int* convexIndices, +								const b3GpuFace* faces, +								b3Contact4* globalContactsOut, +								int& nGlobalContactsOut, +								int maxContactCapacity)  { -  	float radius = collidables[collidableIndexA].m_radius;  	float4 spherePos1 = rigidBodies[bodyIndexA].m_pos;  	b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - -  	float4 pos = rigidBodies[bodyIndexB].m_pos; -	  	b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; @@ -2487,64 +2353,65 @@ void	computeContactSphereConvex(int pairIndex,  	int shapeIndex = collidables[collidableIndex].m_shapeIndex;  	int numFaces = convexShapes[shapeIndex].m_numFaces;  	float4 closestPnt = b3MakeVector3(0, 0, 0, 0); -//	float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); -	float minDist = -1000000.f; // TODO: What is the largest/smallest float? +	//	float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); +	float minDist = -1000000.f;  // TODO: What is the largest/smallest float?  	bool bCollide = true;  	int region = -1;  	float4 localHitNormal; -	for ( int f = 0; f < numFaces; f++ ) +	for (int f = 0; f < numFaces; f++)  	{ -		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; +		b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f];  		float4 planeEqn; -		float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); -		float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); +		float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); +		float4 n1 = localPlaneNormal;  //quatRotate(quat,localPlaneNormal);  		planeEqn = n1;  		planeEqn[3] = face.m_plane.w;  		float4 pntReturn;  		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); -		if ( dist > radius) +		if (dist > radius)  		{  			bCollide = false;  			break;  		} -		if ( dist > 0 ) +		if (dist > 0)  		{  			//might hit an edge or vertex  			b3Vector3 out;  			bool isInPoly = IsPointInPolygon(spherePos, -					&face, -					&convexVertices[convexShapes[shapeIndex].m_vertexOffset], -					convexIndices, -                    &out); +											 &face, +											 &convexVertices[convexShapes[shapeIndex].m_vertexOffset], +											 convexIndices, +											 &out);  			if (isInPoly)  			{ -				if (dist>minDist) +				if (dist > minDist)  				{  					minDist = dist;  					closestPnt = pntReturn;  					localHitNormal = planeEqn; -					region=1; +					region = 1;  				} -			} else +			} +			else  			{ -				b3Vector3 tmp = spherePos-out; +				b3Vector3 tmp = spherePos - out;  				b3Scalar l2 = tmp.length2(); -				if (l2<radius*radius) +				if (l2 < radius * radius)  				{ -					dist  = b3Sqrt(l2); -					if (dist>minDist) +					dist = b3Sqrt(l2); +					if (dist > minDist)  					{  						minDist = dist;  						closestPnt = out; -						localHitNormal = tmp/dist; -						region=2; +						localHitNormal = tmp / dist; +						region = 2;  					} -					 -				} else +				} +				else  				{  					bCollide = false;  					break; @@ -2553,12 +2420,12 @@ void	computeContactSphereConvex(int pairIndex,  		}  		else  		{ -			if ( dist > minDist ) +			if (dist > minDist)  			{  				minDist = dist;  				closestPnt = pntReturn;  				localHitNormal = planeEqn; -				region=3; +				region = 3;  			}  		}  	} @@ -2567,128 +2434,113 @@ void	computeContactSphereConvex(int pairIndex,  	if (bCollide && minDist > -10000)  	{ -		 -		float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; +		float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal;  //-hitNormalWorld;  		float4 pOnB1 = tr(closestPnt);  		//printf("dist ,%f,",minDist); -		float actualDepth = minDist-radius; -		if (actualDepth<0) +		float actualDepth = minDist - radius; +		if (actualDepth < 0)  		{ -		//printf("actualDepth = ,%f,", actualDepth); -		//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); -		//printf("region=,%d,\n", region); -		pOnB1[3] = actualDepth; +			//printf("actualDepth = ,%f,", actualDepth); +			//printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); +			//printf("region=,%d,\n", region); +			pOnB1[3] = actualDepth; -		int dstIdx; -//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); -		 -		if (nGlobalContactsOut < maxContactCapacity) -		{ -			dstIdx=nGlobalContactsOut; -			nGlobalContactsOut++; +			int dstIdx; +			//    dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); -			b3Contact4* c = &globalContactsOut[dstIdx]; -			c->m_worldNormalOnB = normalOnSurfaceB1; -			c->setFrictionCoeff(0.7); -			c->setRestituitionCoeff(0.f); +			if (nGlobalContactsOut < maxContactCapacity) +			{ +				dstIdx = nGlobalContactsOut; +				nGlobalContactsOut++; -			c->m_batchIdx = pairIndex; -			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; -			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; -			c->m_worldPosB[0] = pOnB1; -			int numPoints = 1; -			c->m_worldNormalOnB.w = (b3Scalar)numPoints; -		}//if (dstIdx < numPairs) +				b3Contact4* c = &globalContactsOut[dstIdx]; +				c->m_worldNormalOnB = normalOnSurfaceB1; +				c->setFrictionCoeff(0.7); +				c->setRestituitionCoeff(0.f); + +				c->m_batchIdx = pairIndex; +				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; +				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; +				c->m_worldPosB[0] = pOnB1; +				int numPoints = 1; +				c->m_worldNormalOnB.w = (b3Scalar)numPoints; +			}  //if (dstIdx < numPairs)  		} -	}//if (hasCollision) -	 +	}  //if (hasCollision)  } - - -  int computeContactConvexConvex2( -																int pairIndex, -																int bodyIndexA, int bodyIndexB,  -																int collidableIndexA, int collidableIndexB,  -																const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies,  -																const b3AlignedObjectArray<b3Collidable>& collidables, -																const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, -																const b3AlignedObjectArray<b3Vector3>& convexVertices, -																const b3AlignedObjectArray<b3Vector3>& uniqueEdges, -																const b3AlignedObjectArray<int>& convexIndices, -																const b3AlignedObjectArray<b3GpuFace>& faces, -																b3AlignedObjectArray<b3Contact4>& globalContactsOut, -																int& nGlobalContactsOut, -																int maxContactCapacity, -																const b3AlignedObjectArray<b3Contact4>& oldContacts -																) +	int pairIndex, +	int bodyIndexA, int bodyIndexB, +	int collidableIndexA, int collidableIndexB, +	const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, +	const b3AlignedObjectArray<b3Collidable>& collidables, +	const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, +	const b3AlignedObjectArray<b3Vector3>& convexVertices, +	const b3AlignedObjectArray<b3Vector3>& uniqueEdges, +	const b3AlignedObjectArray<int>& convexIndices, +	const b3AlignedObjectArray<b3GpuFace>& faces, +	b3AlignedObjectArray<b3Contact4>& globalContactsOut, +	int& nGlobalContactsOut, +	int maxContactCapacity, +	const b3AlignedObjectArray<b3Contact4>& oldContacts)  {  	int contactIndex = -1;  	b3Vector3 posA = rigidBodies[bodyIndexA].m_pos;  	b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat;  	b3Vector3 posB = rigidBodies[bodyIndexB].m_pos;  	b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; -	  	b3ConvexPolyhedronData hullA, hullB; -     +  	b3Vector3 sepNormalWorldSpace; -	 +	b3Collidable colA = collidables[collidableIndexA]; +	hullA = convexShapes[colA.m_shapeIndex]; +	//printf("numvertsA = %d\n",hullA.m_numVertices); -    b3Collidable colA = collidables[collidableIndexA]; -    hullA = convexShapes[colA.m_shapeIndex]; -    //printf("numvertsA = %d\n",hullA.m_numVertices); -     -     -    b3Collidable colB = collidables[collidableIndexB]; -    hullB = convexShapes[colB.m_shapeIndex]; -    //printf("numvertsB = %d\n",hullB.m_numVertices); +	b3Collidable colB = collidables[collidableIndexB]; +	hullB = convexShapes[colB.m_shapeIndex]; +	//printf("numvertsB = %d\n",hullB.m_numVertices); -//	int contactCapacity = MAX_VERTS; +	//	int contactCapacity = MAX_VERTS;  	//int numContactsOut=0; -  #ifdef _WIN32  	b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x));  	b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x));  #endif -	 -		bool foundSepAxis = findSeparatingAxis(hullA,hullB, -							posA, -							ornA, -							posB, -							ornB, -							convexVertices,uniqueEdges,faces,convexIndices, -							convexVertices,uniqueEdges,faces,convexIndices, -							 -							sepNormalWorldSpace -							); +	bool foundSepAxis = findSeparatingAxis(hullA, hullB, +										   posA, +										   ornA, +										   posB, +										   ornB, + +										   convexVertices, uniqueEdges, faces, convexIndices, +										   convexVertices, uniqueEdges, faces, convexIndices, + +										   sepNormalWorldSpace); -	  	if (foundSepAxis)  	{ -		 -		  		contactIndex = clipHullHullSingle(  			bodyIndexA, bodyIndexB, -						   posA,ornA, -						   posB,ornB, +			posA, ornA, +			posB, ornB,  			collidableIndexA, collidableIndexB, -			&rigidBodies,  +			&rigidBodies,  			&globalContactsOut,  			nGlobalContactsOut, -			 +  			convexShapes,  			convexShapes, -	 -			convexVertices,  -			uniqueEdges,  + +			convexVertices, +			uniqueEdges,  			faces,  			convexIndices, -	 +  			convexVertices,  			uniqueEdges,  			faces, @@ -2698,50 +2550,42 @@ int computeContactConvexConvex2(  			collidables,  			sepNormalWorldSpace,  			maxContactCapacity); -			  	}  	return contactIndex;  } - - - - -																 -																 -void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, -			const b3OpenCLArray<b3RigidBodyData>* bodyBuf, -			b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, -			const b3OpenCLArray<b3Contact4>* oldContacts, -			int maxContactCapacity, -			int compoundPairCapacity, -			const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, -			const b3OpenCLArray<b3Vector3>& gpuVertices, -			const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, -			const b3OpenCLArray<b3GpuFace>& gpuFaces, -			const b3OpenCLArray<int>& gpuIndices, -			const b3OpenCLArray<b3Collidable>& gpuCollidables, -			const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - -			const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, -			const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - -            b3OpenCLArray<b3Vector3>& worldVertsB1GPU, -            b3OpenCLArray<b3Int4>& clippingFacesOutGPU, -            b3OpenCLArray<b3Vector3>& worldNormalsAGPU, -            b3OpenCLArray<b3Vector3>& worldVertsA1GPU, -            b3OpenCLArray<b3Vector3>& worldVertsB2GPU,     -			b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, -			b3OpenCLArray<b3QuantizedBvhNode>*	treeNodesGPU, -			b3OpenCLArray<b3BvhSubtreeInfo>*	subTreesGPU, -			b3OpenCLArray<b3BvhInfo>*	bvhInfo, - -			int numObjects, -			int maxTriConvexPairCapacity, -			b3OpenCLArray<b3Int4>& triangleConvexPairsOut, -			int& numTriConvexPairsOut -			) +void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs, +														const b3OpenCLArray<b3RigidBodyData>* bodyBuf, +														b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, +														const b3OpenCLArray<b3Contact4>* oldContacts, +														int maxContactCapacity, +														int compoundPairCapacity, +														const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, +														const b3OpenCLArray<b3Vector3>& gpuVertices, +														const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, +														const b3OpenCLArray<b3GpuFace>& gpuFaces, +														const b3OpenCLArray<int>& gpuIndices, +														const b3OpenCLArray<b3Collidable>& gpuCollidables, +														const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, + +														const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, +														const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, + +														b3OpenCLArray<b3Vector3>& worldVertsB1GPU, +														b3OpenCLArray<b3Int4>& clippingFacesOutGPU, +														b3OpenCLArray<b3Vector3>& worldNormalsAGPU, +														b3OpenCLArray<b3Vector3>& worldVertsA1GPU, +														b3OpenCLArray<b3Vector3>& worldVertsB2GPU, +														b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, +														b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, +														b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, +														b3OpenCLArray<b3BvhInfo>* bvhInfo, + +														int numObjects, +														int maxTriConvexPairCapacity, +														b3OpenCLArray<b3Int4>& triangleConvexPairsOut, +														int& numTriConvexPairsOut)  {  	myframecount++; @@ -2750,14 +2594,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  #ifdef CHECK_ON_HOST - -	b3AlignedObjectArray<b3QuantizedBvhNode>	treeNodesCPU; +	b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU;  	treeNodesGPU->copyToHost(treeNodesCPU); -	b3AlignedObjectArray<b3BvhSubtreeInfo>	subTreesCPU; +	b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU;  	subTreesGPU->copyToHost(subTreesCPU); -	b3AlignedObjectArray<b3BvhInfo>	bvhInfoCPU; +	b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU;  	bvhInfo->copyToHost(bvhInfoCPU);  	b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; @@ -2772,8 +2615,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  	b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;  	bodyBuf->copyToHost(hostBodyBuf); -	 -  	b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData;  	convexData.copyToHost(hostConvexData); @@ -2788,10 +2629,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  	gpuIndices.copyToHost(hostIndices);  	b3AlignedObjectArray<b3Collidable> hostCollidables;  	gpuCollidables.copyToHost(hostCollidables); -	 +  	b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes;  	gpuChildShapes.copyToHost(cpuChildShapes); -	  	b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs; @@ -2802,16 +2642,15 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  	}  	b3AlignedObjectArray<b3Contact4> oldHostContacts; -	 +  	if (oldContacts->size())  	{  		oldContacts->copyToHost(oldHostContacts);  	} -  	hostContacts.resize(maxContactCapacity); -	for (int i=0;i<nPairs;i++) +	for (int i = 0; i < nPairs; i++)  	{  		int bodyIndexA = hostPairs[i].x;  		int bodyIndexB = hostPairs[i].y; @@ -2821,84 +2660,73 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)  		{ -			computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], -				&hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +			computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], +									   &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);  		}  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)  		{ -			computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], -				&hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +			computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], +									   &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity);  			//printf("convex-sphere\n"); -			  		}  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)  		{ -			computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], -			&hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -//			printf("convex-plane\n"); -			 +			computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], +									  &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); +			//			printf("convex-plane\n");  		}  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)  		{ -			computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], -			&hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -//			printf("plane-convex\n"); -			 +			computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], +									  &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); +			//			printf("plane-convex\n");  		} -			if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && +		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)  		{ -			computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], -			&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], -			nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU);	 -//			printf("convex-plane\n"); -			 +			computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], +										   &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0], +										   nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU); +			//			printf("convex-plane\n");  		} - -				if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && +		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE)  		{ -			computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], -			&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -//			printf("convex-plane\n"); -			 +			computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], +										&hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); +			//			printf("convex-plane\n");  		}  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)  		{ -			computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], -			&hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -//			printf("plane-convex\n"); -			 +			computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], +										&hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); +			//			printf("plane-convex\n");  		}  		if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&  			hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)  		{  			//printf("hostPairs[i].z=%d\n",hostPairs[i].z); -			int contactIndex = computeContactConvexConvex2(           i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); +			int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts);  			//int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - -			if (contactIndex>=0) +			if (contactIndex >= 0)  			{ -//				printf("convex convex contactIndex = %d\n",contactIndex); +				//				printf("convex convex contactIndex = %d\n",contactIndex);  				hostPairs[i].z = contactIndex;  			} -//			printf("plane-convex\n"); -			 +			//			printf("plane-convex\n");  		} - -  	}  	if (hostPairs.size()) @@ -2908,81 +2736,76 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  	hostContacts.resize(nContacts);  	if (nContacts) -		{ -			 -			contactOut->copyFromHost(hostContacts); -		} else +	{ +		contactOut->copyFromHost(hostContacts); +	} +	else  	{  		contactOut->resize(0); -		} +	} -		m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -		//printf("(HOST) nContacts = %d\n",nContacts); +	m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); +	//printf("(HOST) nContacts = %d\n",nContacts);  #else  	{  		if (nPairs)  		{ -			m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); +			m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);  			B3_PROFILE("primitiveContactsKernel");  			b3BufferInfoCL bInfo[] = { -				b3BufferInfoCL( pairs->getBufferCL(), true ),  -				b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -				b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -				b3BufferInfoCL( convexData.getBufferCL(),true), -				b3BufferInfoCL( gpuVertices.getBufferCL(),true), -				b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -				b3BufferInfoCL( gpuFaces.getBufferCL(),true), -				b3BufferInfoCL( gpuIndices.getBufferCL(),true), -				b3BufferInfoCL( contactOut->getBufferCL()), -				b3BufferInfoCL( m_totalContactsOut.getBufferCL())	 -			}; -			 -			b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); -			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -			launcher.setConst( nPairs  ); +				b3BufferInfoCL(pairs->getBufferCL(), true), +				b3BufferInfoCL(bodyBuf->getBufferCL(), true), +				b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +				b3BufferInfoCL(convexData.getBufferCL(), true), +				b3BufferInfoCL(gpuVertices.getBufferCL(), true), +				b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +				b3BufferInfoCL(gpuFaces.getBufferCL(), true), +				b3BufferInfoCL(gpuIndices.getBufferCL(), true), +				b3BufferInfoCL(contactOut->getBufferCL()), +				b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +			b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel"); +			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +			launcher.setConst(nPairs);  			launcher.setConst(maxContactCapacity);  			int num = nPairs; -			launcher.launch1D( num); +			launcher.launch1D(num);  			clFinish(m_queue); -		 +  			nContacts = m_totalContactsOut.at(0);  			contactOut->resize(nContacts);  		}  	} -	 -#endif//CHECK_ON_HOST -	 +#endif  //CHECK_ON_HOST +  	B3_PROFILE("computeConvexConvexContactsGPUSAT"); -   // printf("nContacts = %d\n",nContacts); -     -	 +	// printf("nContacts = %d\n",nContacts); +  	m_sepNormals.resize(nPairs);  	m_hasSeparatingNormals.resize(nPairs); -	 -	int concaveCapacity=maxTriConvexPairCapacity; + +	int concaveCapacity = maxTriConvexPairCapacity;  	m_concaveSepNormals.resize(concaveCapacity);  	m_concaveHasSeparatingNormals.resize(concaveCapacity);  	m_numConcavePairsOut.resize(0);  	m_numConcavePairsOut.push_back(0); -	  	m_gpuCompoundPairs.resize(compoundPairCapacity);  	m_gpuCompoundSepNormals.resize(compoundPairCapacity); -	 -	 +  	m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); -	 +  	m_numCompoundPairsOut.resize(0);  	m_numCompoundPairsOut.push_back(0);  	int numCompoundPairs = 0; -	int numConcavePairs =0; +	int numConcavePairs = 0;  	{  		clFinish(m_queue); @@ -2991,33 +2814,30 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  			m_dmins.resize(nPairs);  			if (splitSearchSepAxisConvex)  			{ -					 -  				if (useMprGpu)  				{  					nContacts = m_totalContactsOut.at(0);  					{  						B3_PROFILE("mprPenetrationKernel"); -						b3BufferInfoCL bInfo[] = {  -							b3BufferInfoCL( pairs->getBufferCL(), true ),  -							b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -							b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -							b3BufferInfoCL( convexData.getBufferCL(),true), -							b3BufferInfoCL( gpuVertices.getBufferCL(),true), -							b3BufferInfoCL( m_sepNormals.getBufferCL()), -							b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -							b3BufferInfoCL( contactOut->getBufferCL()), -							b3BufferInfoCL( m_totalContactsOut.getBufferCL()) -						}; - -						b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); -						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); +						b3BufferInfoCL bInfo[] = { +							b3BufferInfoCL(pairs->getBufferCL(), true), +							b3BufferInfoCL(bodyBuf->getBufferCL(), true), +							b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +							b3BufferInfoCL(convexData.getBufferCL(), true), +							b3BufferInfoCL(gpuVertices.getBufferCL(), true), +							b3BufferInfoCL(m_sepNormals.getBufferCL()), +							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +							b3BufferInfoCL(contactOut->getBufferCL()), +							b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +						b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel"); +						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));  						launcher.setConst(maxContactCapacity); -						launcher.setConst( nPairs  ); +						launcher.setConst(nPairs);  						int num = nPairs; -						launcher.launch1D( num); +						launcher.launch1D(num);  						clFinish(m_queue);  						/*  						b3AlignedObjectArray<int>hostHasSepAxis; @@ -3027,173 +2847,160 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  						*/  						nContacts = m_totalContactsOut.at(0);  						contactOut->resize(nContacts); -					//	printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); -						if (nContacts>maxContactCapacity) +						//	printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); +						if (nContacts > maxContactCapacity)  						{ -                  							b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);  							nContacts = maxContactCapacity;  						} -  					}  				} -				 +  				if (1)  				{ -  					if (1)  					{ -					{ -						B3_PROFILE("findSeparatingAxisVertexFaceKernel"); -						b3BufferInfoCL bInfo[] = {  -							b3BufferInfoCL( pairs->getBufferCL(), true ),  -							b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -							b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -							b3BufferInfoCL( convexData.getBufferCL(),true), -							b3BufferInfoCL( gpuVertices.getBufferCL(),true), -							b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -							b3BufferInfoCL( gpuFaces.getBufferCL(),true), -							b3BufferInfoCL( gpuIndices.getBufferCL(),true), -							b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -							b3BufferInfoCL( m_sepNormals.getBufferCL()), -							b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -							b3BufferInfoCL( m_dmins.getBufferCL()) -						}; - -						b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); -						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -						launcher.setConst( nPairs  ); +						{ +							B3_PROFILE("findSeparatingAxisVertexFaceKernel"); +							b3BufferInfoCL bInfo[] = { +								b3BufferInfoCL(pairs->getBufferCL(), true), +								b3BufferInfoCL(bodyBuf->getBufferCL(), true), +								b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +								b3BufferInfoCL(convexData.getBufferCL(), true), +								b3BufferInfoCL(gpuVertices.getBufferCL(), true), +								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +								b3BufferInfoCL(gpuFaces.getBufferCL(), true), +								b3BufferInfoCL(gpuIndices.getBufferCL(), true), +								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +								b3BufferInfoCL(m_sepNormals.getBufferCL()), +								b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +								b3BufferInfoCL(m_dmins.getBufferCL())}; + +							b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel"); +							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +							launcher.setConst(nPairs); -						int num = nPairs; -						launcher.launch1D( num); -						clFinish(m_queue); -					} +							int num = nPairs; +							launcher.launch1D(num); +							clFinish(m_queue); +						} +						int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); -					int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); -					 -					{ -						B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); -						b3BufferInfoCL bInfo[] = {  -							b3BufferInfoCL( pairs->getBufferCL(), true ),  -							b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -							b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -							b3BufferInfoCL( convexData.getBufferCL(),true), -							b3BufferInfoCL( gpuVertices.getBufferCL(),true), -							b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -							b3BufferInfoCL( gpuFaces.getBufferCL(),true), -							b3BufferInfoCL( gpuIndices.getBufferCL(),true), -							b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -							b3BufferInfoCL( m_sepNormals.getBufferCL()), -							b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -							b3BufferInfoCL( m_dmins.getBufferCL()), -							b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) - -						}; - -						b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); -						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -						launcher.setConst( numDirections); -						launcher.setConst( nPairs  ); -						int num = nPairs; -						launcher.launch1D( num); -						clFinish(m_queue); +						{ +							B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); +							b3BufferInfoCL bInfo[] = { +								b3BufferInfoCL(pairs->getBufferCL(), true), +								b3BufferInfoCL(bodyBuf->getBufferCL(), true), +								b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +								b3BufferInfoCL(convexData.getBufferCL(), true), +								b3BufferInfoCL(gpuVertices.getBufferCL(), true), +								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +								b3BufferInfoCL(gpuFaces.getBufferCL(), true), +								b3BufferInfoCL(gpuIndices.getBufferCL(), true), +								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +								b3BufferInfoCL(m_sepNormals.getBufferCL()), +								b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +								b3BufferInfoCL(m_dmins.getBufferCL()), +								b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true) -					} +							}; + +							b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel"); +							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +							launcher.setConst(numDirections); +							launcher.setConst(nPairs); +							int num = nPairs; +							launcher.launch1D(num); +							clFinish(m_queue); +						}  					}  					if (useMprGpu)  					{  						B3_PROFILE("findSeparatingAxisUnitSphereKernel"); -						b3BufferInfoCL bInfo[] = {  -								b3BufferInfoCL( pairs->getBufferCL(), true ),  -								b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -								b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -								b3BufferInfoCL( convexData.getBufferCL(),true), -								b3BufferInfoCL( gpuVertices.getBufferCL(),true), -								b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), -								b3BufferInfoCL( m_sepNormals.getBufferCL()), -								b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -								b3BufferInfoCL( m_dmins.getBufferCL()) -						}; - -						b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); -						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -						int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); -						launcher.setConst( numDirections); - -						launcher.setConst( nPairs  ); -                                                 +						b3BufferInfoCL bInfo[] = { +							b3BufferInfoCL(pairs->getBufferCL(), true), +							b3BufferInfoCL(bodyBuf->getBufferCL(), true), +							b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +							b3BufferInfoCL(convexData.getBufferCL(), true), +							b3BufferInfoCL(gpuVertices.getBufferCL(), true), +							b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true), +							b3BufferInfoCL(m_sepNormals.getBufferCL()), +							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +							b3BufferInfoCL(m_dmins.getBufferCL())}; + +						b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel"); +						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +						int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); +						launcher.setConst(numDirections); + +						launcher.setConst(nPairs); +  						int num = nPairs; -						launcher.launch1D( num); +						launcher.launch1D(num);  						clFinish(m_queue);  					} +				}  			} -				 - -			} else +			else  			{  				B3_PROFILE("findSeparatingAxisKernel"); -				b3BufferInfoCL bInfo[] = {  -					b3BufferInfoCL( pairs->getBufferCL(), true ),  -					b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -					b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -					b3BufferInfoCL( convexData.getBufferCL(),true), -					b3BufferInfoCL( gpuVertices.getBufferCL(),true), -					b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -					b3BufferInfoCL( gpuFaces.getBufferCL(),true), -					b3BufferInfoCL( gpuIndices.getBufferCL(),true), -					b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -					b3BufferInfoCL( m_sepNormals.getBufferCL()), -					b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) -				}; - -				b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); -				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -				launcher.setConst( nPairs  ); +				b3BufferInfoCL bInfo[] = { +					b3BufferInfoCL(pairs->getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +					b3BufferInfoCL(m_sepNormals.getBufferCL()), +					b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())}; + +				b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel"); +				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +				launcher.setConst(nPairs);  				int num = nPairs; -				launcher.launch1D( num); +				launcher.launch1D(num);  				clFinish(m_queue);  			} -			 -			  		} -        else -        { -             +		else +		{  			B3_PROFILE("findSeparatingAxisKernel CPU"); -            -             -            b3AlignedObjectArray<b3Int4> hostPairs; -            pairs->copyToHost(hostPairs); -            b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -            bodyBuf->copyToHost(hostBodyBuf); - -            b3AlignedObjectArray<b3Collidable> hostCollidables; -            gpuCollidables.copyToHost(hostCollidables); -             -            b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; -            gpuChildShapes.copyToHost(cpuChildShapes); -             -            b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; -            convexData.copyToHost(hostConvexShapeData); -             -            b3AlignedObjectArray<b3Vector3> hostVertices; -            gpuVertices.copyToHost(hostVertices); -             -            b3AlignedObjectArray<int> hostHasSepAxis; -            hostHasSepAxis.resize(nPairs); -            b3AlignedObjectArray<b3Vector3> hostSepAxis; -            hostSepAxis.resize(nPairs); -             -            b3AlignedObjectArray<b3Vector3> hostUniqueEdges; -            gpuUniqueEdges.copyToHost(hostUniqueEdges); -            b3AlignedObjectArray<b3GpuFace> hostFaces; -            gpuFaces.copyToHost(hostFaces); -             -            b3AlignedObjectArray<int> hostIndices; -            gpuIndices.copyToHost(hostIndices); -			 + +			b3AlignedObjectArray<b3Int4> hostPairs; +			pairs->copyToHost(hostPairs); +			b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +			bodyBuf->copyToHost(hostBodyBuf); + +			b3AlignedObjectArray<b3Collidable> hostCollidables; +			gpuCollidables.copyToHost(hostCollidables); + +			b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; +			gpuChildShapes.copyToHost(cpuChildShapes); + +			b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; +			convexData.copyToHost(hostConvexShapeData); + +			b3AlignedObjectArray<b3Vector3> hostVertices; +			gpuVertices.copyToHost(hostVertices); + +			b3AlignedObjectArray<int> hostHasSepAxis; +			hostHasSepAxis.resize(nPairs); +			b3AlignedObjectArray<b3Vector3> hostSepAxis; +			hostSepAxis.resize(nPairs); + +			b3AlignedObjectArray<b3Vector3> hostUniqueEdges; +			gpuUniqueEdges.copyToHost(hostUniqueEdges); +			b3AlignedObjectArray<b3GpuFace> hostFaces; +			gpuFaces.copyToHost(hostFaces); + +			b3AlignedObjectArray<int> hostIndices; +			gpuIndices.copyToHost(hostIndices); +  			b3AlignedObjectArray<b3Contact4> hostContacts;  			if (nContacts)  			{ @@ -3201,61 +3008,56 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  			}  			hostContacts.resize(maxContactCapacity);  			int nGlobalContactsOut = nContacts; -			 -             -            for (int i=0;i<nPairs;i++) -            { -                 -                int bodyIndexA = hostPairs[i].x; -                int bodyIndexB = hostPairs[i].y; -                int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; -                int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; -                 -                int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; -                int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; -                 -                hostHasSepAxis[i] = 0; -                 -                //once the broadphase avoids static-static pairs, we can remove this test -                if ((hostBodyBuf[bodyIndexA].m_invMass==0) &&(hostBodyBuf[bodyIndexB].m_invMass==0)) -                { -                    continue; -                } -                 -                 -                if ((hostCollidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(hostCollidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) -                { -                    continue; -                } -                 -                float dmin = FLT_MAX; -                 -                b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; -                b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; -                b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; -                b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; -                b3Quaternion ornA =hostBodyBuf[bodyIndexA].m_quat; -                b3Quaternion ornB =hostBodyBuf[bodyIndexB].m_quat; -				 -				 -				if (useGjk) + +			for (int i = 0; i < nPairs; i++) +			{ +				int bodyIndexA = hostPairs[i].x; +				int bodyIndexB = hostPairs[i].y; +				int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; +				int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + +				int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; +				int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; + +				hostHasSepAxis[i] = 0; + +				//once the broadphase avoids static-static pairs, we can remove this test +				if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0))  				{ +					continue; +				} +				if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL)) +				{ +					continue; +				} + +				float dmin = FLT_MAX; + +				b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; +				b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; +				b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; +				b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; +				b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat; +				b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat; + +				if (useGjk) +				{  					//first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR  					{  						b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;  						b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);  						b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; -						b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); +						b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);  						b3Vector3 DeltaC2 = c0 - c1; -                 +  						b3Vector3 sepAxis; -                 +  						bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, -							&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -							&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -											 &sepAxis, &dmin); -                 +																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																&sepAxis, &dmin); +  						if (hasSepAxisA)  						{  							bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, @@ -3264,11 +3066,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  																	&sepAxis, &dmin);  							if (hasSepAxisB)  							{ -								bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, -															 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -															 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -															 &sepAxis, &dmin,false); -													  +								bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, +																				&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																				&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																				&sepAxis, &dmin, false); +  								if (hasEdgeEdge)  								{  									hostHasSepAxis[i] = 1; @@ -3282,163 +3084,150 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  					if (hostHasSepAxis[i])  					{  						int pairIndex = i; -				 +  						bool useMpr = true;  						if (useMpr)  						{ -							int res=0; +							int res = 0;  							float depth = 0.f; -							b3Vector3 sepAxis2 = b3MakeVector3(1,0,0); -							b3Vector3 resultPointOnBWorld = b3MakeVector3(0,0,0); +							b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0); +							b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0); -						float depthOut; -						b3Vector3 dirOut; -						b3Vector3 posOut; -						 +							float depthOut; +							b3Vector3 dirOut; +							b3Vector3 posOut; -						//res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); -						res = b3MprPenetration(pairIndex,bodyIndexA,bodyIndexB,&hostBodyBuf[0],&hostConvexShapeData[0],&hostCollidables[0],&hostVertices[0],&hostSepAxis[0],&hostHasSepAxis[0],&depthOut,&dirOut,&posOut); -						depth = depthOut; -						sepAxis2 =  b3MakeVector3(-dirOut.x,-dirOut.y,-dirOut.z); -						resultPointOnBWorld = posOut; -						//hostHasSepAxis[i] = 0; +							//res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); +							res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut); +							depth = depthOut; +							sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z); +							resultPointOnBWorld = posOut; +							//hostHasSepAxis[i] = 0; +							if (res == 0) +							{ +								//add point? +								//printf("depth = %f\n",depth); +								//printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); +								//qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); -						if (res==0) -						{ -							//add point? -							//printf("depth = %f\n",depth); -							//printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); -							//qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); -						 -							 - -							float dist=0.f; +								float dist = 0.f; -							const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; -							const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; +								const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; +								const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; -							if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) -							{ -								if (depth > dist) +								if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))  								{ -									float diff = depth - dist; -									 -									static float maxdiff = 0.f; -									if (maxdiff < diff) +									if (depth > dist)  									{ -										maxdiff = diff; -										printf("maxdiff = %20.10f\n",maxdiff); +										float diff = depth - dist; + +										static float maxdiff = 0.f; +										if (maxdiff < diff) +										{ +											maxdiff = diff; +											printf("maxdiff = %20.10f\n", maxdiff); +										}  									}  								} -							} -							if (depth > dmin) -							{ -								b3Vector3 oldAxis = hostSepAxis[i]; -								depth = dmin; -								sepAxis2 = oldAxis; -							} - -							 +								if (depth > dmin) +								{ +									b3Vector3 oldAxis = hostSepAxis[i]; +									depth = dmin; +									sepAxis2 = oldAxis; +								} -							if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) -							{ -								if (depth > dist) +								if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist))  								{ -									float diff = depth - dist; -									//printf("?diff  = %f\n",diff ); -									static float maxdiff = 0.f; -									if (maxdiff < diff) +									if (depth > dist) +									{ +										float diff = depth - dist; +										//printf("?diff  = %f\n",diff ); +										static float maxdiff = 0.f; +										if (maxdiff < diff) +										{ +											maxdiff = diff; +											printf("maxdiff = %20.10f\n", maxdiff); +										} +									} +									//this is used for SAT +									//hostHasSepAxis[i] = 1; +									//hostSepAxis[i] = sepAxis2; + +									//add contact point + +									//int contactIndex = nGlobalContactsOut; +									b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); +									nGlobalContactsOut++; +									newContact.m_batchIdx = 0;  //i; +									newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; +									newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; + +									newContact.m_frictionCoeffCmp = 45874; +									newContact.m_restituitionCoeffCmp = 0; + +									static float maxDepth = 0.f; + +									if (depth > maxDepth)  									{ -										maxdiff = diff; -										printf("maxdiff = %20.10f\n",maxdiff); +										maxDepth = depth; +										printf("MPR maxdepth = %f\n", maxDepth);  									} + +									resultPointOnBWorld.w = -depth; +									newContact.m_worldPosB[0] = resultPointOnBWorld; +									//b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; +									newContact.m_worldNormalOnB = sepAxis2; +									newContact.m_worldNormalOnB.w = (b3Scalar)1;  								} -								//this is used for SAT -								//hostHasSepAxis[i] = 1; -								//hostSepAxis[i] = sepAxis2; - -								//add contact point - -								//int contactIndex = nGlobalContactsOut; -								b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); -								nGlobalContactsOut++; -								newContact.m_batchIdx = 0;//i; -								newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; -								newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; - -								newContact.m_frictionCoeffCmp = 45874; -								newContact.m_restituitionCoeffCmp = 0; -						 -								 -								static float maxDepth = 0.f; -							 -								if (depth > maxDepth) +								else  								{ -									maxDepth  = depth; -									printf("MPR maxdepth = %f\n",maxDepth ); -							 +									printf("rejected\n");  								} -							 - -								resultPointOnBWorld.w = -depth; -								newContact.m_worldPosB[0] = resultPointOnBWorld; -								//b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; -								newContact.m_worldNormalOnB = sepAxis2; -								newContact.m_worldNormalOnB.w = (b3Scalar)1; -							} else -							{ -								printf("rejected\n");  							} - -			  						} -						} else +						else  						{ - -			 -				 -			//int contactIndex = computeContactConvexConvex2(           i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); -							b3AlignedObjectArray<b3Contact4> oldHostContacts;	 +							//int contactIndex = computeContactConvexConvex2(           i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); +							b3AlignedObjectArray<b3Contact4> oldHostContacts;  							int result; -							result = computeContactConvexConvex2( //hostPairs, -													   pairIndex, -													bodyIndexA, bodyIndexB, -													   collidableIndexA, collidableIndexB, -													   hostBodyBuf, -													   hostCollidables, -													   hostConvexShapeData, -													   hostVertices, -													   hostUniqueEdges, -													   hostIndices, -													   hostFaces, -													   hostContacts, -													   nGlobalContactsOut, -														maxContactCapacity, -														oldHostContacts -														//hostHasSepAxis, -														//hostSepAxis -														 -																); -						}//mpr -					}//hostHasSepAxis[i] = 1; -					 -				} else +							result = computeContactConvexConvex2(  //hostPairs, +								pairIndex, +								bodyIndexA, bodyIndexB, +								collidableIndexA, collidableIndexB, +								hostBodyBuf, +								hostCollidables, +								hostConvexShapeData, +								hostVertices, +								hostUniqueEdges, +								hostIndices, +								hostFaces, +								hostContacts, +								nGlobalContactsOut, +								maxContactCapacity, +								oldHostContacts +								//hostHasSepAxis, +								//hostSepAxis + +							); +						}  //mpr +					}      //hostHasSepAxis[i] = 1; +				} +				else  				{ -				  					b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter;  					b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA);  					b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; -					b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); +					b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB);  					b3Vector3 DeltaC2 = c0 - c1; -                 +  					b3Vector3 sepAxis; -                 +  					bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, -						&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -						&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -										 &sepAxis, &dmin); -                 +															&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +															&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +															&sepAxis, &dmin); +  					if (hasSepAxisA)  					{  						bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, @@ -3447,11 +3236,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  																&sepAxis, &dmin);  						if (hasSepAxisB)  						{ -							bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, -														 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -														 &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), -														 &sepAxis, &dmin,true); -													  +							bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, +																			&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																			&hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), +																			&sepAxis, &dmin, true); +  							if (hasEdgeEdge)  							{  								hostHasSepAxis[i] = 1; @@ -3460,21 +3249,21 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  						}  					}  				} -            } -             -			if (useGjkContacts)//nGlobalContactsOut>0) +			} + +			if (useGjkContacts)  //nGlobalContactsOut>0)  			{  				//printf("nGlobalContactsOut=%d\n",nGlobalContactsOut);  				nContacts = nGlobalContactsOut;  				contactOut->copyFromHost(hostContacts); -	 -				m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + +				m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true);  			} -             -            m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); -            m_sepNormals.copyFromHost(hostSepAxis); -         -            /* + +			m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); +			m_sepNormals.copyFromHost(hostSepAxis); + +			/*               //double-check results from GPU (comment-out the 'else' so both paths are executed              b3AlignedObjectArray<int> checkHasSepAxis;              m_hasSeparatingNormals.copyToHost(checkHasSepAxis); @@ -3491,352 +3280,314 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*              //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis);              //    m_sepNormals.copyFromHost(hostSepAxis);              */ -        } -         -         -        numCompoundPairs = m_numCompoundPairsOut.at(0); -        bool useGpuFindCompoundPairs=true; -        if (useGpuFindCompoundPairs) -        { -            B3_PROFILE("findCompoundPairsKernel"); -            b3BufferInfoCL bInfo[] =  -            {  -                b3BufferInfoCL( pairs->getBufferCL(), true ),  -                b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -                b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -                b3BufferInfoCL( convexData.getBufferCL(),true), -                b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), -                b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), -                b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), -                b3BufferInfoCL(subTreesGPU->getBufferCL()), -                b3BufferInfoCL(treeNodesGPU->getBufferCL()), -                b3BufferInfoCL(bvhInfo->getBufferCL()) -            }; - -            b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); -            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -            launcher.setConst( nPairs  ); -            launcher.setConst( compoundPairCapacity); - -            int num = nPairs; -            launcher.launch1D( num); -            clFinish(m_queue); - -            numCompoundPairs = m_numCompoundPairsOut.at(0); -            //printf("numCompoundPairs =%d\n",numCompoundPairs ); -            if (numCompoundPairs) -            { -                //printf("numCompoundPairs=%d\n",numCompoundPairs); -            } -             - -        } else -        { - - -            b3AlignedObjectArray<b3QuantizedBvhNode>	treeNodesCPU; -            treeNodesGPU->copyToHost(treeNodesCPU); - -            b3AlignedObjectArray<b3BvhSubtreeInfo>	subTreesCPU; -            subTreesGPU->copyToHost(subTreesCPU); +		} -            b3AlignedObjectArray<b3BvhInfo>	bvhInfoCPU; -            bvhInfo->copyToHost(bvhInfoCPU); +		numCompoundPairs = m_numCompoundPairsOut.at(0); +		bool useGpuFindCompoundPairs = true; +		if (useGpuFindCompoundPairs) +		{ +			B3_PROFILE("findCompoundPairsKernel"); +			b3BufferInfoCL bInfo[] = +				{ +					b3BufferInfoCL(pairs->getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true), +					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()), +					b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()), +					b3BufferInfoCL(subTreesGPU->getBufferCL()), +					b3BufferInfoCL(treeNodesGPU->getBufferCL()), +					b3BufferInfoCL(bvhInfo->getBufferCL())}; + +			b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel"); +			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +			launcher.setConst(nPairs); +			launcher.setConst(compoundPairCapacity); -            b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; -            clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); +			int num = nPairs; +			launcher.launch1D(num); +			clFinish(m_queue); -            b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; -            clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); +			numCompoundPairs = m_numCompoundPairsOut.at(0); +			//printf("numCompoundPairs =%d\n",numCompoundPairs ); +			if (numCompoundPairs) +			{ +				//printf("numCompoundPairs=%d\n",numCompoundPairs); +			} +		} +		else +		{ +			b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; +			treeNodesGPU->copyToHost(treeNodesCPU); -            b3AlignedObjectArray<b3Int4> hostPairs; -            pairs->copyToHost(hostPairs); +			b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; +			subTreesGPU->copyToHost(subTreesCPU); -            b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -            bodyBuf->copyToHost(hostBodyBuf); +			b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; +			bvhInfo->copyToHost(bvhInfoCPU); +			b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; +			clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); -            b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; -            cpuCompoundPairsOut.resize(compoundPairCapacity); +			b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; +			clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); -            b3AlignedObjectArray<b3Collidable> hostCollidables; -            gpuCollidables.copyToHost(hostCollidables); +			b3AlignedObjectArray<b3Int4> hostPairs; +			pairs->copyToHost(hostPairs); -            b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; -            gpuChildShapes.copyToHost(cpuChildShapes); +			b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +			bodyBuf->copyToHost(hostBodyBuf); -            b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; -            convexData.copyToHost(hostConvexData); +			b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; +			cpuCompoundPairsOut.resize(compoundPairCapacity); -            b3AlignedObjectArray<b3Vector3> hostVertices; -            gpuVertices.copyToHost(hostVertices); +			b3AlignedObjectArray<b3Collidable> hostCollidables; +			gpuCollidables.copyToHost(hostCollidables); +			b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; +			gpuChildShapes.copyToHost(cpuChildShapes); +			b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; +			convexData.copyToHost(hostConvexData); +			b3AlignedObjectArray<b3Vector3> hostVertices; +			gpuVertices.copyToHost(hostVertices); -            for (int pairIndex=0;pairIndex<nPairs;pairIndex++) -            { -                int bodyIndexA = hostPairs[pairIndex].x; -                int bodyIndexB = hostPairs[pairIndex].y; -                int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; -                int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; +			for (int pairIndex = 0; pairIndex < nPairs; pairIndex++) +			{ +				int bodyIndexA = hostPairs[pairIndex].x; +				int bodyIndexB = hostPairs[pairIndex].y; +				int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; +				int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx;  				if (cpuChildShapes.size())  				{ -                findCompoundPairsKernel(  -                            pairIndex, -                            bodyIndexA, -                            bodyIndexB, -                            collidableIndexA, -                            collidableIndexB, -                            &hostBodyBuf[0], -                            &hostCollidables[0], -                            &hostConvexData[0], -                            hostVertices, -                            hostAabbsWorldSpace, -                            hostAabbsLocalSpace, -                            &cpuChildShapes[0], -                            &cpuCompoundPairsOut[0], -                            &numCompoundPairs, -                            compoundPairCapacity, -                            treeNodesCPU, -                            subTreesCPU, -                            bvhInfoCPU -                            ); +					findCompoundPairsKernel( +						pairIndex, +						bodyIndexA, +						bodyIndexB, +						collidableIndexA, +						collidableIndexB, +						&hostBodyBuf[0], +						&hostCollidables[0], +						&hostConvexData[0], +						hostVertices, +						hostAabbsWorldSpace, +						hostAabbsLocalSpace, +						&cpuChildShapes[0], +						&cpuCompoundPairsOut[0], +						&numCompoundPairs, +						compoundPairCapacity, +						treeNodesCPU, +						subTreesCPU, +						bvhInfoCPU);  				} -            } -             +			} -			m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs,1,0,true); +			m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true);  			if (numCompoundPairs)  			{  				b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; -				m_gpuCompoundPairs.copyFromHostPointer(ptr,numCompoundPairs,0,true); +				m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true);  			}  			//cpuCompoundPairsOut -             -        } +		}  		if (numCompoundPairs)  		{ -			printf("numCompoundPairs=%d\n",numCompoundPairs); +			printf("numCompoundPairs=%d\n", numCompoundPairs);  		} -        if (numCompoundPairs > compoundPairCapacity) -        { -            b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs,  compoundPairCapacity); -            numCompoundPairs = compoundPairCapacity; -        } - -         - -        m_gpuCompoundPairs.resize(numCompoundPairs); -        m_gpuHasCompoundSepNormals.resize(numCompoundPairs); -        m_gpuCompoundSepNormals.resize(numCompoundPairs); -         - -        if (numCompoundPairs) -        { -            B3_PROFILE("processCompoundPairsPrimitivesKernel"); -            b3BufferInfoCL bInfo[] =  -            {  -                b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ),  -                b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -                b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -                b3BufferInfoCL( convexData.getBufferCL(),true), -                b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -                b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                b3BufferInfoCL( contactOut->getBufferCL()), -                b3BufferInfoCL( m_totalContactsOut.getBufferCL())	 -            }; - -            b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); -            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -            launcher.setConst( numCompoundPairs  ); -            launcher.setConst(maxContactCapacity); - -            int num = numCompoundPairs; -            launcher.launch1D( num); -            clFinish(m_queue); -            nContacts = m_totalContactsOut.at(0); -            //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); -            if (nContacts>maxContactCapacity) -            { -                 -                b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); -                nContacts = maxContactCapacity; -            } -        } -         - -        if (numCompoundPairs) -        { -            B3_PROFILE("processCompoundPairsKernel"); -            b3BufferInfoCL bInfo[] =  -            {  -                b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ),  -                b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -                b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -                b3BufferInfoCL( convexData.getBufferCL(),true), -                b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -                b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), -                b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) -            }; - -            b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); -            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -            launcher.setConst( numCompoundPairs  ); - -            int num = numCompoundPairs; -            launcher.launch1D( num); -            clFinish(m_queue); -         -        } - - -        //printf("numConcave  = %d\n",numConcave); - -     - -//		printf("hostNormals.size()=%d\n",hostNormals.size()); +		if (numCompoundPairs > compoundPairCapacity) +		{ +			b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); +			numCompoundPairs = compoundPairCapacity; +		} + +		m_gpuCompoundPairs.resize(numCompoundPairs); +		m_gpuHasCompoundSepNormals.resize(numCompoundPairs); +		m_gpuCompoundSepNormals.resize(numCompoundPairs); + +		if (numCompoundPairs) +		{ +			B3_PROFILE("processCompoundPairsPrimitivesKernel"); +			b3BufferInfoCL bInfo[] = +				{ +					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +					b3BufferInfoCL(contactOut->getBufferCL()), +					b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +			b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel"); +			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +			launcher.setConst(numCompoundPairs); +			launcher.setConst(maxContactCapacity); + +			int num = numCompoundPairs; +			launcher.launch1D(num); +			clFinish(m_queue); +			nContacts = m_totalContactsOut.at(0); +			//printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); +			if (nContacts > maxContactCapacity) +			{ +				b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); +				nContacts = maxContactCapacity; +			} +		} + +		if (numCompoundPairs) +		{ +			B3_PROFILE("processCompoundPairsKernel"); +			b3BufferInfoCL bInfo[] = +				{ +					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +					b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()), +					b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())}; + +			b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel"); +			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +			launcher.setConst(numCompoundPairs); + +			int num = numCompoundPairs; +			launcher.launch1D(num); +			clFinish(m_queue); +		} + +		//printf("numConcave  = %d\n",numConcave); + +		//		printf("hostNormals.size()=%d\n",hostNormals.size());  		//int numPairs = pairCount.at(0); -		 -		 -		  	}  	int vertexFaceCapacity = 64; - -		  	{  		//now perform the tree query on GPU -			 -					 -				 -				 +  		if (treeNodesGPU->size() && treeNodesGPU->size())  		{  			if (bvhTraversalKernelGPU)  			{ -						  				B3_PROFILE("m_bvhTraversalKernel"); -						 -						 +  				numConcavePairs = m_numConcavePairsOut.at(0); -						 -				b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); -				launcher.setBuffer( pairs->getBufferCL()); -				launcher.setBuffer(  bodyBuf->getBufferCL()); -				launcher.setBuffer( gpuCollidables.getBufferCL()); -				launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); -				launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); -				launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); -				launcher.setBuffer( subTreesGPU->getBufferCL()); -				launcher.setBuffer( treeNodesGPU->getBufferCL()); -				launcher.setBuffer( bvhInfo->getBufferCL()); -						 -				launcher.setConst( nPairs  ); -				launcher.setConst( maxTriConvexPairCapacity); + +				b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel"); +				launcher.setBuffer(pairs->getBufferCL()); +				launcher.setBuffer(bodyBuf->getBufferCL()); +				launcher.setBuffer(gpuCollidables.getBufferCL()); +				launcher.setBuffer(clAabbsWorldSpace.getBufferCL()); +				launcher.setBuffer(triangleConvexPairsOut.getBufferCL()); +				launcher.setBuffer(m_numConcavePairsOut.getBufferCL()); +				launcher.setBuffer(subTreesGPU->getBufferCL()); +				launcher.setBuffer(treeNodesGPU->getBufferCL()); +				launcher.setBuffer(bvhInfo->getBufferCL()); + +				launcher.setConst(nPairs); +				launcher.setConst(maxTriConvexPairCapacity);  				int num = nPairs; -				launcher.launch1D( num); +				launcher.launch1D(num);  				clFinish(m_queue);  				numConcavePairs = m_numConcavePairsOut.at(0); -			} else +			} +			else  			{ -					b3AlignedObjectArray<b3Int4> hostPairs; -					pairs->copyToHost(hostPairs); -					b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -					bodyBuf->copyToHost(hostBodyBuf); -					b3AlignedObjectArray<b3Collidable> hostCollidables; -					gpuCollidables.copyToHost(hostCollidables); -					b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; -					clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); +				b3AlignedObjectArray<b3Int4> hostPairs; +				pairs->copyToHost(hostPairs); +				b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +				bodyBuf->copyToHost(hostBodyBuf); +				b3AlignedObjectArray<b3Collidable> hostCollidables; +				gpuCollidables.copyToHost(hostCollidables); +				b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; +				clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); -					//int maxTriConvexPairCapacity, -					b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; -					triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); +				//int maxTriConvexPairCapacity, +				b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; +				triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); -					//int numTriConvexPairsOutHost=0; -					numConcavePairs = 0; -					//m_numConcavePairsOut +				//int numTriConvexPairsOutHost=0; +				numConcavePairs = 0; +				//m_numConcavePairsOut -					b3AlignedObjectArray<b3QuantizedBvhNode>	treeNodesCPU; -					treeNodesGPU->copyToHost(treeNodesCPU); -					b3AlignedObjectArray<b3BvhSubtreeInfo>	subTreesCPU; -					subTreesGPU->copyToHost(subTreesCPU); -					b3AlignedObjectArray<b3BvhInfo>	bvhInfoCPU; -					bvhInfo->copyToHost(bvhInfoCPU); -					//compute it... +				b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; +				treeNodesGPU->copyToHost(treeNodesCPU); +				b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; +				subTreesGPU->copyToHost(subTreesCPU); +				b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; +				bvhInfo->copyToHost(bvhInfoCPU); +				//compute it... -					volatile int hostNumConcavePairsOut=0; +				volatile int hostNumConcavePairsOut = 0; -					// -					for (int i=0;i<nPairs;i++) -					{ -						b3BvhTraversal( &hostPairs.at(0),  -						&hostBodyBuf.at(0), -						&hostCollidables.at(0), -						&hostAabbsWorldSpace.at(0), -						&triangleConvexPairsOutHost.at(0), -						&hostNumConcavePairsOut, -						&subTreesCPU.at(0), -						&treeNodesCPU.at(0), -						&bvhInfoCPU.at(0), -						nPairs, -						maxTriConvexPairCapacity, -						i); -					} -					numConcavePairs = hostNumConcavePairsOut; +				// +				for (int i = 0; i < nPairs; i++) +				{ +					b3BvhTraversal(&hostPairs.at(0), +								   &hostBodyBuf.at(0), +								   &hostCollidables.at(0), +								   &hostAabbsWorldSpace.at(0), +								   &triangleConvexPairsOutHost.at(0), +								   &hostNumConcavePairsOut, +								   &subTreesCPU.at(0), +								   &treeNodesCPU.at(0), +								   &bvhInfoCPU.at(0), +								   nPairs, +								   maxTriConvexPairCapacity, +								   i); +				} +				numConcavePairs = hostNumConcavePairsOut; -					if (hostNumConcavePairsOut) -					{ -						triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); -						triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); -					} -					// +				if (hostNumConcavePairsOut) +				{ +					triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); +					triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); +				} +				// -					m_numConcavePairsOut.resize(0); -					m_numConcavePairsOut.push_back(numConcavePairs); +				m_numConcavePairsOut.resize(0); +				m_numConcavePairsOut.push_back(numConcavePairs);  			} -				//printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); -						 +			//printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); +  			if (numConcavePairs > maxTriConvexPairCapacity)  			{  				static int exceeded_maxTriConvexPairCapacity_count = 0;  				b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", -					numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); +						numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++);  				numConcavePairs = maxTriConvexPairCapacity;  			}  			triangleConvexPairsOut.resize(numConcavePairs); -	 +  			if (numConcavePairs)  			{ - -				 -						 -	  				clippingFacesOutGPU.resize(numConcavePairs);  				worldNormalsAGPU.resize(numConcavePairs); -				worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); -				worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); - +				worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs)); +				worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs));  				if (findConcaveSeparatingAxisKernelGPU)  				{ -  					/*  					m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU);  						clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); @@ -3846,236 +3597,213 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  					*/  					//now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) -                    if (splitSearchSepAxisConcave) -                    { -                        //printf("numConcavePairs = %d\n",numConcavePairs); -                        m_dmins.resize(numConcavePairs); -                        { -                            B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); -                            b3BufferInfoCL bInfo[] = { -                                b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), -                                b3BufferInfoCL( bodyBuf->getBufferCL(),true), -                                b3BufferInfoCL( gpuCollidables.getBufferCL(),true), -                                b3BufferInfoCL( convexData.getBufferCL(),true), -                                b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                                b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                                b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                                b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                                b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                                b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -                                b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -                                b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), -                                b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -                                b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -                                b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), -                                b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), -                                b3BufferInfoCL(m_dmins.getBufferCL()) -                            }; -                             -                            b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); -                            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -                            launcher.setConst(vertexFaceCapacity); -                            launcher.setConst( numConcavePairs  ); -                             -                            int num = numConcavePairs; -                            launcher.launch1D( num); -                            clFinish(m_queue); - -                             -                        } -//                        numConcavePairs = 0; -                        if (1) -                        { -                            B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); -                            b3BufferInfoCL bInfo[] = { -                                b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), -                                b3BufferInfoCL( bodyBuf->getBufferCL(),true), -                                b3BufferInfoCL( gpuCollidables.getBufferCL(),true), -                                b3BufferInfoCL( convexData.getBufferCL(),true), -                                b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                                b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                                b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                                b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                                b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                                b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -                                b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -                                b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), -                                b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -                                b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -                                b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), -                                b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), -                                b3BufferInfoCL(m_dmins.getBufferCL()) -                            }; -                             -                            b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); -                            launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -                            launcher.setConst(vertexFaceCapacity); -                            launcher.setConst( numConcavePairs  ); -                             -                            int num = numConcavePairs; -                            launcher.launch1D( num); -                            clFinish(m_queue); -                        } -                       -                         -                        // numConcavePairs = 0; -                         -                         -                         -                         -                         -                         -                    } else -                    { -                        B3_PROFILE("findConcaveSeparatingAxisKernel"); -                        b3BufferInfoCL bInfo[] = {  -                            b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),  -                            b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -                            b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -                            b3BufferInfoCL( convexData.getBufferCL(),true), -                            b3BufferInfoCL( gpuVertices.getBufferCL(),true), -                            b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -                            b3BufferInfoCL( gpuFaces.getBufferCL(),true), -                            b3BufferInfoCL( gpuIndices.getBufferCL(),true), -                            b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -                            b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -                            b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -                            b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), -                            b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -                            b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -                            b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), -                            b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) -                        }; - -                        b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); -                        launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -                        launcher.setConst(vertexFaceCapacity); -                        launcher.setConst( numConcavePairs  ); - -                        int num = numConcavePairs; -                        launcher.launch1D( num); -                        clFinish(m_queue); -                    } -                     -                     -				} else -				{ - -						b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; -						b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; -						b3AlignedObjectArray<b3Vector3> worldNormalsACPU; -						b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; -						b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; +					if (splitSearchSepAxisConcave) +					{ +						//printf("numConcavePairs = %d\n",numConcavePairs); +						m_dmins.resize(numConcavePairs); +						{ +							B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); +							b3BufferInfoCL bInfo[] = { +								b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), +								b3BufferInfoCL(bodyBuf->getBufferCL(), true), +								b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +								b3BufferInfoCL(convexData.getBufferCL(), true), +								b3BufferInfoCL(gpuVertices.getBufferCL(), true), +								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +								b3BufferInfoCL(gpuFaces.getBufferCL(), true), +								b3BufferInfoCL(gpuIndices.getBufferCL(), true), +								b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +								b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +								b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), +								b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +								b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +								b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +								b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), +								b3BufferInfoCL(m_dmins.getBufferCL())}; + +							b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel"); +							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +							launcher.setConst(vertexFaceCapacity); +							launcher.setConst(numConcavePairs); -						b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; -						triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); -						//triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); -						b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -						bodyBuf->copyToHost(hostBodyBuf); -						b3AlignedObjectArray<b3Collidable> hostCollidables; -						gpuCollidables.copyToHost(hostCollidables); -						b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; -						clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); +							int num = numConcavePairs; +							launcher.launch1D(num); +							clFinish(m_queue); +						} +						//                        numConcavePairs = 0; +						if (1) +						{ +							B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); +							b3BufferInfoCL bInfo[] = { +								b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), +								b3BufferInfoCL(bodyBuf->getBufferCL(), true), +								b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +								b3BufferInfoCL(convexData.getBufferCL(), true), +								b3BufferInfoCL(gpuVertices.getBufferCL(), true), +								b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +								b3BufferInfoCL(gpuFaces.getBufferCL(), true), +								b3BufferInfoCL(gpuIndices.getBufferCL(), true), +								b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +								b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +								b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +								b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), +								b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +								b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +								b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +								b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), +								b3BufferInfoCL(m_dmins.getBufferCL())}; + +							b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel"); +							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +							launcher.setConst(vertexFaceCapacity); +							launcher.setConst(numConcavePairs); -						b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; -						convexData.copyToHost(hostConvexData); +							int num = numConcavePairs; +							launcher.launch1D(num); +							clFinish(m_queue); +						} -						b3AlignedObjectArray<b3Vector3> hostVertices; -						gpuVertices.copyToHost(hostVertices); +						// numConcavePairs = 0; +					} +					else +					{ +						B3_PROFILE("findConcaveSeparatingAxisKernel"); +						b3BufferInfoCL bInfo[] = { +							b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), +							b3BufferInfoCL(bodyBuf->getBufferCL(), true), +							b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +							b3BufferInfoCL(convexData.getBufferCL(), true), +							b3BufferInfoCL(gpuVertices.getBufferCL(), true), +							b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +							b3BufferInfoCL(gpuFaces.getBufferCL(), true), +							b3BufferInfoCL(gpuIndices.getBufferCL(), true), +							b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +							b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +							b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +							b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), +							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; + +						b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel"); +						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +						launcher.setConst(vertexFaceCapacity); +						launcher.setConst(numConcavePairs); -						b3AlignedObjectArray<b3Vector3> hostUniqueEdges; -						gpuUniqueEdges.copyToHost(hostUniqueEdges); -						b3AlignedObjectArray<b3GpuFace> hostFaces; -						gpuFaces.copyToHost(hostFaces); -						b3AlignedObjectArray<int> hostIndices; -						gpuIndices.copyToHost(hostIndices); -						b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; -						gpuChildShapes.copyToHost(cpuChildShapes); +						int num = numConcavePairs; +						launcher.launch1D(num); +						clFinish(m_queue); +					} +				} +				else +				{ +					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; +					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; +					b3AlignedObjectArray<b3Vector3> worldNormalsACPU; +					b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; +					b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; +					b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; +					triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); +					//triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); +					b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +					bodyBuf->copyToHost(hostBodyBuf); +					b3AlignedObjectArray<b3Collidable> hostCollidables; +					gpuCollidables.copyToHost(hostCollidables); +					b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; +					clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); -								 -						b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; -						m_concaveSepNormals.copyToHost(concaveSepNormalsHost); -						concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); +					b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; +					convexData.copyToHost(hostConvexData); -						b3GpuChildShape* childShapePointerCPU = 0; -						if (cpuChildShapes.size()) -							childShapePointerCPU  = &cpuChildShapes.at(0); +					b3AlignedObjectArray<b3Vector3> hostVertices; +					gpuVertices.copyToHost(hostVertices); -						clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); -						worldVertsA1CPU.resize(worldVertsA1GPU.size()); -    					worldNormalsACPU.resize(worldNormalsAGPU.size()); -						worldVertsB1CPU.resize(worldVertsB1GPU.size()); +					b3AlignedObjectArray<b3Vector3> hostUniqueEdges; +					gpuUniqueEdges.copyToHost(hostUniqueEdges); +					b3AlignedObjectArray<b3GpuFace> hostFaces; +					gpuFaces.copyToHost(hostFaces); +					b3AlignedObjectArray<int> hostIndices; +					gpuIndices.copyToHost(hostIndices); +					b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; +					gpuChildShapes.copyToHost(cpuChildShapes); -						for (int i=0;i<numConcavePairs;i++) -						{ -							b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), -								&hostBodyBuf.at(0), -								&hostCollidables.at(0), -								&hostConvexData.at(0), &hostVertices.at(0),&hostUniqueEdges.at(0), -								&hostFaces.at(0),&hostIndices.at(0),childShapePointerCPU, -								&hostAabbsWorldSpace.at(0), -								&concaveSepNormalsHost.at(0), -								&clippingFacesOutCPU.at(0), -								&worldVertsA1CPU.at(0), -								&worldNormalsACPU.at(0), -								&worldVertsB1CPU.at(0), -								&concaveHasSeparatingNormalsCPU.at(0), -								vertexFaceCapacity, -								numConcavePairs,i); -						}; +					b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; +					m_concaveSepNormals.copyToHost(concaveSepNormalsHost); +					concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); -						m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); -						m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); -						clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); -						worldVertsA1GPU.copyFromHost(worldVertsA1CPU); -						worldNormalsAGPU.copyFromHost(worldNormalsACPU); -						worldVertsB1GPU.copyFromHost(worldVertsB1CPU); +					b3GpuChildShape* childShapePointerCPU = 0; +					if (cpuChildShapes.size()) +						childShapePointerCPU = &cpuChildShapes.at(0); +					clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); +					worldVertsA1CPU.resize(worldVertsA1GPU.size()); +					worldNormalsACPU.resize(worldNormalsAGPU.size()); +					worldVertsB1CPU.resize(worldVertsB1GPU.size()); +					for (int i = 0; i < numConcavePairs; i++) +					{ +						b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), +														  &hostBodyBuf.at(0), +														  &hostCollidables.at(0), +														  &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), +														  &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU, +														  &hostAabbsWorldSpace.at(0), +														  &concaveSepNormalsHost.at(0), +														  &clippingFacesOutCPU.at(0), +														  &worldVertsA1CPU.at(0), +														  &worldNormalsACPU.at(0), +														  &worldVertsB1CPU.at(0), +														  &concaveHasSeparatingNormalsCPU.at(0), +														  vertexFaceCapacity, +														  numConcavePairs, i); +					}; +					m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); +					m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); +					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); +					worldVertsA1GPU.copyFromHost(worldVertsA1CPU); +					worldNormalsAGPU.copyFromHost(worldNormalsACPU); +					worldVertsB1GPU.copyFromHost(worldVertsB1CPU);  				} -//							b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; -//						m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); -//					b3AlignedObjectArray<b3Int4> cpuConcavePairs; -//				triangleConvexPairsOut.copyToHost(cpuConcavePairs); - - +				//							b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; +				//						m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); +				//					b3AlignedObjectArray<b3Int4> cpuConcavePairs; +				//				triangleConvexPairsOut.copyToHost(cpuConcavePairs);  			}  		} -		 -		  	}  	if (numConcavePairs)  	{ -			if (numConcavePairs) +		if (numConcavePairs)  		{  			B3_PROFILE("findConcaveSphereContactsKernel"); -				nContacts = m_totalContactsOut.at(0); -//				printf("nContacts1 = %d\n",nContacts); -			b3BufferInfoCL bInfo[] = {  -				b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ),  -				b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -				b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -				b3BufferInfoCL( convexData.getBufferCL(),true), -				b3BufferInfoCL( gpuVertices.getBufferCL(),true), -				b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -				b3BufferInfoCL( gpuFaces.getBufferCL(),true), -				b3BufferInfoCL( gpuIndices.getBufferCL(),true), -				b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), -				b3BufferInfoCL( contactOut->getBufferCL()), -				b3BufferInfoCL( m_totalContactsOut.getBufferCL()) -			}; - -			b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); -			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - -			launcher.setConst( numConcavePairs  ); +			nContacts = m_totalContactsOut.at(0); +			//				printf("nContacts1 = %d\n",nContacts); +			b3BufferInfoCL bInfo[] = { +				b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), +				b3BufferInfoCL(bodyBuf->getBufferCL(), true), +				b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +				b3BufferInfoCL(convexData.getBufferCL(), true), +				b3BufferInfoCL(gpuVertices.getBufferCL(), true), +				b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +				b3BufferInfoCL(gpuFaces.getBufferCL(), true), +				b3BufferInfoCL(gpuIndices.getBufferCL(), true), +				b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), +				b3BufferInfoCL(contactOut->getBufferCL()), +				b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +			b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel"); +			launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + +			launcher.setConst(numConcavePairs);  			launcher.setConst(maxContactCapacity);  			int num = numConcavePairs; -			launcher.launch1D( num); +			launcher.launch1D(num);  			clFinish(m_queue);  			nContacts = m_totalContactsOut.at(0);  			//printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); @@ -4088,11 +3816,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  				nContacts = maxContactCapacity;  			}  		} -		  	} - -  #ifdef __APPLE__  	bool contactClippingOnGpu = true;  #else @@ -4101,9 +3826,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  	if (contactClippingOnGpu)  	{ -		m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -//		printf("nContacts3 = %d\n",nContacts); - +		m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); +		//		printf("nContacts3 = %d\n",nContacts);  		//B3_PROFILE("clipHullHullKernel"); @@ -4122,15 +3846,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  			if (breakupConcaveConvexKernel)  			{ - -				worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); - +				worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs);  				//clipFacesAndFindContacts  				if (clipConcaveFacesAndFindContactsCPU)  				{ -  					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU;  					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU;  					b3AlignedObjectArray<b3Vector3> worldNormalsACPU; @@ -4141,120 +3862,108 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  					worldNormalsAGPU.copyToHost(worldNormalsACPU);  					worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - -					b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; +					b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;  					m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);  					b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;  					m_concaveSepNormals.copyToHost(concaveSepNormalsHost); -					b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;   +					b3AlignedObjectArray<b3Vector3> worldVertsB2CPU;  					worldVertsB2CPU.resize(worldVertsB2GPU.size()); - -					for (int i=0;i<numConcavePairs;i++) +					for (int i = 0; i < numConcavePairs; i++)  					{ - -						clipFacesAndFindContactsKernel(   &concaveSepNormalsHost.at(0), -							&concaveHasSeparatingNormalsCPU.at(0), -							&clippingFacesOutCPU.at(0), -							&worldVertsA1CPU.at(0), -							&worldNormalsACPU.at(0), -							&worldVertsB1CPU.at(0), -							&worldVertsB2CPU.at(0), -							vertexFaceCapacity, -							i); +						clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0), +													   &concaveHasSeparatingNormalsCPU.at(0), +													   &clippingFacesOutCPU.at(0), +													   &worldVertsA1CPU.at(0), +													   &worldNormalsACPU.at(0), +													   &worldVertsB1CPU.at(0), +													   &worldVertsB2CPU.at(0), +													   vertexFaceCapacity, +													   i);  					}  					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU);  					worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - - -				} else +				} +				else  				{ -  					if (1)  					{ - - -  						B3_PROFILE("clipFacesAndFindContacts");  						//nContacts = m_totalContactsOut.at(0);  						//int h = m_hasSeparatingNormals.at(0);  						//int4 p = clippingFacesOutGPU.at(0);  						b3BufferInfoCL bInfo[] = { -							b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -							b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), -							b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -							b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -							b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), -							b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), -							b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) -						}; -						b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); -						launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); +							b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +							b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), +							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), +							b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; +						b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); +						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));  						launcher.setConst(vertexFaceCapacity); -						launcher.setConst( numConcavePairs  ); +						launcher.setConst(numConcavePairs);  						int debugMode = 0; -						launcher.setConst( debugMode); +						launcher.setConst(debugMode);  						int num = numConcavePairs; -						launcher.launch1D( num); +						launcher.launch1D(num);  						clFinish(m_queue);  						//int bla = m_totalContactsOut.at(0);  					}  				}  				//contactReduction  				{ -					int newContactCapacity=nContacts+numConcavePairs;  +					int newContactCapacity = nContacts + numConcavePairs;  					contactOut->reserve(newContactCapacity);  					if (reduceConcaveContactsOnGPU)  					{ -//						printf("newReservation = %d\n",newReservation); +						//						printf("newReservation = %d\n",newReservation);  						{  							B3_PROFILE("newContactReductionKernel");  							b3BufferInfoCL bInfo[] = -							{ -								b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), -								b3BufferInfoCL( bodyBuf->getBufferCL(),true), -								b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -								b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), -								b3BufferInfoCL( contactOut->getBufferCL()), -								b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -								b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), -								b3BufferInfoCL( m_totalContactsOut.getBufferCL()) -							}; - -							b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); -							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); +								{ +									b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), +									b3BufferInfoCL(bodyBuf->getBufferCL(), true), +									b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +									b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), +									b3BufferInfoCL(contactOut->getBufferCL()), +									b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +									b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), +									b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +							b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); +							launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));  							launcher.setConst(vertexFaceCapacity);  							launcher.setConst(newContactCapacity); -							launcher.setConst( numConcavePairs  ); +							launcher.setConst(numConcavePairs);  							int num = numConcavePairs; -							launcher.launch1D( num); +							launcher.launch1D(num);  						}  						nContacts = m_totalContactsOut.at(0);  						contactOut->resize(nContacts);  						//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); -					}else +					} +					else  					{ -						  						volatile int nGlobalContactsOut = nContacts;  						b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost;  						triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost);  						b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf;  						bodyBuf->copyToHost(hostBodyBuf); -						b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; +						b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU;  						m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU);  						b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost;  						m_concaveSepNormals.copyToHost(concaveSepNormalsHost); -  						b3AlignedObjectArray<b3Contact4> hostContacts;  						if (nContacts)  						{ @@ -4268,67 +3977,59 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  						clippingFacesOutGPU.copyToHost(clippingFacesOutCPU);  						worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - -						for (int i=0;i<numConcavePairs;i++) +						for (int i = 0; i < numConcavePairs; i++)  						{ -							b3NewContactReductionKernel( &triangleConvexPairsOutHost.at(0), -                                                   &hostBodyBuf.at(0), -												   &concaveSepNormalsHost.at(0), -												   &concaveHasSeparatingNormalsCPU.at(0), -												   &hostContacts.at(0), -                                                   &clippingFacesOutCPU.at(0), -                                                   &worldVertsB2CPU.at(0), -                                                   &nGlobalContactsOut, -                                                   vertexFaceCapacity, -												   newContactCapacity, -                                                   numConcavePairs, -												   i -                                                   ); - +							b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0), +														&hostBodyBuf.at(0), +														&concaveSepNormalsHost.at(0), +														&concaveHasSeparatingNormalsCPU.at(0), +														&hostContacts.at(0), +														&clippingFacesOutCPU.at(0), +														&worldVertsB2CPU.at(0), +														&nGlobalContactsOut, +														vertexFaceCapacity, +														newContactCapacity, +														numConcavePairs, +														i);  						} -  						nContacts = nGlobalContactsOut; -						m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -//						nContacts = m_totalContactsOut.at(0); +						m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); +						//						nContacts = m_totalContactsOut.at(0);  						//contactOut->resize(nContacts);  						hostContacts.resize(nContacts);  						//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts);  						contactOut->copyFromHost(hostContacts);  					} -  				}  				//re-use? - - -			} else +			} +			else  			{  				B3_PROFILE("clipHullHullConcaveConvexKernel");  				nContacts = m_totalContactsOut.at(0);  				int newContactCapacity = contactOut->capacity();  				//printf("contactOut5 = %d\n",nContacts); -				b3BufferInfoCL bInfo[] = {  -					b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ),  -					b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -					b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -					b3BufferInfoCL( convexData.getBufferCL(),true), -					b3BufferInfoCL( gpuVertices.getBufferCL(),true), -					b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -					b3BufferInfoCL( gpuFaces.getBufferCL(),true), -					b3BufferInfoCL( gpuIndices.getBufferCL(),true), -					b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -					b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), -					b3BufferInfoCL( contactOut->getBufferCL()), -					b3BufferInfoCL( m_totalContactsOut.getBufferCL())	 -				}; -				b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); -				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); +				b3BufferInfoCL bInfo[] = { +					b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +					b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), +					b3BufferInfoCL(contactOut->getBufferCL()), +					b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; +				b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel"); +				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));  				launcher.setConst(newContactCapacity); -				launcher.setConst( numConcavePairs  ); +				launcher.setConst(numConcavePairs);  				int num = numConcavePairs; -				launcher.launch1D( num); +				launcher.launch1D(num);  				clFinish(m_queue);  				nContacts = m_totalContactsOut.at(0);  				contactOut->resize(nContacts); @@ -4337,12 +4038,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  				contactOut->copyToHost(cpuContacts);  			}  			//			printf("nContacts after = %d\n", nContacts); -		}//numConcavePairs - - +		}  //numConcavePairs  		//convex-convex contact clipping -		 +  		bool breakupKernel = false;  #ifdef __APPLE__ @@ -4350,166 +4049,149 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*  #endif  #ifdef CHECK_ON_HOST -	bool computeConvexConvex = false; +		bool computeConvexConvex = false;  #else -	bool computeConvexConvex = true; -#endif//CHECK_ON_HOST +		bool computeConvexConvex = true; +#endif  //CHECK_ON_HOST  		if (computeConvexConvex)  		{  			B3_PROFILE("clipHullHullKernel"); -		if (breakupKernel) -		{ - - - - -			worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); -			clippingFacesOutGPU.resize(nPairs); -			worldNormalsAGPU.resize(nPairs); -			worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); -			worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); - -			if (findConvexClippingFacesGPU) -			{ -				B3_PROFILE("findClippingFacesKernel"); -				b3BufferInfoCL bInfo[] = { -					b3BufferInfoCL( pairs->getBufferCL(), true ), -					b3BufferInfoCL( bodyBuf->getBufferCL(),true), -					b3BufferInfoCL( gpuCollidables.getBufferCL(),true), -					b3BufferInfoCL( convexData.getBufferCL(),true), -					b3BufferInfoCL( gpuVertices.getBufferCL(),true), -					b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -					b3BufferInfoCL( gpuFaces.getBufferCL(),true),  -					b3BufferInfoCL( gpuIndices.getBufferCL(),true), -					b3BufferInfoCL( m_sepNormals.getBufferCL()), -					b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -					b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -					b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -					b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), -					b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) -				}; - -				b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); -				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -				launcher.setConst( vertexFaceCapacity); -				launcher.setConst( nPairs  ); -				int num = nPairs; -				launcher.launch1D( num); -				clFinish(m_queue); - -			} else +			if (breakupKernel)  			{ -				 -				float minDist = -1e30f; -				float maxDist = 0.02f; +				worldVertsB1GPU.resize(vertexFaceCapacity * nPairs); +				clippingFacesOutGPU.resize(nPairs); +				worldNormalsAGPU.resize(nPairs); +				worldVertsA1GPU.resize(vertexFaceCapacity * nPairs); +				worldVertsB2GPU.resize(vertexFaceCapacity * nPairs); -				b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; -				convexData.copyToHost(hostConvexData); -				b3AlignedObjectArray<b3Collidable> hostCollidables; -				gpuCollidables.copyToHost(hostCollidables); +				if (findConvexClippingFacesGPU) +				{ +					B3_PROFILE("findClippingFacesKernel"); +					b3BufferInfoCL bInfo[] = { +						b3BufferInfoCL(pairs->getBufferCL(), true), +						b3BufferInfoCL(bodyBuf->getBufferCL(), true), +						b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +						b3BufferInfoCL(convexData.getBufferCL(), true), +						b3BufferInfoCL(gpuVertices.getBufferCL(), true), +						b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +						b3BufferInfoCL(gpuFaces.getBufferCL(), true), +						b3BufferInfoCL(gpuIndices.getBufferCL(), true), +						b3BufferInfoCL(m_sepNormals.getBufferCL()), +						b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +						b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +						b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +						b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +						b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; + +					b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel"); +					launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +					launcher.setConst(vertexFaceCapacity); +					launcher.setConst(nPairs); +					int num = nPairs; +					launcher.launch1D(num); +					clFinish(m_queue); +				} +				else +				{ +					float minDist = -1e30f; +					float maxDist = 0.02f; -				b3AlignedObjectArray<int> hostHasSepNormals; -				m_hasSeparatingNormals.copyToHost(hostHasSepNormals); -				b3AlignedObjectArray<b3Vector3> cpuSepNormals; -				m_sepNormals.copyToHost(cpuSepNormals); +					b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; +					convexData.copyToHost(hostConvexData); +					b3AlignedObjectArray<b3Collidable> hostCollidables; +					gpuCollidables.copyToHost(hostCollidables); -				b3AlignedObjectArray<b3Int4> hostPairs; -				pairs->copyToHost(hostPairs); -				b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -				bodyBuf->copyToHost(hostBodyBuf); +					b3AlignedObjectArray<int> hostHasSepNormals; +					m_hasSeparatingNormals.copyToHost(hostHasSepNormals); +					b3AlignedObjectArray<b3Vector3> cpuSepNormals; +					m_sepNormals.copyToHost(cpuSepNormals); +					b3AlignedObjectArray<b3Int4> hostPairs; +					pairs->copyToHost(hostPairs); +					b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +					bodyBuf->copyToHost(hostBodyBuf); -				//worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); -				b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; -				worldVertsB1GPU.copyToHost(worldVertsB1CPU); +					//worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); +					b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; +					worldVertsB1GPU.copyToHost(worldVertsB1CPU); -				b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; -				clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); +					b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; +					clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); -				b3AlignedObjectArray<b3Vector3> worldNormalsACPU; -				worldNormalsACPU.resize(nPairs); +					b3AlignedObjectArray<b3Vector3> worldNormalsACPU; +					worldNormalsACPU.resize(nPairs); -				b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; -				worldVertsA1CPU.resize(worldVertsA1GPU.size()); -			 -			 -				b3AlignedObjectArray<b3Vector3> hostVertices; -				gpuVertices.copyToHost(hostVertices); -				b3AlignedObjectArray<b3GpuFace> hostFaces; -				gpuFaces.copyToHost(hostFaces); -				b3AlignedObjectArray<int> hostIndices; -				gpuIndices.copyToHost(hostIndices); -				 - -				for (int i=0;i<nPairs;i++) -				{ +					b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; +					worldVertsA1CPU.resize(worldVertsA1GPU.size()); -					int bodyIndexA = hostPairs[i].x; -					int bodyIndexB = hostPairs[i].y; -			 -					int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; -					int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; -			 -					int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; -					int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; -			 +					b3AlignedObjectArray<b3Vector3> hostVertices; +					gpuVertices.copyToHost(hostVertices); +					b3AlignedObjectArray<b3GpuFace> hostFaces; +					gpuFaces.copyToHost(hostFaces); +					b3AlignedObjectArray<int> hostIndices; +					gpuIndices.copyToHost(hostIndices); -					if (hostHasSepNormals[i]) +					for (int i = 0; i < nPairs; i++)  					{ -						b3FindClippingFaces(cpuSepNormals[i], -							&hostConvexData[shapeIndexA], -							&hostConvexData[shapeIndexB], -							hostBodyBuf[bodyIndexA].m_pos,hostBodyBuf[bodyIndexA].m_quat, -							hostBodyBuf[bodyIndexB].m_pos,hostBodyBuf[bodyIndexB].m_quat, -							&worldVertsA1CPU.at(0),&worldNormalsACPU.at(0), -							&worldVertsB1CPU.at(0), -							vertexFaceCapacity,minDist,maxDist, -							&hostVertices.at(0),&hostFaces.at(0), -							&hostIndices.at(0), -							&hostVertices.at(0),&hostFaces.at(0), -							&hostIndices.at(0),&clippingFacesOutCPU.at(0),i); -					} -				} - -				clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); -				worldVertsA1GPU.copyFromHost(worldVertsA1CPU); -				worldNormalsAGPU.copyFromHost(worldNormalsACPU); -				worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - -			} +						int bodyIndexA = hostPairs[i].x; +						int bodyIndexB = hostPairs[i].y; +						int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; +						int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; +						int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; +						int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; +						if (hostHasSepNormals[i]) +						{ +							b3FindClippingFaces(cpuSepNormals[i], +												&hostConvexData[shapeIndexA], +												&hostConvexData[shapeIndexB], +												hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat, +												hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat, +												&worldVertsA1CPU.at(0), &worldNormalsACPU.at(0), +												&worldVertsB1CPU.at(0), +												vertexFaceCapacity, minDist, maxDist, +												&hostVertices.at(0), &hostFaces.at(0), +												&hostIndices.at(0), +												&hostVertices.at(0), &hostFaces.at(0), +												&hostIndices.at(0), &clippingFacesOutCPU.at(0), i); +						} +					} +					clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); +					worldVertsA1GPU.copyFromHost(worldVertsA1CPU); +					worldNormalsAGPU.copyFromHost(worldNormalsACPU); +					worldVertsB1GPU.copyFromHost(worldVertsB1CPU); +				} -			///clip face B against face A, reduce contacts and append them to a global contact array -			if (1) -			{ -				if (clipConvexFacesAndFindContactsCPU) +				///clip face B against face A, reduce contacts and append them to a global contact array +				if (1)  				{ +					if (clipConvexFacesAndFindContactsCPU) +					{ +						//b3AlignedObjectArray<b3Int4> hostPairs; +						//pairs->copyToHost(hostPairs); -					//b3AlignedObjectArray<b3Int4> hostPairs; -					//pairs->copyToHost(hostPairs); +						b3AlignedObjectArray<b3Vector3> hostSepNormals; +						m_sepNormals.copyToHost(hostSepNormals); +						b3AlignedObjectArray<int> hostHasSepAxis; +						m_hasSeparatingNormals.copyToHost(hostHasSepAxis); -					b3AlignedObjectArray<b3Vector3> hostSepNormals; -					m_sepNormals.copyToHost(hostSepNormals); -					b3AlignedObjectArray<int> hostHasSepAxis; -					m_hasSeparatingNormals.copyToHost(hostHasSepAxis); +						b3AlignedObjectArray<b3Int4> hostClippingFaces; +						clippingFacesOutGPU.copyToHost(hostClippingFaces); +						b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; +						worldVertsB2CPU.resize(vertexFaceCapacity * nPairs); -					b3AlignedObjectArray<b3Int4> hostClippingFaces; -					clippingFacesOutGPU.copyToHost(hostClippingFaces); -					b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; -					worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); -					 -					b3AlignedObjectArray<b3Vector3>worldVertsA1CPU; -					worldVertsA1GPU.copyToHost(worldVertsA1CPU); -					b3AlignedObjectArray<b3Vector3> worldNormalsACPU; -					worldNormalsAGPU.copyToHost(worldNormalsACPU); +						b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; +						worldVertsA1GPU.copyToHost(worldVertsA1CPU); +						b3AlignedObjectArray<b3Vector3> worldNormalsACPU; +						worldNormalsAGPU.copyToHost(worldNormalsACPU); -					b3AlignedObjectArray<b3Vector3>  worldVertsB1CPU; -					worldVertsB1GPU.copyToHost(worldVertsB1CPU); +						b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; +						worldVertsB1GPU.copyToHost(worldVertsB1CPU); -					/* +						/*  					  __global const b3Float4* separatingNormals,                                                     __global const int* hasSeparatingAxis,                                                     __global b3Int4* clippingFacesOut, @@ -4520,214 +4202,207 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>*                                                      int vertexFaceCapacity,  															int pairIndex  					*/ -					for (int i=0;i<nPairs;i++) -					{ -						clipFacesAndFindContactsKernel( -							&hostSepNormals.at(0), -							&hostHasSepAxis.at(0), -							&hostClippingFaces.at(0), -							&worldVertsA1CPU.at(0), -							&worldNormalsACPU.at(0), -							&worldVertsB1CPU.at(0), -							&worldVertsB2CPU.at(0), - -						vertexFaceCapacity, -							i); -					} -					 -					clippingFacesOutGPU.copyFromHost(hostClippingFaces); -					worldVertsB2GPU.copyFromHost(worldVertsB2CPU); +						for (int i = 0; i < nPairs; i++) +						{ +							clipFacesAndFindContactsKernel( +								&hostSepNormals.at(0), +								&hostHasSepAxis.at(0), +								&hostClippingFaces.at(0), +								&worldVertsA1CPU.at(0), +								&worldNormalsACPU.at(0), +								&worldVertsB1CPU.at(0), +								&worldVertsB2CPU.at(0), -				} else -				{ -					B3_PROFILE("clipFacesAndFindContacts"); -					//nContacts = m_totalContactsOut.at(0); -					//int h = m_hasSeparatingNormals.at(0); -					//int4 p = clippingFacesOutGPU.at(0); -					b3BufferInfoCL bInfo[] = { -						b3BufferInfoCL( m_sepNormals.getBufferCL()), -						b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -						b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -						b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), -						b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), -						b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), -						b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) -					}; +								vertexFaceCapacity, +								i); +						} -					b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); -					launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -					launcher.setConst(vertexFaceCapacity); +						clippingFacesOutGPU.copyFromHost(hostClippingFaces); +						worldVertsB2GPU.copyFromHost(worldVertsB2CPU); +					} +					else +					{ +						B3_PROFILE("clipFacesAndFindContacts"); +						//nContacts = m_totalContactsOut.at(0); +						//int h = m_hasSeparatingNormals.at(0); +						//int4 p = clippingFacesOutGPU.at(0); +						b3BufferInfoCL bInfo[] = { +							b3BufferInfoCL(m_sepNormals.getBufferCL()), +							b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +							b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), +							b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), +							b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), +							b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; + +						b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); +						launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +						launcher.setConst(vertexFaceCapacity); -					launcher.setConst( nPairs  ); -					int debugMode = 0; -					launcher.setConst( debugMode); -					int num = nPairs; -					launcher.launch1D( num); -					clFinish(m_queue); -				}  +						launcher.setConst(nPairs); +						int debugMode = 0; +						launcher.setConst(debugMode); +						int num = nPairs; +						launcher.launch1D(num); +						clFinish(m_queue); +					} -				{ -					nContacts = m_totalContactsOut.at(0); -					//printf("nContacts = %d\n",nContacts); +					{ +						nContacts = m_totalContactsOut.at(0); +						//printf("nContacts = %d\n",nContacts); -					int newContactCapacity = nContacts+nPairs; -					contactOut->reserve(newContactCapacity); +						int newContactCapacity = nContacts + nPairs; +						contactOut->reserve(newContactCapacity); -					if (reduceConvexContactsOnGPU) -					{ +						if (reduceConvexContactsOnGPU)  						{ -							B3_PROFILE("newContactReductionKernel"); -							b3BufferInfoCL bInfo[] =  							{ -								b3BufferInfoCL( pairs->getBufferCL(), true ), -								b3BufferInfoCL( bodyBuf->getBufferCL(),true), -								b3BufferInfoCL( m_sepNormals.getBufferCL()), -								b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -								b3BufferInfoCL( contactOut->getBufferCL()), -								b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), -								b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), -								b3BufferInfoCL( m_totalContactsOut.getBufferCL()) -							}; - -							b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); -							launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -							launcher.setConst(vertexFaceCapacity); -							launcher.setConst(newContactCapacity); -							launcher.setConst( nPairs  ); -							int num = nPairs; - -							launcher.launch1D( num); +								B3_PROFILE("newContactReductionKernel"); +								b3BufferInfoCL bInfo[] = +									{ +										b3BufferInfoCL(pairs->getBufferCL(), true), +										b3BufferInfoCL(bodyBuf->getBufferCL(), true), +										b3BufferInfoCL(m_sepNormals.getBufferCL()), +										b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +										b3BufferInfoCL(contactOut->getBufferCL()), +										b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), +										b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), +										b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + +								b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); +								launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +								launcher.setConst(vertexFaceCapacity); +								launcher.setConst(newContactCapacity); +								launcher.setConst(nPairs); +								int num = nPairs; + +								launcher.launch1D(num); +							} +							nContacts = m_totalContactsOut.at(0); +							contactOut->resize(nContacts);  						} -						nContacts = m_totalContactsOut.at(0); -						contactOut->resize(nContacts); -					} else -					{ - -						volatile int nGlobalContactsOut = nContacts; -						b3AlignedObjectArray<b3Int4> hostPairs; -						pairs->copyToHost(hostPairs); -						b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; -						bodyBuf->copyToHost(hostBodyBuf); -						b3AlignedObjectArray<b3Vector3> hostSepNormals; -						m_sepNormals.copyToHost(hostSepNormals); -						b3AlignedObjectArray<int> hostHasSepAxis; -						m_hasSeparatingNormals.copyToHost(hostHasSepAxis); -						b3AlignedObjectArray<b3Contact4> hostContactsOut; -						contactOut->copyToHost(hostContactsOut); -						hostContactsOut.resize(newContactCapacity); - -						b3AlignedObjectArray<b3Int4> hostClippingFaces; -						clippingFacesOutGPU.copyToHost(hostClippingFaces); -						b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; -						worldVertsB2GPU.copyToHost(worldVertsB2CPU); - -						for (int i=0;i<nPairs;i++) +						else  						{ -							b3NewContactReductionKernel(&hostPairs.at(0), -								&hostBodyBuf.at(0), -								&hostSepNormals.at(0), -								&hostHasSepAxis.at(0), -								&hostContactsOut.at(0), -								&hostClippingFaces.at(0), -								&worldVertsB2CPU.at(0), -								&nGlobalContactsOut, -								vertexFaceCapacity, -								newContactCapacity, -								nPairs, -								i); +							volatile int nGlobalContactsOut = nContacts; +							b3AlignedObjectArray<b3Int4> hostPairs; +							pairs->copyToHost(hostPairs); +							b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; +							bodyBuf->copyToHost(hostBodyBuf); +							b3AlignedObjectArray<b3Vector3> hostSepNormals; +							m_sepNormals.copyToHost(hostSepNormals); +							b3AlignedObjectArray<int> hostHasSepAxis; +							m_hasSeparatingNormals.copyToHost(hostHasSepAxis); +							b3AlignedObjectArray<b3Contact4> hostContactsOut; +							contactOut->copyToHost(hostContactsOut); +							hostContactsOut.resize(newContactCapacity); + +							b3AlignedObjectArray<b3Int4> hostClippingFaces; +							clippingFacesOutGPU.copyToHost(hostClippingFaces); +							b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; +							worldVertsB2GPU.copyToHost(worldVertsB2CPU); + +							for (int i = 0; i < nPairs; i++) +							{ +								b3NewContactReductionKernel(&hostPairs.at(0), +															&hostBodyBuf.at(0), +															&hostSepNormals.at(0), +															&hostHasSepAxis.at(0), +															&hostContactsOut.at(0), +															&hostClippingFaces.at(0), +															&worldVertsB2CPU.at(0), +															&nGlobalContactsOut, +															vertexFaceCapacity, +															newContactCapacity, +															nPairs, +															i); +							} + +							nContacts = nGlobalContactsOut; +							m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); +							hostContactsOut.resize(nContacts); +							//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); +							contactOut->copyFromHost(hostContactsOut);  						} +						//                    b3Contact4 pt = contactOut->at(0); +						//                  printf("nContacts = %d\n",nContacts); +					} +				} +			} +			else  //breakupKernel +			{ +				if (nPairs) +				{ +					b3BufferInfoCL bInfo[] = { +						b3BufferInfoCL(pairs->getBufferCL(), true), +						b3BufferInfoCL(bodyBuf->getBufferCL(), true), +						b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +						b3BufferInfoCL(convexData.getBufferCL(), true), +						b3BufferInfoCL(gpuVertices.getBufferCL(), true), +						b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +						b3BufferInfoCL(gpuFaces.getBufferCL(), true), +						b3BufferInfoCL(gpuIndices.getBufferCL(), true), +						b3BufferInfoCL(m_sepNormals.getBufferCL()), +						b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), +						b3BufferInfoCL(contactOut->getBufferCL()), +						b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; +					b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel"); +					launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +					launcher.setConst(nPairs); +					launcher.setConst(maxContactCapacity); -						nContacts = nGlobalContactsOut; -						m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -						hostContactsOut.resize(nContacts); -						//printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); -						contactOut->copyFromHost(hostContactsOut); +					int num = nPairs; +					launcher.launch1D(num); +					clFinish(m_queue); + +					nContacts = m_totalContactsOut.at(0); +					if (nContacts >= maxContactCapacity) +					{ +						b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity); +						nContacts = maxContactCapacity;  					} -					//                    b3Contact4 pt = contactOut->at(0); -					//                  printf("nContacts = %d\n",nContacts); +					contactOut->resize(nContacts);  				}  			} -		}             -		else//breakupKernel -		{ -			if (nPairs) +			int nCompoundsPairs = m_gpuCompoundPairs.size(); + +			if (nCompoundsPairs)  			{  				b3BufferInfoCL bInfo[] = { -					b3BufferInfoCL( pairs->getBufferCL(), true ),  -					b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -					b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -					b3BufferInfoCL( convexData.getBufferCL(),true), -					b3BufferInfoCL( gpuVertices.getBufferCL(),true), -					b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -					b3BufferInfoCL( gpuFaces.getBufferCL(),true), -					b3BufferInfoCL( gpuIndices.getBufferCL(),true), -					b3BufferInfoCL( m_sepNormals.getBufferCL()), -					b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), -					b3BufferInfoCL( contactOut->getBufferCL()), -					b3BufferInfoCL( m_totalContactsOut.getBufferCL())	 -				}; -				b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); -				launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -				launcher.setConst( nPairs  ); +					b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), +					b3BufferInfoCL(bodyBuf->getBufferCL(), true), +					b3BufferInfoCL(gpuCollidables.getBufferCL(), true), +					b3BufferInfoCL(convexData.getBufferCL(), true), +					b3BufferInfoCL(gpuVertices.getBufferCL(), true), +					b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), +					b3BufferInfoCL(gpuFaces.getBufferCL(), true), +					b3BufferInfoCL(gpuIndices.getBufferCL(), true), +					b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), +					b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true), +					b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true), +					b3BufferInfoCL(contactOut->getBufferCL()), +					b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; +				b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel"); +				launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); +				launcher.setConst(nCompoundsPairs);  				launcher.setConst(maxContactCapacity); -				int num = nPairs; -				launcher.launch1D( num); +				int num = nCompoundsPairs; +				launcher.launch1D(num);  				clFinish(m_queue);  				nContacts = m_totalContactsOut.at(0); -				if (nContacts >= maxContactCapacity) +				if (nContacts > maxContactCapacity)  				{ -					b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); +					b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity);  					nContacts = maxContactCapacity;  				}  				contactOut->resize(nContacts); -			} -		} - - -		int nCompoundsPairs = m_gpuCompoundPairs.size(); - -		if (nCompoundsPairs) -		{ -			b3BufferInfoCL bInfo[] = { -				b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ),  -				b3BufferInfoCL( bodyBuf->getBufferCL(),true),  -				b3BufferInfoCL( gpuCollidables.getBufferCL(),true),  -				b3BufferInfoCL( convexData.getBufferCL(),true), -				b3BufferInfoCL( gpuVertices.getBufferCL(),true), -				b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), -				b3BufferInfoCL( gpuFaces.getBufferCL(),true), -				b3BufferInfoCL( gpuIndices.getBufferCL(),true), -				b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), -				b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), -				b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), -				b3BufferInfoCL( contactOut->getBufferCL()), -				b3BufferInfoCL( m_totalContactsOut.getBufferCL())	 -			}; -			b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); -			launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); -			launcher.setConst( nCompoundsPairs  ); -			launcher.setConst(maxContactCapacity); - -			int num = nCompoundsPairs; -			launcher.launch1D( num); -			clFinish(m_queue); - -			nContacts = m_totalContactsOut.at(0); -			if (nContacts>maxContactCapacity) -			{ - -				b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); -				nContacts = maxContactCapacity; -			} -			contactOut->resize(nContacts); -		}//if nCompoundsPairs +			}  //if nCompoundsPairs  		} -	}//contactClippingOnGpu +	}  //contactClippingOnGpu  	//printf("nContacts end = %d\n",nContacts); -	 +  	//printf("frameCount = %d\n",frameCount++);  } diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h index e24c1579c6..53e8c4ed4d 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -17,102 +17,90 @@  //#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" - - -  struct GpuSatCollision  { -	cl_context				m_context; -	cl_device_id			m_device; -	cl_command_queue		m_queue; -	cl_kernel				m_findSeparatingAxisKernel; -	cl_kernel				m_mprPenetrationKernel; -	cl_kernel				m_findSeparatingAxisUnitSphereKernel; - +	cl_context m_context; +	cl_device_id m_device; +	cl_command_queue m_queue; +	cl_kernel m_findSeparatingAxisKernel; +	cl_kernel m_mprPenetrationKernel; +	cl_kernel m_findSeparatingAxisUnitSphereKernel;  	cl_kernel m_findSeparatingAxisVertexFaceKernel;  	cl_kernel m_findSeparatingAxisEdgeEdgeKernel; -	 -	cl_kernel				m_findConcaveSeparatingAxisKernel; -    cl_kernel				m_findConcaveSeparatingAxisVertexFaceKernel; -    cl_kernel				m_findConcaveSeparatingAxisEdgeEdgeKernel; -  -     -     -     -	cl_kernel				m_findCompoundPairsKernel; -	cl_kernel				m_processCompoundPairsKernel; - -	cl_kernel				m_clipHullHullKernel; -	cl_kernel				m_clipCompoundsHullHullKernel; -     -    cl_kernel               m_clipFacesAndFindContacts; -    cl_kernel               m_findClippingFacesKernel; -     -	cl_kernel				m_clipHullHullConcaveConvexKernel; -//	cl_kernel				m_extractManifoldAndAddContactKernel; -    cl_kernel               m_newContactReductionKernel; - -	cl_kernel				m_bvhTraversalKernel; -	cl_kernel				m_primitiveContactsKernel; -	cl_kernel				m_findConcaveSphereContactsKernel; - -	cl_kernel				m_processCompoundPairsPrimitivesKernel; -     + +	cl_kernel m_findConcaveSeparatingAxisKernel; +	cl_kernel m_findConcaveSeparatingAxisVertexFaceKernel; +	cl_kernel m_findConcaveSeparatingAxisEdgeEdgeKernel; + +	cl_kernel m_findCompoundPairsKernel; +	cl_kernel m_processCompoundPairsKernel; + +	cl_kernel m_clipHullHullKernel; +	cl_kernel m_clipCompoundsHullHullKernel; + +	cl_kernel m_clipFacesAndFindContacts; +	cl_kernel m_findClippingFacesKernel; + +	cl_kernel m_clipHullHullConcaveConvexKernel; +	//	cl_kernel				m_extractManifoldAndAddContactKernel; +	cl_kernel m_newContactReductionKernel; + +	cl_kernel m_bvhTraversalKernel; +	cl_kernel m_primitiveContactsKernel; +	cl_kernel m_findConcaveSphereContactsKernel; + +	cl_kernel m_processCompoundPairsPrimitivesKernel; +  	b3OpenCLArray<b3Vector3> m_unitSphereDirections; -	b3OpenCLArray<int>		m_totalContactsOut; +	b3OpenCLArray<int> m_totalContactsOut;  	b3OpenCLArray<b3Vector3> m_sepNormals;  	b3OpenCLArray<float> m_dmins; -	b3OpenCLArray<int>		m_hasSeparatingNormals; +	b3OpenCLArray<int> m_hasSeparatingNormals;  	b3OpenCLArray<b3Vector3> m_concaveSepNormals; -	b3OpenCLArray<int>		m_concaveHasSeparatingNormals; -	b3OpenCLArray<int>		m_numConcavePairsOut; +	b3OpenCLArray<int> m_concaveHasSeparatingNormals; +	b3OpenCLArray<int> m_numConcavePairsOut;  	b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs;  	b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals; -	b3OpenCLArray<int>		m_gpuHasCompoundSepNormals; -	b3OpenCLArray<int>		m_numCompoundPairsOut; -	 +	b3OpenCLArray<int> m_gpuHasCompoundSepNormals; +	b3OpenCLArray<int> m_numCompoundPairsOut; -	GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue  q ); +	GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q);  	virtual ~GpuSatCollision(); -	 - -	void computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs,  -			const b3OpenCLArray<b3RigidBodyData>* bodyBuf, -			b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, -			const b3OpenCLArray<b3Contact4>* oldContacts, -			int maxContactCapacity, -			int compoundPairCapacity, -			const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, -			const b3OpenCLArray<b3Vector3>& vertices, -			const b3OpenCLArray<b3Vector3>& uniqueEdges, -			const b3OpenCLArray<b3GpuFace>& faces, -			const b3OpenCLArray<int>& indices, -			const b3OpenCLArray<b3Collidable>& gpuCollidables, -			const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - -			const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, -			const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - -           b3OpenCLArray<b3Vector3>& worldVertsB1GPU, -           b3OpenCLArray<b3Int4>& clippingFacesOutGPU, -           b3OpenCLArray<b3Vector3>& worldNormalsAGPU, -           b3OpenCLArray<b3Vector3>& worldVertsA1GPU, -           b3OpenCLArray<b3Vector3>& worldVertsB2GPU, -		   b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData, -		   b3OpenCLArray<b3QuantizedBvhNode>*	treeNodesGPU, -			b3OpenCLArray<b3BvhSubtreeInfo>*	subTreesGPU, -			b3OpenCLArray<b3BvhInfo>*	bvhInfo, -			int numObjects, -			int maxTriConvexPairCapacity, -			b3OpenCLArray<b3Int4>& triangleConvexPairs, -			int& numTriConvexPairsOut -			); - +	void computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs, +										   const b3OpenCLArray<b3RigidBodyData>* bodyBuf, +										   b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, +										   const b3OpenCLArray<b3Contact4>* oldContacts, +										   int maxContactCapacity, +										   int compoundPairCapacity, +										   const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, +										   const b3OpenCLArray<b3Vector3>& vertices, +										   const b3OpenCLArray<b3Vector3>& uniqueEdges, +										   const b3OpenCLArray<b3GpuFace>& faces, +										   const b3OpenCLArray<int>& indices, +										   const b3OpenCLArray<b3Collidable>& gpuCollidables, +										   const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, + +										   const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, +										   const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, + +										   b3OpenCLArray<b3Vector3>& worldVertsB1GPU, +										   b3OpenCLArray<b3Int4>& clippingFacesOutGPU, +										   b3OpenCLArray<b3Vector3>& worldNormalsAGPU, +										   b3OpenCLArray<b3Vector3>& worldVertsA1GPU, +										   b3OpenCLArray<b3Vector3>& worldVertsB2GPU, +										   b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData, +										   b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, +										   b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, +										   b3OpenCLArray<b3BvhInfo>* bvhInfo, +										   int numObjects, +										   int maxTriConvexPairCapacity, +										   b3OpenCLArray<b3Int4>& triangleConvexPairs, +										   int& numTriConvexPairsOut);  }; -#endif //_CONVEX_HULL_CONTACT_H +#endif  //_CONVEX_HULL_CONTACT_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h index 337100fb1a..c4cf700076 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h @@ -4,6 +4,4 @@  #include "Bullet3Common/b3Transform.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - - -#endif //CONVEX_POLYHEDRON_CL +#endif  //CONVEX_POLYHEDRON_CL diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp index d636f983c6..974b246f03 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp @@ -29,902 +29,951 @@ GJK-EPA collision solver by Nathanael Presson, 2008  namespace gjkepa2_impl2  { +// Config -	// Config +/* GJK	*/ +#define GJK_MAX_ITERATIONS 128 +#define GJK_ACCURACY ((b3Scalar)0.0001) +#define GJK_MIN_DISTANCE ((b3Scalar)0.0001) +#define GJK_DUPLICATED_EPS ((b3Scalar)0.0001) +#define GJK_SIMPLEX2_EPS ((b3Scalar)0.0) +#define GJK_SIMPLEX3_EPS ((b3Scalar)0.0) +#define GJK_SIMPLEX4_EPS ((b3Scalar)0.0) -	/* GJK	*/  -#define GJK_MAX_ITERATIONS	128 -#define GJK_ACCURACY		((b3Scalar)0.0001) -#define GJK_MIN_DISTANCE	((b3Scalar)0.0001) -#define GJK_DUPLICATED_EPS	((b3Scalar)0.0001) -#define GJK_SIMPLEX2_EPS	((b3Scalar)0.0) -#define GJK_SIMPLEX3_EPS	((b3Scalar)0.0) -#define GJK_SIMPLEX4_EPS	((b3Scalar)0.0) +/* EPA	*/ +#define EPA_MAX_VERTICES 64 +#define EPA_MAX_FACES (EPA_MAX_VERTICES * 2) +#define EPA_MAX_ITERATIONS 255 +#define EPA_ACCURACY ((b3Scalar)0.0001) +#define EPA_FALLBACK (10 * EPA_ACCURACY) +#define EPA_PLANE_EPS ((b3Scalar)0.00001) +#define EPA_INSIDE_EPS ((b3Scalar)0.01) -	/* EPA	*/  -#define EPA_MAX_VERTICES	64 -#define EPA_MAX_FACES		(EPA_MAX_VERTICES*2) -#define EPA_MAX_ITERATIONS	255 -#define EPA_ACCURACY		((b3Scalar)0.0001) -#define EPA_FALLBACK		(10*EPA_ACCURACY) -#define EPA_PLANE_EPS		((b3Scalar)0.00001) -#define EPA_INSIDE_EPS		((b3Scalar)0.01) +// Shorthands +// MinkowskiDiff +struct b3MinkowskiDiff +{ +	const b3ConvexPolyhedronData* m_shapes[2]; -	// Shorthands -	 - -	// MinkowskiDiff -	struct	b3MinkowskiDiff -	{ -		 - -		const b3ConvexPolyhedronData*	m_shapes[2]; -		 - -		b3Matrix3x3				m_toshape1; -		b3Transform				m_toshape0; +	b3Matrix3x3 m_toshape1; +	b3Transform m_toshape0; -		bool					m_enableMargin; -		 +	bool m_enableMargin; -			void					EnableMargin(bool enable) -		{ -			m_enableMargin = enable; -		}	 -		inline b3Vector3		Support0(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA) const +	void EnableMargin(bool enable) +	{ +		m_enableMargin = enable; +	} +	inline b3Vector3 Support0(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA) const +	{ +		if (m_enableMargin)  		{ -			if (m_enableMargin) -			{ -				return localGetSupportVertexWithMargin(d,m_shapes[0],verticesA,0.f); -			} else -			{ -				return localGetSupportVertexWithoutMargin(d,m_shapes[0],verticesA); -			} +			return localGetSupportVertexWithMargin(d, m_shapes[0], verticesA, 0.f);  		} -		inline b3Vector3		Support1(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesB) const +		else  		{ -			if (m_enableMargin) -			{ -				return m_toshape0*(localGetSupportVertexWithMargin(m_toshape1*d,m_shapes[1],verticesB,0.f)); -			} else -			{ -				return m_toshape0*(localGetSupportVertexWithoutMargin(m_toshape1*d,m_shapes[1],verticesB)); -			} +			return localGetSupportVertexWithoutMargin(d, m_shapes[0], verticesA);  		} - -		inline b3Vector3		Support(const b3Vector3& d,  const b3AlignedObjectArray<b3Vector3>& verticesA,  const b3AlignedObjectArray<b3Vector3>& verticesB) const +	} +	inline b3Vector3 Support1(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesB) const +	{ +		if (m_enableMargin)  		{ -			return(Support0(d,verticesA)-Support1(-d,verticesB)); +			return m_toshape0 * (localGetSupportVertexWithMargin(m_toshape1 * d, m_shapes[1], verticesB, 0.f));  		} -		b3Vector3				Support(const b3Vector3& d,unsigned int index,const b3AlignedObjectArray<b3Vector3>& verticesA,  const b3AlignedObjectArray<b3Vector3>& verticesB) const +		else  		{ -			if(index) -				return(Support1(d,verticesA)); -			else -				return(Support0(d,verticesB)); +			return m_toshape0 * (localGetSupportVertexWithoutMargin(m_toshape1 * d, m_shapes[1], verticesB));  		} -	}; +	} -	typedef	b3MinkowskiDiff	tShape; +	inline b3Vector3 Support(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const +	{ +		return (Support0(d, verticesA) - Support1(-d, verticesB)); +	} +	b3Vector3 Support(const b3Vector3& d, unsigned int index, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const +	{ +		if (index) +			return (Support1(d, verticesA)); +		else +			return (Support0(d, verticesB)); +	} +}; +typedef b3MinkowskiDiff tShape; -	// GJK -	struct	b3GJK +// GJK +struct b3GJK +{ +	/* Types		*/ +	struct sSV  	{ -		/* Types		*/  -		struct	sSV -		{ -			b3Vector3	d,w; -		}; -		struct	sSimplex +		b3Vector3 d, w; +	}; +	struct sSimplex +	{ +		sSV* c[4]; +		b3Scalar p[4]; +		unsigned int rank; +	}; +	struct eStatus +	{ +		enum _  		{ -			sSV*		c[4]; -			b3Scalar	p[4]; -			unsigned int			rank; -		}; -		struct	eStatus	{ enum _ {  			Valid,  			Inside, -			Failed		};}; -			/* Fields		*/  -			tShape			m_shape; -			const b3AlignedObjectArray<b3Vector3>& m_verticesA; -			const b3AlignedObjectArray<b3Vector3>& m_verticesB; -			b3Vector3		m_ray; -			b3Scalar		m_distance; -			sSimplex		m_simplices[2]; -			sSV				m_store[4]; -			sSV*			m_free[4]; -			unsigned int				m_nfree; -			unsigned int				m_current; -			sSimplex*		m_simplex; -			eStatus::_		m_status; -			/* Methods		*/  -			b3GJK(const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB) -				:m_verticesA(verticesA),m_verticesB(verticesB) -			{ -				Initialize(); +			Failed +		}; +	}; +	/* Fields		*/ +	tShape m_shape; +	const b3AlignedObjectArray<b3Vector3>& m_verticesA; +	const b3AlignedObjectArray<b3Vector3>& m_verticesB; +	b3Vector3 m_ray; +	b3Scalar m_distance; +	sSimplex m_simplices[2]; +	sSV m_store[4]; +	sSV* m_free[4]; +	unsigned int m_nfree; +	unsigned int m_current; +	sSimplex* m_simplex; +	eStatus::_ m_status; +	/* Methods		*/ +	b3GJK(const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) +		: m_verticesA(verticesA), m_verticesB(verticesB) +	{ +		Initialize(); +	} +	void Initialize() +	{ +		m_ray = b3MakeVector3(0, 0, 0); +		m_nfree = 0; +		m_status = eStatus::Failed; +		m_current = 0; +		m_distance = 0; +	} +	eStatus::_ Evaluate(const tShape& shapearg, const b3Vector3& guess) +	{ +		unsigned int iterations = 0; +		b3Scalar sqdist = 0; +		b3Scalar alpha = 0; +		b3Vector3 lastw[4]; +		unsigned int clastw = 0; +		/* Initialize solver		*/ +		m_free[0] = &m_store[0]; +		m_free[1] = &m_store[1]; +		m_free[2] = &m_store[2]; +		m_free[3] = &m_store[3]; +		m_nfree = 4; +		m_current = 0; +		m_status = eStatus::Valid; +		m_shape = shapearg; +		m_distance = 0; +		/* Initialize simplex		*/ +		m_simplices[0].rank = 0; +		m_ray = guess; +		const b3Scalar sqrl = m_ray.length2(); +		appendvertice(m_simplices[0], sqrl > 0 ? -m_ray : b3MakeVector3(1, 0, 0)); +		m_simplices[0].p[0] = 1; +		m_ray = m_simplices[0].c[0]->w; +		sqdist = sqrl; +		lastw[0] = +			lastw[1] = +				lastw[2] = +					lastw[3] = m_ray; +		/* Loop						*/ +		do +		{ +			const unsigned int next = 1 - m_current; +			sSimplex& cs = m_simplices[m_current]; +			sSimplex& ns = m_simplices[next]; +			/* Check zero							*/ +			const b3Scalar rl = m_ray.length(); +			if (rl < GJK_MIN_DISTANCE) +			{ /* Touching or inside				*/ +				m_status = eStatus::Inside; +				break;  			} -			void				Initialize() +			/* Append new vertice in -'v' direction	*/ +			appendvertice(cs, -m_ray); +			const b3Vector3& w = cs.c[cs.rank - 1]->w; +			bool found = false; +			for (unsigned int i = 0; i < 4; ++i)  			{ -				m_ray		=	b3MakeVector3(0,0,0); -				m_nfree		=	0; -				m_status	=	eStatus::Failed; -				m_current	=	0; -				m_distance	=	0; +				if ((w - lastw[i]).length2() < GJK_DUPLICATED_EPS) +				{ +					found = true; +					break; +				} +			} +			if (found) +			{ /* Return old simplex				*/ +				removevertice(m_simplices[m_current]); +				break;  			} -			eStatus::_			Evaluate(const tShape& shapearg,const b3Vector3& guess) +			else +			{ /* Update lastw					*/ +				lastw[clastw = (clastw + 1) & 3] = w; +			} +			/* Check for termination				*/ +			const b3Scalar omega = b3Dot(m_ray, w) / rl; +			alpha = b3Max(omega, alpha); +			if (((rl - alpha) - (GJK_ACCURACY * rl)) <= 0) +			{ /* Return old simplex				*/ +				removevertice(m_simplices[m_current]); +				break; +			} +			/* Reduce simplex						*/ +			b3Scalar weights[4]; +			unsigned int mask = 0; +			switch (cs.rank)  			{ -				unsigned int			iterations=0; -				b3Scalar	sqdist=0; -				b3Scalar	alpha=0; -				b3Vector3	lastw[4]; -				unsigned int			clastw=0; -				/* Initialize solver		*/  -				m_free[0]			=	&m_store[0]; -				m_free[1]			=	&m_store[1]; -				m_free[2]			=	&m_store[2]; -				m_free[3]			=	&m_store[3]; -				m_nfree				=	4; -				m_current			=	0; -				m_status			=	eStatus::Valid; -				m_shape				=	shapearg; -				m_distance			=	0; -				/* Initialize simplex		*/  -				m_simplices[0].rank	=	0; -				m_ray				=	guess; -				const b3Scalar	sqrl=	m_ray.length2(); -				appendvertice(m_simplices[0],sqrl>0?-m_ray:b3MakeVector3(1,0,0)); -				m_simplices[0].p[0]	=	1; -				m_ray				=	m_simplices[0].c[0]->w;	 -				sqdist				=	sqrl; -				lastw[0]			= -					lastw[1]			= -					lastw[2]			= -					lastw[3]			=	m_ray; -				/* Loop						*/  -				do	{ -					const unsigned int		next=1-m_current; -					sSimplex&	cs=m_simplices[m_current]; -					sSimplex&	ns=m_simplices[next]; -					/* Check zero							*/  -					const b3Scalar	rl=m_ray.length(); -					if(rl<GJK_MIN_DISTANCE) -					{/* Touching or inside				*/  -						m_status=eStatus::Inside; -						break; -					} -					/* Append new vertice in -'v' direction	*/  -					appendvertice(cs,-m_ray); -					const b3Vector3&	w=cs.c[cs.rank-1]->w; -					bool				found=false; -					for(unsigned int i=0;i<4;++i) +				case 2: +					sqdist = projectorigin(cs.c[0]->w, +										   cs.c[1]->w, +										   weights, mask); +					break; +				case 3: +					sqdist = projectorigin(cs.c[0]->w, +										   cs.c[1]->w, +										   cs.c[2]->w, +										   weights, mask); +					break; +				case 4: +					sqdist = projectorigin(cs.c[0]->w, +										   cs.c[1]->w, +										   cs.c[2]->w, +										   cs.c[3]->w, +										   weights, mask); +					break; +			} +			if (sqdist >= 0) +			{ /* Valid	*/ +				ns.rank = 0; +				m_ray = b3MakeVector3(0, 0, 0); +				m_current = next; +				for (unsigned int i = 0, ni = cs.rank; i < ni; ++i) +				{ +					if (mask & (1 << i))  					{ -						if((w-lastw[i]).length2()<GJK_DUPLICATED_EPS) -						{ found=true;break; } -					} -					if(found) -					{/* Return old simplex				*/  -						removevertice(m_simplices[m_current]); -						break; +						ns.c[ns.rank] = cs.c[i]; +						ns.p[ns.rank++] = weights[i]; +						m_ray += cs.c[i]->w * weights[i];  					}  					else -					{/* Update lastw					*/  -						lastw[clastw=(clastw+1)&3]=w; -					} -					/* Check for termination				*/  -					const b3Scalar	omega=b3Dot(m_ray,w)/rl; -					alpha=b3Max(omega,alpha); -					if(((rl-alpha)-(GJK_ACCURACY*rl))<=0) -					{/* Return old simplex				*/  -						removevertice(m_simplices[m_current]); -						break; -					}		 -					/* Reduce simplex						*/  -					b3Scalar	weights[4]; -					unsigned int			mask=0; -					switch(cs.rank)  					{ -					case	2:	sqdist=projectorigin(	cs.c[0]->w, -									cs.c[1]->w, -									weights,mask);break; -					case	3:	sqdist=projectorigin(	cs.c[0]->w, -									cs.c[1]->w, -									cs.c[2]->w, -									weights,mask);break; -					case	4:	sqdist=projectorigin(	cs.c[0]->w, -									cs.c[1]->w, -									cs.c[2]->w, -									cs.c[3]->w, -									weights,mask);break; -					} -					if(sqdist>=0) -					{/* Valid	*/  -						ns.rank		=	0; -						m_ray		=	b3MakeVector3(0,0,0); -						m_current	=	next; -						for(unsigned int i=0,ni=cs.rank;i<ni;++i) -						{ -							if(mask&(1<<i)) -							{ -								ns.c[ns.rank]		=	cs.c[i]; -								ns.p[ns.rank++]		=	weights[i]; -								m_ray				+=	cs.c[i]->w*weights[i]; -							} -							else -							{ -								m_free[m_nfree++]	=	cs.c[i]; -							} -						} -						if(mask==15) m_status=eStatus::Inside; +						m_free[m_nfree++] = cs.c[i];  					} -					else -					{/* Return old simplex				*/  -						removevertice(m_simplices[m_current]); -						break; -					} -					m_status=((++iterations)<GJK_MAX_ITERATIONS)?m_status:eStatus::Failed; -				} while(m_status==eStatus::Valid); -				m_simplex=&m_simplices[m_current]; -				switch(m_status) +				} +				if (mask == 15) m_status = eStatus::Inside; +			} +			else +			{ /* Return old simplex				*/ +				removevertice(m_simplices[m_current]); +				break; +			} +			m_status = ((++iterations) < GJK_MAX_ITERATIONS) ? m_status : eStatus::Failed; +		} while (m_status == eStatus::Valid); +		m_simplex = &m_simplices[m_current]; +		switch (m_status) +		{ +			case eStatus::Valid: +				m_distance = m_ray.length(); +				break; +			case eStatus::Inside: +				m_distance = 0; +				break; +			default: +			{ +			} +		} +		return (m_status); +	} +	bool EncloseOrigin() +	{ +		switch (m_simplex->rank) +		{ +			case 1: +			{ +				for (unsigned int i = 0; i < 3; ++i)  				{ -				case	eStatus::Valid:		m_distance=m_ray.length();break; -				case	eStatus::Inside:	m_distance=0;break; -				default: -					{ -					} -				}	 -				return(m_status); +					b3Vector3 axis = b3MakeVector3(0, 0, 0); +					axis[i] = 1; +					appendvertice(*m_simplex, axis); +					if (EncloseOrigin()) return (true); +					removevertice(*m_simplex); +					appendvertice(*m_simplex, -axis); +					if (EncloseOrigin()) return (true); +					removevertice(*m_simplex); +				}  			} -			bool					EncloseOrigin() +			break; +			case 2:  			{ -				switch(m_simplex->rank) +				const b3Vector3 d = m_simplex->c[1]->w - m_simplex->c[0]->w; +				for (unsigned int i = 0; i < 3; ++i)  				{ -				case	1: +					b3Vector3 axis = b3MakeVector3(0, 0, 0); +					axis[i] = 1; +					const b3Vector3 p = b3Cross(d, axis); +					if (p.length2() > 0)  					{ -						for(unsigned int i=0;i<3;++i) -						{ -							b3Vector3		axis=b3MakeVector3(0,0,0); -							axis[i]=1; -							appendvertice(*m_simplex, axis); -							if(EncloseOrigin())	return(true); -							removevertice(*m_simplex); -							appendvertice(*m_simplex,-axis); -							if(EncloseOrigin())	return(true); -							removevertice(*m_simplex); -						} +						appendvertice(*m_simplex, p); +						if (EncloseOrigin()) return (true); +						removevertice(*m_simplex); +						appendvertice(*m_simplex, -p); +						if (EncloseOrigin()) return (true); +						removevertice(*m_simplex);  					} -					break; -				case	2: -					{ -						const b3Vector3	d=m_simplex->c[1]->w-m_simplex->c[0]->w; -						for(unsigned int i=0;i<3;++i) -						{ -							b3Vector3		axis=b3MakeVector3(0,0,0); -							axis[i]=1; -							const b3Vector3	p=b3Cross(d,axis); -							if(p.length2()>0) -							{ -								appendvertice(*m_simplex, p); -								if(EncloseOrigin())	return(true); -								removevertice(*m_simplex); -								appendvertice(*m_simplex,-p); -								if(EncloseOrigin())	return(true); -								removevertice(*m_simplex); -							} -						} -					} -					break; -				case	3: -					{ -						const b3Vector3	n=b3Cross(m_simplex->c[1]->w-m_simplex->c[0]->w, -							m_simplex->c[2]->w-m_simplex->c[0]->w); -						if(n.length2()>0) -						{ -							appendvertice(*m_simplex,n); -							if(EncloseOrigin())	return(true); -							removevertice(*m_simplex); -							appendvertice(*m_simplex,-n); -							if(EncloseOrigin())	return(true); -							removevertice(*m_simplex); -						} -					} -					break; -				case	4: -					{ -						if(b3Fabs(det(	m_simplex->c[0]->w-m_simplex->c[3]->w, -							m_simplex->c[1]->w-m_simplex->c[3]->w, -							m_simplex->c[2]->w-m_simplex->c[3]->w))>0) -							return(true); -					} -					break;  				} -				return(false);  			} -			/* Internals	*/  -			void				getsupport(const b3Vector3& d,sSV& sv) const +			break; +			case 3:  			{ -				sv.d	=	d/d.length(); -				sv.w	=	m_shape.Support(sv.d,m_verticesA,m_verticesB); +				const b3Vector3 n = b3Cross(m_simplex->c[1]->w - m_simplex->c[0]->w, +											m_simplex->c[2]->w - m_simplex->c[0]->w); +				if (n.length2() > 0) +				{ +					appendvertice(*m_simplex, n); +					if (EncloseOrigin()) return (true); +					removevertice(*m_simplex); +					appendvertice(*m_simplex, -n); +					if (EncloseOrigin()) return (true); +					removevertice(*m_simplex); +				}  			} -			void				removevertice(sSimplex& simplex) +			break; +			case 4:  			{ -				m_free[m_nfree++]=simplex.c[--simplex.rank]; +				if (b3Fabs(det(m_simplex->c[0]->w - m_simplex->c[3]->w, +							   m_simplex->c[1]->w - m_simplex->c[3]->w, +							   m_simplex->c[2]->w - m_simplex->c[3]->w)) > 0) +					return (true);  			} -			void				appendvertice(sSimplex& simplex,const b3Vector3& v) +			break; +		} +		return (false); +	} +	/* Internals	*/ +	void getsupport(const b3Vector3& d, sSV& sv) const +	{ +		sv.d = d / d.length(); +		sv.w = m_shape.Support(sv.d, m_verticesA, m_verticesB); +	} +	void removevertice(sSimplex& simplex) +	{ +		m_free[m_nfree++] = simplex.c[--simplex.rank]; +	} +	void appendvertice(sSimplex& simplex, const b3Vector3& v) +	{ +		simplex.p[simplex.rank] = 0; +		simplex.c[simplex.rank] = m_free[--m_nfree]; +		getsupport(v, *simplex.c[simplex.rank++]); +	} +	static b3Scalar det(const b3Vector3& a, const b3Vector3& b, const b3Vector3& c) +	{ +		return (a.y * b.z * c.x + a.z * b.x * c.y - +				a.x * b.z * c.y - a.y * b.x * c.z + +				a.x * b.y * c.z - a.z * b.y * c.x); +	} +	static b3Scalar projectorigin(const b3Vector3& a, +								  const b3Vector3& b, +								  b3Scalar* w, unsigned int& m) +	{ +		const b3Vector3 d = b - a; +		const b3Scalar l = d.length2(); +		if (l > GJK_SIMPLEX2_EPS) +		{ +			const b3Scalar t(l > 0 ? -b3Dot(a, d) / l : 0); +			if (t >= 1)  			{ -				simplex.p[simplex.rank]=0; -				simplex.c[simplex.rank]=m_free[--m_nfree]; -				getsupport(v,*simplex.c[simplex.rank++]); +				w[0] = 0; +				w[1] = 1; +				m = 2; +				return (b.length2());  			} -			static b3Scalar		det(const b3Vector3& a,const b3Vector3& b,const b3Vector3& c) +			else if (t <= 0)  			{ -				return(	a.y*b.z*c.x+a.z*b.x*c.y- -					a.x*b.z*c.y-a.y*b.x*c.z+ -					a.x*b.y*c.z-a.z*b.y*c.x); +				w[0] = 1; +				w[1] = 0; +				m = 1; +				return (a.length2());  			} -			static b3Scalar		projectorigin(	const b3Vector3& a, -				const b3Vector3& b, -				b3Scalar* w,unsigned int& m) +			else  			{ -				const b3Vector3	d=b-a; -				const b3Scalar	l=d.length2(); -				if(l>GJK_SIMPLEX2_EPS) -				{ -					const b3Scalar	t(l>0?-b3Dot(a,d)/l:0); -					if(t>=1)		{ w[0]=0;w[1]=1;m=2;return(b.length2()); } -					else if(t<=0)	{ w[0]=1;w[1]=0;m=1;return(a.length2()); } -					else			{ w[0]=1-(w[1]=t);m=3;return((a+d*t).length2()); } -				} -				return(-1); +				w[0] = 1 - (w[1] = t); +				m = 3; +				return ((a + d * t).length2());  			} -			static b3Scalar		projectorigin(	const b3Vector3& a, -				const b3Vector3& b, -				const b3Vector3& c, -				b3Scalar* w,unsigned int& m) +		} +		return (-1); +	} +	static b3Scalar projectorigin(const b3Vector3& a, +								  const b3Vector3& b, +								  const b3Vector3& c, +								  b3Scalar* w, unsigned int& m) +	{ +		static const unsigned int imd3[] = {1, 2, 0}; +		const b3Vector3* vt[] = {&a, &b, &c}; +		const b3Vector3 dl[] = {a - b, b - c, c - a}; +		const b3Vector3 n = b3Cross(dl[0], dl[1]); +		const b3Scalar l = n.length2(); +		if (l > GJK_SIMPLEX3_EPS) +		{ +			b3Scalar mindist = -1; +			b3Scalar subw[2] = {0.f, 0.f}; +			unsigned int subm(0); +			for (unsigned int i = 0; i < 3; ++i)  			{ -				static const unsigned int		imd3[]={1,2,0}; -				const b3Vector3*	vt[]={&a,&b,&c}; -				const b3Vector3		dl[]={a-b,b-c,c-a}; -				const b3Vector3		n=b3Cross(dl[0],dl[1]); -				const b3Scalar		l=n.length2(); -				if(l>GJK_SIMPLEX3_EPS) +				if (b3Dot(*vt[i], b3Cross(dl[i], n)) > 0)  				{ -					b3Scalar	mindist=-1; -					b3Scalar	subw[2]={0.f,0.f}; -					unsigned int			subm(0); -					for(unsigned int i=0;i<3;++i) -					{ -						if(b3Dot(*vt[i],b3Cross(dl[i],n))>0) -						{ -							const unsigned int			j=imd3[i]; -							const b3Scalar	subd(projectorigin(*vt[i],*vt[j],subw,subm)); -							if((mindist<0)||(subd<mindist)) -							{ -								mindist		=	subd; -								m			=	static_cast<unsigned int>(((subm&1)?1<<i:0)+((subm&2)?1<<j:0)); -								w[i]		=	subw[0]; -								w[j]		=	subw[1]; -								w[imd3[j]]	=	0;				 -							} -						} -					} -					if(mindist<0) +					const unsigned int j = imd3[i]; +					const b3Scalar subd(projectorigin(*vt[i], *vt[j], subw, subm)); +					if ((mindist < 0) || (subd < mindist))  					{ -						const b3Scalar	d=b3Dot(a,n);	 -						const b3Scalar	s=b3Sqrt(l); -						const b3Vector3	p=n*(d/l); -						mindist	=	p.length2(); -						m		=	7; -						w[0]	=	(b3Cross(dl[1],b-p)).length()/s; -						w[1]	=	(b3Cross(dl[2],c-p)).length()/s; -						w[2]	=	1-(w[0]+w[1]); +						mindist = subd; +						m = static_cast<unsigned int>(((subm & 1) ? 1 << i : 0) + ((subm & 2) ? 1 << j : 0)); +						w[i] = subw[0]; +						w[j] = subw[1]; +						w[imd3[j]] = 0;  					} -					return(mindist);  				} -				return(-1);  			} -			static b3Scalar		projectorigin(	const b3Vector3& a, -				const b3Vector3& b, -				const b3Vector3& c, -				const b3Vector3& d, -				b3Scalar* w,unsigned int& m) +			if (mindist < 0) +			{ +				const b3Scalar d = b3Dot(a, n); +				const b3Scalar s = b3Sqrt(l); +				const b3Vector3 p = n * (d / l); +				mindist = p.length2(); +				m = 7; +				w[0] = (b3Cross(dl[1], b - p)).length() / s; +				w[1] = (b3Cross(dl[2], c - p)).length() / s; +				w[2] = 1 - (w[0] + w[1]); +			} +			return (mindist); +		} +		return (-1); +	} +	static b3Scalar projectorigin(const b3Vector3& a, +								  const b3Vector3& b, +								  const b3Vector3& c, +								  const b3Vector3& d, +								  b3Scalar* w, unsigned int& m) +	{ +		static const unsigned int imd3[] = {1, 2, 0}; +		const b3Vector3* vt[] = {&a, &b, &c, &d}; +		const b3Vector3 dl[] = {a - d, b - d, c - d}; +		const b3Scalar vl = det(dl[0], dl[1], dl[2]); +		const bool ng = (vl * b3Dot(a, b3Cross(b - c, a - b))) <= 0; +		if (ng && (b3Fabs(vl) > GJK_SIMPLEX4_EPS)) +		{ +			b3Scalar mindist = -1; +			b3Scalar subw[3] = {0.f, 0.f, 0.f}; +			unsigned int subm(0); +			for (unsigned int i = 0; i < 3; ++i)  			{ -				static const unsigned int		imd3[]={1,2,0}; -				const b3Vector3*	vt[]={&a,&b,&c,&d}; -				const b3Vector3		dl[]={a-d,b-d,c-d}; -				const b3Scalar		vl=det(dl[0],dl[1],dl[2]); -				const bool			ng=(vl*b3Dot(a,b3Cross(b-c,a-b)))<=0; -				if(ng&&(b3Fabs(vl)>GJK_SIMPLEX4_EPS)) +				const unsigned int j = imd3[i]; +				const b3Scalar s = vl * b3Dot(d, b3Cross(dl[i], dl[j])); +				if (s > 0)  				{ -					b3Scalar	mindist=-1; -					b3Scalar	subw[3]={0.f,0.f,0.f}; -					unsigned int			subm(0); -					for(unsigned int i=0;i<3;++i) +					const b3Scalar subd = projectorigin(*vt[i], *vt[j], d, subw, subm); +					if ((mindist < 0) || (subd < mindist))  					{ -						const unsigned int			j=imd3[i]; -						const b3Scalar	s=vl*b3Dot(d,b3Cross(dl[i],dl[j])); -						if(s>0) -						{ -							const b3Scalar	subd=projectorigin(*vt[i],*vt[j],d,subw,subm); -							if((mindist<0)||(subd<mindist)) -							{ -								mindist		=	subd; -								m			=	static_cast<unsigned int>((subm&1?1<<i:0)+ -									(subm&2?1<<j:0)+ -									(subm&4?8:0)); -								w[i]		=	subw[0]; -								w[j]		=	subw[1]; -								w[imd3[j]]	=	0; -								w[3]		=	subw[2]; -							} -						} +						mindist = subd; +						m = static_cast<unsigned int>((subm & 1 ? 1 << i : 0) + +													  (subm & 2 ? 1 << j : 0) + +													  (subm & 4 ? 8 : 0)); +						w[i] = subw[0]; +						w[j] = subw[1]; +						w[imd3[j]] = 0; +						w[3] = subw[2];  					} -					if(mindist<0) -					{ -						mindist	=	0; -						m		=	15; -						w[0]	=	det(c,b,d)/vl; -						w[1]	=	det(a,c,d)/vl; -						w[2]	=	det(b,a,d)/vl; -						w[3]	=	1-(w[0]+w[1]+w[2]); -					} -					return(mindist);  				} -				return(-1);  			} -	}; +			if (mindist < 0) +			{ +				mindist = 0; +				m = 15; +				w[0] = det(c, b, d) / vl; +				w[1] = det(a, c, d) / vl; +				w[2] = det(b, a, d) / vl; +				w[3] = 1 - (w[0] + w[1] + w[2]); +			} +			return (mindist); +		} +		return (-1); +	} +}; -	// EPA -	struct	b3EPA +// EPA +struct b3EPA +{ +	/* Types		*/ +	typedef b3GJK::sSV sSV; +	struct sFace  	{ -		/* Types		*/  -		typedef	b3GJK::sSV	sSV; -		struct	sFace -		{ -			b3Vector3	n; -			b3Scalar	d; -			sSV*		c[3]; -			sFace*		f[3]; -			sFace*		l[2]; -			unsigned char			e[3]; -			unsigned char			pass; -		}; -		struct	sList -		{ -			sFace*		root; -			unsigned int			count; -			sList() : root(0),count(0)	{} -		}; -		struct	sHorizon +		b3Vector3 n; +		b3Scalar d; +		sSV* c[3]; +		sFace* f[3]; +		sFace* l[2]; +		unsigned char e[3]; +		unsigned char pass; +	}; +	struct sList +	{ +		sFace* root; +		unsigned int count; +		sList() : root(0), count(0) {} +	}; +	struct sHorizon +	{ +		sFace* cf; +		sFace* ff; +		unsigned int nf; +		sHorizon() : cf(0), ff(0), nf(0) {} +	}; +	struct eStatus +	{ +		enum _  		{ -			sFace*		cf; -			sFace*		ff; -			unsigned int			nf; -			sHorizon() : cf(0),ff(0),nf(0)	{} -		}; -		struct	eStatus { enum _ {  			Valid,  			Touching,  			Degenerated,  			NonConvex, -			InvalidHull,		 +			InvalidHull,  			OutOfFaces,  			OutOfVertices,  			AccuraryReached,  			FallBack, -			Failed		};}; -			/* Fields		*/  -			eStatus::_		m_status; -			b3GJK::sSimplex	m_result; -			b3Vector3		m_normal; -			b3Scalar		m_depth; -			sSV				m_sv_store[EPA_MAX_VERTICES]; -			sFace			m_fc_store[EPA_MAX_FACES]; -			unsigned int				m_nextsv; -			sList			m_hull; -			sList			m_stock; -			/* Methods		*/  -			b3EPA() -			{ -				Initialize();	 -			} +			Failed +		}; +	}; +	/* Fields		*/ +	eStatus::_ m_status; +	b3GJK::sSimplex m_result; +	b3Vector3 m_normal; +	b3Scalar m_depth; +	sSV m_sv_store[EPA_MAX_VERTICES]; +	sFace m_fc_store[EPA_MAX_FACES]; +	unsigned int m_nextsv; +	sList m_hull; +	sList m_stock; +	/* Methods		*/ +	b3EPA() +	{ +		Initialize(); +	} +	static inline void bind(sFace* fa, unsigned int ea, sFace* fb, unsigned int eb) +	{ +		fa->e[ea] = (unsigned char)eb; +		fa->f[ea] = fb; +		fb->e[eb] = (unsigned char)ea; +		fb->f[eb] = fa; +	} +	static inline void append(sList& list, sFace* face) +	{ +		face->l[0] = 0; +		face->l[1] = list.root; +		if (list.root) list.root->l[0] = face; +		list.root = face; +		++list.count; +	} +	static inline void remove(sList& list, sFace* face) +	{ +		if (face->l[1]) face->l[1]->l[0] = face->l[0]; +		if (face->l[0]) face->l[0]->l[1] = face->l[1]; +		if (face == list.root) list.root = face->l[1]; +		--list.count; +	} -			static inline void		bind(sFace* fa,unsigned int ea,sFace* fb,unsigned int eb) -			{ -				fa->e[ea]=(unsigned char)eb;fa->f[ea]=fb; -				fb->e[eb]=(unsigned char)ea;fb->f[eb]=fa; -			} -			static inline void		append(sList& list,sFace* face) +	void Initialize() +	{ +		m_status = eStatus::Failed; +		m_normal = b3MakeVector3(0, 0, 0); +		m_depth = 0; +		m_nextsv = 0; +		for (unsigned int i = 0; i < EPA_MAX_FACES; ++i) +		{ +			append(m_stock, &m_fc_store[EPA_MAX_FACES - i - 1]); +		} +	} +	eStatus::_ Evaluate(b3GJK& gjk, const b3Vector3& guess) +	{ +		b3GJK::sSimplex& simplex = *gjk.m_simplex; +		if ((simplex.rank > 1) && gjk.EncloseOrigin()) +		{ +			/* Clean up				*/ +			while (m_hull.root)  			{ -				face->l[0]	=	0; -				face->l[1]	=	list.root; -				if(list.root) list.root->l[0]=face; -				list.root	=	face; -				++list.count; +				sFace* f = m_hull.root; +				remove(m_hull, f); +				append(m_stock, f);  			} -			static inline void		remove(sList& list,sFace* face) +			m_status = eStatus::Valid; +			m_nextsv = 0; +			/* Orient simplex		*/ +			if (gjk.det(simplex.c[0]->w - simplex.c[3]->w, +						simplex.c[1]->w - simplex.c[3]->w, +						simplex.c[2]->w - simplex.c[3]->w) < 0)  			{ -				if(face->l[1]) face->l[1]->l[0]=face->l[0]; -				if(face->l[0]) face->l[0]->l[1]=face->l[1]; -				if(face==list.root) list.root=face->l[1]; -				--list.count; +				b3Swap(simplex.c[0], simplex.c[1]); +				b3Swap(simplex.p[0], simplex.p[1]);  			} - - -			void				Initialize() +			/* Build initial hull	*/ +			sFace* tetra[] = {newface(simplex.c[0], simplex.c[1], simplex.c[2], true), +							  newface(simplex.c[1], simplex.c[0], simplex.c[3], true), +							  newface(simplex.c[2], simplex.c[1], simplex.c[3], true), +							  newface(simplex.c[0], simplex.c[2], simplex.c[3], true)}; +			if (m_hull.count == 4)  			{ -				m_status	=	eStatus::Failed; -				m_normal	=	b3MakeVector3(0,0,0); -				m_depth		=	0; -				m_nextsv	=	0; -				for(unsigned int i=0;i<EPA_MAX_FACES;++i) +				sFace* best = findbest(); +				sFace outer = *best; +				unsigned int pass = 0; +				unsigned int iterations = 0; +				bind(tetra[0], 0, tetra[1], 0); +				bind(tetra[0], 1, tetra[2], 0); +				bind(tetra[0], 2, tetra[3], 0); +				bind(tetra[1], 1, tetra[3], 2); +				bind(tetra[1], 2, tetra[2], 1); +				bind(tetra[2], 2, tetra[3], 1); +				m_status = eStatus::Valid; +				for (; iterations < EPA_MAX_ITERATIONS; ++iterations)  				{ -					append(m_stock,&m_fc_store[EPA_MAX_FACES-i-1]); -				} -			} -			eStatus::_			Evaluate(b3GJK& gjk,const b3Vector3& guess) -			{ -				b3GJK::sSimplex&	simplex=*gjk.m_simplex; -				if((simplex.rank>1)&&gjk.EncloseOrigin()) -				{ - -					/* Clean up				*/  -					while(m_hull.root) +					if (m_nextsv < EPA_MAX_VERTICES)  					{ -						sFace*	f = m_hull.root; -						remove(m_hull,f); -						append(m_stock,f); -					} -					m_status	=	eStatus::Valid; -					m_nextsv	=	0; -					/* Orient simplex		*/  -					if(gjk.det(	simplex.c[0]->w-simplex.c[3]->w, -						simplex.c[1]->w-simplex.c[3]->w, -						simplex.c[2]->w-simplex.c[3]->w)<0) -					{ -						b3Swap(simplex.c[0],simplex.c[1]); -						b3Swap(simplex.p[0],simplex.p[1]); -					} -					/* Build initial hull	*/  -					sFace*	tetra[]={newface(simplex.c[0],simplex.c[1],simplex.c[2],true), -						newface(simplex.c[1],simplex.c[0],simplex.c[3],true), -						newface(simplex.c[2],simplex.c[1],simplex.c[3],true), -						newface(simplex.c[0],simplex.c[2],simplex.c[3],true)}; -					if(m_hull.count==4) -					{ -						sFace*		best=findbest(); -						sFace		outer=*best; -						unsigned int			pass=0; -						unsigned int			iterations=0; -						bind(tetra[0],0,tetra[1],0); -						bind(tetra[0],1,tetra[2],0); -						bind(tetra[0],2,tetra[3],0); -						bind(tetra[1],1,tetra[3],2); -						bind(tetra[1],2,tetra[2],1); -						bind(tetra[2],2,tetra[3],1); -						m_status=eStatus::Valid; -						for(;iterations<EPA_MAX_ITERATIONS;++iterations) +						sHorizon horizon; +						sSV* w = &m_sv_store[m_nextsv++]; +						bool valid = true; +						best->pass = (unsigned char)(++pass); +						gjk.getsupport(best->n, *w); +						const b3Scalar wdist = b3Dot(best->n, w->w) - best->d; +						if (wdist > EPA_ACCURACY)  						{ -							if(m_nextsv<EPA_MAX_VERTICES) -							{	 -								sHorizon		horizon; -								sSV*			w=&m_sv_store[m_nextsv++]; -								bool			valid=true;					 -								best->pass	=	(unsigned char)(++pass); -								gjk.getsupport(best->n,*w); -								const b3Scalar	wdist=b3Dot(best->n,w->w)-best->d; -								if(wdist>EPA_ACCURACY) -								{ -									for(unsigned int j=0;(j<3)&&valid;++j) -									{ -										valid&=expand(	pass,w, -											best->f[j],best->e[j], -											horizon); -									} -									if(valid&&(horizon.nf>=3)) -									{ -										bind(horizon.cf,1,horizon.ff,2); -										remove(m_hull,best); -										append(m_stock,best); -										best=findbest(); -										outer=*best; -									} else {  -										m_status=eStatus::Failed; -										//m_status=eStatus::InvalidHull; -									break; } -								} else { m_status=eStatus::AccuraryReached;break; } -							} else { m_status=eStatus::OutOfVertices;break; } +							for (unsigned int j = 0; (j < 3) && valid; ++j) +							{ +								valid &= expand(pass, w, +												best->f[j], best->e[j], +												horizon); +							} +							if (valid && (horizon.nf >= 3)) +							{ +								bind(horizon.cf, 1, horizon.ff, 2); +								remove(m_hull, best); +								append(m_stock, best); +								best = findbest(); +								outer = *best; +							} +							else +							{ +								m_status = eStatus::Failed; +								//m_status=eStatus::InvalidHull; +								break; +							} +						} +						else +						{ +							m_status = eStatus::AccuraryReached; +							break;  						} -						const b3Vector3	projection=outer.n*outer.d; -						m_normal	=	outer.n; -						m_depth		=	outer.d; -						m_result.rank	=	3; -						m_result.c[0]	=	outer.c[0]; -						m_result.c[1]	=	outer.c[1]; -						m_result.c[2]	=	outer.c[2]; -						m_result.p[0]	=	b3Cross(	outer.c[1]->w-projection, -							outer.c[2]->w-projection).length(); -						m_result.p[1]	=	b3Cross(	outer.c[2]->w-projection, -							outer.c[0]->w-projection).length(); -						m_result.p[2]	=	b3Cross(	outer.c[0]->w-projection, -							outer.c[1]->w-projection).length(); -						const b3Scalar	sum=m_result.p[0]+m_result.p[1]+m_result.p[2]; -						m_result.p[0]	/=	sum; -						m_result.p[1]	/=	sum; -						m_result.p[2]	/=	sum; -						return(m_status); -					} -				} -				/* Fallback		*/  -				m_status	=	eStatus::FallBack; -				m_normal	=	-guess; -				const b3Scalar	nl=m_normal.length(); -				if(nl>0) -					m_normal	=	m_normal/nl; -				else -					m_normal	=	b3MakeVector3(1,0,0); -				m_depth	=	0; -				m_result.rank=1; -				m_result.c[0]=simplex.c[0]; -				m_result.p[0]=1;	 -				return(m_status); -			} -			bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) -			{ -				const b3Vector3 ba = b->w - a->w; -				const b3Vector3 n_ab = b3Cross(ba, face->n); // Outward facing edge normal direction, on triangle plane -				const b3Scalar a_dot_nab = b3Dot(a->w, n_ab); // Only care about the sign to determine inside/outside, so not normalization required - -				if(a_dot_nab < 0) -				{ -					// Outside of edge a->b - -					const b3Scalar ba_l2 = ba.length2(); -					const b3Scalar a_dot_ba = b3Dot(a->w, ba); -					const b3Scalar b_dot_ba = b3Dot(b->w, ba); - -					if(a_dot_ba > 0) -					{ -						// Pick distance vertex a -						dist = a->w.length(); -					} -					else if(b_dot_ba < 0) -					{ -						// Pick distance vertex b -						dist = b->w.length();  					}  					else  					{ -						// Pick distance to edge a->b -						const b3Scalar a_dot_b = b3Dot(a->w, b->w); -						dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); +						m_status = eStatus::OutOfVertices; +						break;  					} - -					return true;  				} +				const b3Vector3 projection = outer.n * outer.d; +				m_normal = outer.n; +				m_depth = outer.d; +				m_result.rank = 3; +				m_result.c[0] = outer.c[0]; +				m_result.c[1] = outer.c[1]; +				m_result.c[2] = outer.c[2]; +				m_result.p[0] = b3Cross(outer.c[1]->w - projection, +										outer.c[2]->w - projection) +									.length(); +				m_result.p[1] = b3Cross(outer.c[2]->w - projection, +										outer.c[0]->w - projection) +									.length(); +				m_result.p[2] = b3Cross(outer.c[0]->w - projection, +										outer.c[1]->w - projection) +									.length(); +				const b3Scalar sum = m_result.p[0] + m_result.p[1] + m_result.p[2]; +				m_result.p[0] /= sum; +				m_result.p[1] /= sum; +				m_result.p[2] /= sum; +				return (m_status); +			} +		} +		/* Fallback		*/ +		m_status = eStatus::FallBack; +		m_normal = -guess; +		const b3Scalar nl = m_normal.length(); +		if (nl > 0) +			m_normal = m_normal / nl; +		else +			m_normal = b3MakeVector3(1, 0, 0); +		m_depth = 0; +		m_result.rank = 1; +		m_result.c[0] = simplex.c[0]; +		m_result.p[0] = 1; +		return (m_status); +	} +	bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) +	{ +		const b3Vector3 ba = b->w - a->w; +		const b3Vector3 n_ab = b3Cross(ba, face->n);   // Outward facing edge normal direction, on triangle plane +		const b3Scalar a_dot_nab = b3Dot(a->w, n_ab);  // Only care about the sign to determine inside/outside, so not normalization required + +		if (a_dot_nab < 0) +		{ +			// Outside of edge a->b + +			const b3Scalar ba_l2 = ba.length2(); +			const b3Scalar a_dot_ba = b3Dot(a->w, ba); +			const b3Scalar b_dot_ba = b3Dot(b->w, ba); -				return false; +			if (a_dot_ba > 0) +			{ +				// Pick distance vertex a +				dist = a->w.length();  			} -			sFace*				newface(sSV* a,sSV* b,sSV* c,bool forced) +			else if (b_dot_ba < 0)  			{ -				if(m_stock.root) -				{ -					sFace*	face=m_stock.root; -					remove(m_stock,face); -					append(m_hull,face); -					face->pass	=	0; -					face->c[0]	=	a; -					face->c[1]	=	b; -					face->c[2]	=	c; -					face->n		=	b3Cross(b->w-a->w,c->w-a->w); -					const b3Scalar	l=face->n.length(); -					const bool		v=l>EPA_ACCURACY; - -					if(v) -					{ -						if(!(getedgedist(face, a, b, face->d) || -							 getedgedist(face, b, c, face->d) || -							 getedgedist(face, c, a, face->d))) -						{ -							// Origin projects to the interior of the triangle -							// Use distance to triangle plane -							face->d = b3Dot(a->w, face->n) / l; -						} +				// Pick distance vertex b +				dist = b->w.length(); +			} +			else +			{ +				// Pick distance to edge a->b +				const b3Scalar a_dot_b = b3Dot(a->w, b->w); +				dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); +			} -						face->n /= l; -						if(forced || (face->d >= -EPA_PLANE_EPS)) -						{ -							return face; -						} -						else -							m_status=eStatus::NonConvex; -					} -					else -						m_status=eStatus::Degenerated; +			return true; +		} -					remove(m_hull, face); -					append(m_stock, face); -					return 0; +		return false; +	} +	sFace* newface(sSV* a, sSV* b, sSV* c, bool forced) +	{ +		if (m_stock.root) +		{ +			sFace* face = m_stock.root; +			remove(m_stock, face); +			append(m_hull, face); +			face->pass = 0; +			face->c[0] = a; +			face->c[1] = b; +			face->c[2] = c; +			face->n = b3Cross(b->w - a->w, c->w - a->w); +			const b3Scalar l = face->n.length(); +			const bool v = l > EPA_ACCURACY; +			if (v) +			{ +				if (!(getedgedist(face, a, b, face->d) || +					  getedgedist(face, b, c, face->d) || +					  getedgedist(face, c, a, face->d))) +				{ +					// Origin projects to the interior of the triangle +					// Use distance to triangle plane +					face->d = b3Dot(a->w, face->n) / l;  				} -				m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; -				return 0; + +				face->n /= l; +				if (forced || (face->d >= -EPA_PLANE_EPS)) +				{ +					return face; +				} +				else +					m_status = eStatus::NonConvex;  			} -			sFace*				findbest() +			else +				m_status = eStatus::Degenerated; + +			remove(m_hull, face); +			append(m_stock, face); +			return 0; +		} +		m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; +		return 0; +	} +	sFace* findbest() +	{ +		sFace* minf = m_hull.root; +		b3Scalar mind = minf->d * minf->d; +		for (sFace* f = minf->l[1]; f; f = f->l[1]) +		{ +			const b3Scalar sqd = f->d * f->d; +			if (sqd < mind)  			{ -				sFace*		minf=m_hull.root; -				b3Scalar	mind=minf->d*minf->d; -				for(sFace* f=minf->l[1];f;f=f->l[1]) +				minf = f; +				mind = sqd; +			} +		} +		return (minf); +	} +	bool expand(unsigned int pass, sSV* w, sFace* f, unsigned int e, sHorizon& horizon) +	{ +		static const unsigned int i1m3[] = {1, 2, 0}; +		static const unsigned int i2m3[] = {2, 0, 1}; +		if (f->pass != pass) +		{ +			const unsigned int e1 = i1m3[e]; +			if ((b3Dot(f->n, w->w) - f->d) < -EPA_PLANE_EPS) +			{ +				sFace* nf = newface(f->c[e1], f->c[e], w, false); +				if (nf)  				{ -					const b3Scalar	sqd=f->d*f->d; -					if(sqd<mind) -					{ -						minf=f; -						mind=sqd; -					} +					bind(nf, 0, f, e); +					if (horizon.cf) +						bind(horizon.cf, 1, nf, 2); +					else +						horizon.ff = nf; +					horizon.cf = nf; +					++horizon.nf; +					return (true);  				} -				return(minf);  			} -			bool				expand(unsigned int pass,sSV* w,sFace* f,unsigned int e,sHorizon& horizon) +			else  			{ -				static const unsigned int	i1m3[]={1,2,0}; -				static const unsigned int	i2m3[]={2,0,1}; -				if(f->pass!=pass) +				const unsigned int e2 = i2m3[e]; +				f->pass = (unsigned char)pass; +				if (expand(pass, w, f->f[e1], f->e[e1], horizon) && +					expand(pass, w, f->f[e2], f->e[e2], horizon))  				{ -					const unsigned int	e1=i1m3[e]; -					if((b3Dot(f->n,w->w)-f->d)<-EPA_PLANE_EPS) -					{ -						sFace*	nf=newface(f->c[e1],f->c[e],w,false); -						if(nf) -						{ -							bind(nf,0,f,e); -							if(horizon.cf) bind(horizon.cf,1,nf,2); else horizon.ff=nf; -							horizon.cf=nf; -							++horizon.nf; -							return(true); -						} -					} -					else -					{ -						const unsigned int	e2=i2m3[e]; -						f->pass		=	(unsigned char)pass; -						if(	expand(pass,w,f->f[e1],f->e[e1],horizon)&& -							expand(pass,w,f->f[e2],f->e[e2],horizon)) -						{ -							remove(m_hull,f); -							append(m_stock,f); -							return(true); -						} -					} +					remove(m_hull, f); +					append(m_stock, f); +					return (true);  				} -				return(false);  			} - -	}; - -	// -	static void	Initialize(const b3Transform&	transA, const b3Transform&	transB, -								const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  -								const b3AlignedObjectArray<b3Vector3>& verticesA, -								const b3AlignedObjectArray<b3Vector3>& verticesB, -		b3GjkEpaSolver2::sResults& results, -		tShape& shape, -		bool withmargins) -	{ -		/* Results		*/  -		results.witnesses[0]	= -			results.witnesses[1]	=	b3MakeVector3(0,0,0); -		results.status			=	b3GjkEpaSolver2::sResults::Separated; -		/* Shape		*/  -		shape.m_shapes[0]		=	hullA; -		shape.m_shapes[1]		=	hullB; -		shape.m_toshape1		=	transB.getBasis().transposeTimes(transA.getBasis()); -		shape.m_toshape0		=	transA.inverseTimes(transB); -		shape.EnableMargin(withmargins); +		} +		return (false);  	} +}; +// +static void Initialize(const b3Transform& transA, const b3Transform& transB, +					   const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, +					   const b3AlignedObjectArray<b3Vector3>& verticesA, +					   const b3AlignedObjectArray<b3Vector3>& verticesB, +					   b3GjkEpaSolver2::sResults& results, +					   tShape& shape, +					   bool withmargins) +{ +	/* Results		*/ +	results.witnesses[0] = +		results.witnesses[1] = b3MakeVector3(0, 0, 0); +	results.status = b3GjkEpaSolver2::sResults::Separated; +	/* Shape		*/ +	shape.m_shapes[0] = hullA; +	shape.m_shapes[1] = hullB; +	shape.m_toshape1 = transB.getBasis().transposeTimes(transA.getBasis()); +	shape.m_toshape0 = transA.inverseTimes(transB); +	shape.EnableMargin(withmargins);  } +}  // namespace gjkepa2_impl2 +  //  // Api  // -using namespace	gjkepa2_impl2; +using namespace gjkepa2_impl2;  // -int			b3GjkEpaSolver2::StackSizeRequirement() +int b3GjkEpaSolver2::StackSizeRequirement()  { -	return(sizeof(b3GJK)+sizeof(b3EPA)); +	return (sizeof(b3GJK) + sizeof(b3EPA));  }  // -bool		b3GjkEpaSolver2::Distance(	const b3Transform&	transA, const b3Transform&	transB, -										const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  -										const b3AlignedObjectArray<b3Vector3>& verticesA, -										const b3AlignedObjectArray<b3Vector3>& verticesB, -									  const b3Vector3&		guess, -									  sResults&				results) +bool b3GjkEpaSolver2::Distance(const b3Transform& transA, const b3Transform& transB, +							   const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, +							   const b3AlignedObjectArray<b3Vector3>& verticesA, +							   const b3AlignedObjectArray<b3Vector3>& verticesB, +							   const b3Vector3& guess, +							   sResults& results)  { -	tShape			shape; -	Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,false); -	b3GJK				gjk(verticesA,verticesB); -	b3GJK::eStatus::_	gjk_status=gjk.Evaluate(shape,guess); -	if(gjk_status==b3GJK::eStatus::Valid) +	tShape shape; +	Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, false); +	b3GJK gjk(verticesA, verticesB); +	b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); +	if (gjk_status == b3GJK::eStatus::Valid)  	{ -		b3Vector3	w0=b3MakeVector3(0,0,0); -		b3Vector3	w1=b3MakeVector3(0,0,0); -		for(unsigned int i=0;i<gjk.m_simplex->rank;++i) +		b3Vector3 w0 = b3MakeVector3(0, 0, 0); +		b3Vector3 w1 = b3MakeVector3(0, 0, 0); +		for (unsigned int i = 0; i < gjk.m_simplex->rank; ++i)  		{ -			const b3Scalar	p=gjk.m_simplex->p[i]; -			w0+=shape.Support( gjk.m_simplex->c[i]->d,0,verticesA,verticesB)*p; -			w1+=shape.Support(-gjk.m_simplex->c[i]->d,1,verticesA,verticesB)*p; +			const b3Scalar p = gjk.m_simplex->p[i]; +			w0 += shape.Support(gjk.m_simplex->c[i]->d, 0, verticesA, verticesB) * p; +			w1 += shape.Support(-gjk.m_simplex->c[i]->d, 1, verticesA, verticesB) * p;  		} -		results.witnesses[0]	=	transA*w0; -		results.witnesses[1]	=	transA*w1; -		results.normal			=	w0-w1; -		results.distance		=	results.normal.length(); -		results.normal			/=	results.distance>GJK_MIN_DISTANCE?results.distance:1; -		return(true); +		results.witnesses[0] = transA * w0; +		results.witnesses[1] = transA * w1; +		results.normal = w0 - w1; +		results.distance = results.normal.length(); +		results.normal /= results.distance > GJK_MIN_DISTANCE ? results.distance : 1; +		return (true);  	}  	else  	{ -		results.status	=	gjk_status==b3GJK::eStatus::Inside? -			sResults::Penetrating	: -		sResults::GJK_Failed	; -		return(false); +		results.status = gjk_status == b3GJK::eStatus::Inside ? sResults::Penetrating : sResults::GJK_Failed; +		return (false);  	}  }  // -bool	b3GjkEpaSolver2::Penetration(	const b3Transform&	transA, const b3Transform&	transB, -										const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  -										const b3AlignedObjectArray<b3Vector3>& verticesA, -										const b3AlignedObjectArray<b3Vector3>& verticesB, -									 const b3Vector3&		guess, -									 sResults&				results, -									 bool					usemargins) +bool b3GjkEpaSolver2::Penetration(const b3Transform& transA, const b3Transform& transB, +								  const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, +								  const b3AlignedObjectArray<b3Vector3>& verticesA, +								  const b3AlignedObjectArray<b3Vector3>& verticesB, +								  const b3Vector3& guess, +								  sResults& results, +								  bool usemargins)  { - -	tShape			shape; -	Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,usemargins); -	b3GJK				gjk(verticesA,verticesB); -	b3GJK::eStatus::_	gjk_status=gjk.Evaluate(shape,guess); -	switch(gjk_status) +	tShape shape; +	Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, usemargins); +	b3GJK gjk(verticesA, verticesB); +	b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); +	switch (gjk_status)  	{ -	case	b3GJK::eStatus::Inside: +		case b3GJK::eStatus::Inside:  		{ -			b3EPA				epa; -			b3EPA::eStatus::_	epa_status=epa.Evaluate(gjk,-guess); -			if(epa_status!=b3EPA::eStatus::Failed) +			b3EPA epa; +			b3EPA::eStatus::_ epa_status = epa.Evaluate(gjk, -guess); +			if (epa_status != b3EPA::eStatus::Failed)  			{ -				b3Vector3	w0=b3MakeVector3(0,0,0); -				for(unsigned int i=0;i<epa.m_result.rank;++i) +				b3Vector3 w0 = b3MakeVector3(0, 0, 0); +				for (unsigned int i = 0; i < epa.m_result.rank; ++i)  				{ -					w0+=shape.Support(epa.m_result.c[i]->d,0,verticesA,verticesB)*epa.m_result.p[i]; +					w0 += shape.Support(epa.m_result.c[i]->d, 0, verticesA, verticesB) * epa.m_result.p[i];  				} -				results.status			=	sResults::Penetrating; -				results.witnesses[0]	=	transA*w0; -				results.witnesses[1]	=	transA*(w0-epa.m_normal*epa.m_depth); -				results.normal			=	-epa.m_normal; -				results.distance		=	-epa.m_depth; -				return(true); -			} else results.status=sResults::EPA_Failed; +				results.status = sResults::Penetrating; +				results.witnesses[0] = transA * w0; +				results.witnesses[1] = transA * (w0 - epa.m_normal * epa.m_depth); +				results.normal = -epa.m_normal; +				results.distance = -epa.m_depth; +				return (true); +			} +			else +				results.status = sResults::EPA_Failed;  		}  		break; -	case	b3GJK::eStatus::Failed: -		results.status=sResults::GJK_Failed; -		break; +		case b3GJK::eStatus::Failed: +			results.status = sResults::GJK_Failed; +			break;  		default: -					{ -					} +		{ +		}  	} -	return(false); +	return (false);  } -  #if 0  //  b3Scalar	b3GjkEpaSolver2::SignedDistance(const b3Vector3& position, @@ -994,8 +1043,7 @@ bool	b3GjkEpaSolver2::SignedDistance(const btConvexShape*	shape0,  }  #endif - -/* Symbols cleanup		*/  +/* Symbols cleanup		*/  #undef GJK_MAX_ITERATIONS  #undef GJK_ACCURACY diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h index 976238a04c..7db32c6309 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h @@ -29,40 +29,39 @@ GJK-EPA collision solver by Nathanael Presson, 2008  #include "Bullet3Common/b3Transform.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -  ///btGjkEpaSolver contributed under zlib by Nathanael Presson -struct	b3GjkEpaSolver2 +struct b3GjkEpaSolver2  { -struct	sResults +	struct sResults  	{ -	enum eStatus +		enum eStatus  		{ -		Separated,		/* Shapes doesnt penetrate												*/  -		Penetrating,	/* Shapes are penetrating												*/  -		GJK_Failed,		/* GJK phase fail, no big issue, shapes are probably just 'touching'	*/  -		EPA_Failed		/* EPA phase fail, bigger problem, need to save parameters, and debug	*/  -		}		status; -	b3Vector3	witnesses[2]; -	b3Vector3	normal; -	b3Scalar	distance; +			Separated,   /* Shapes doesnt penetrate												*/ +			Penetrating, /* Shapes are penetrating												*/ +			GJK_Failed,  /* GJK phase fail, no big issue, shapes are probably just 'touching'	*/ +			EPA_Failed   /* EPA phase fail, bigger problem, need to save parameters, and debug	*/ +		} status; +		b3Vector3 witnesses[2]; +		b3Vector3 normal; +		b3Scalar distance;  	}; -static int		StackSizeRequirement(); +	static int StackSizeRequirement(); -static bool		Distance(	 const b3Transform&	transA, const b3Transform&	transB, -							const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  -							const b3AlignedObjectArray<b3Vector3>& verticesA, -							const b3AlignedObjectArray<b3Vector3>& verticesB, -							const b3Vector3& guess, -							sResults& results); +	static bool Distance(const b3Transform& transA, const b3Transform& transB, +						 const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, +						 const b3AlignedObjectArray<b3Vector3>& verticesA, +						 const b3AlignedObjectArray<b3Vector3>& verticesB, +						 const b3Vector3& guess, +						 sResults& results); -static bool		Penetration( const b3Transform&	transA, const b3Transform&	transB, -							const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  +	static bool Penetration(const b3Transform& transA, const b3Transform& transB, +							const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,  							const b3AlignedObjectArray<b3Vector3>& verticesA,  							const b3AlignedObjectArray<b3Vector3>& verticesB,  							const b3Vector3& guess,  							sResults& results, -							bool usemargins=true); +							bool usemargins = true);  #if 0  static b3Scalar	SignedDistance(	const b3Vector3& position,  								b3Scalar margin, @@ -74,9 +73,7 @@ static bool		SignedDistance(	const btConvexShape* shape0,const btTransform& wtrs  								const btConvexShape* shape1,const btTransform& wtrs1,  								const b3Vector3& guess,  								sResults& results); -#endif  - +#endif  }; -#endif //B3_GJK_EPA2_H - +#endif  //B3_GJK_EPA2_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp index e9e51d5a36..6f2c5251a0 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp @@ -13,50 +13,45 @@ subject to the following restrictions:  3. This notice may not be removed or altered from any source distribution.  */ -  #include "b3OptimizedBvh.h"  #include "b3StridingMeshInterface.h"  #include "Bullet3Geometry/b3AabbUtil.h" -  b3OptimizedBvh::b3OptimizedBvh() -{  +{  }  b3OptimizedBvh::~b3OptimizedBvh()  {  } -  void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax)  {  	m_useQuantization = useQuantizedAabbCompression; -  	// NodeArray	triangleNodes; -	struct	NodeTriangleCallback : public b3InternalTriangleIndexCallback +	struct NodeTriangleCallback : public b3InternalTriangleIndexCallback  	{ - -		NodeArray&	m_triangleNodes; +		NodeArray& m_triangleNodes;  		NodeTriangleCallback& operator=(NodeTriangleCallback& other)  		{  			m_triangleNodes.copyFromArray(other.m_triangleNodes);  			return *this;  		} -		 -		NodeTriangleCallback(NodeArray&	triangleNodes) -			:m_triangleNodes(triangleNodes) + +		NodeTriangleCallback(NodeArray& triangleNodes) +			: m_triangleNodes(triangleNodes)  		{  		} -		virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int  triangleIndex) +		virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex)  		{  			b3OptimizedBvhNode node; -			b3Vector3	aabbMin,aabbMax; -			aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); -			aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));  +			b3Vector3 aabbMin, aabbMax; +			aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); +			aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT));  			aabbMin.setMin(triangle[0]);  			aabbMax.setMax(triangle[0]);  			aabbMin.setMin(triangle[1]); @@ -69,17 +64,17 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized  			node.m_aabbMaxOrg = aabbMax;  			node.m_escapeIndex = -1; -	 +  			//for child nodes  			node.m_subPart = partId;  			node.m_triangleIndex = triangleIndex;  			m_triangleNodes.push_back(node);  		}  	}; -	struct	QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback +	struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback  	{ -		QuantizedNodeArray&	m_triangleNodes; -		const b3QuantizedBvh* m_optimizedTree; // for quantization +		QuantizedNodeArray& m_triangleNodes; +		const b3QuantizedBvh* m_optimizedTree;  // for quantization  		QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other)  		{ @@ -88,23 +83,23 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized  			return *this;  		} -		QuantizedNodeTriangleCallback(QuantizedNodeArray&	triangleNodes,const b3QuantizedBvh* tree) -			:m_triangleNodes(triangleNodes),m_optimizedTree(tree) +		QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes, const b3QuantizedBvh* tree) +			: m_triangleNodes(triangleNodes), m_optimizedTree(tree)  		{  		} -		virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int  triangleIndex) +		virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex)  		{  			// The partId and triangle index must fit in the same (positive) integer -			b3Assert(partId < (1<<MAX_NUM_PARTS_IN_BITS)); -			b3Assert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS))); +			b3Assert(partId < (1 << MAX_NUM_PARTS_IN_BITS)); +			b3Assert(triangleIndex < (1 << (31 - MAX_NUM_PARTS_IN_BITS)));  			//negative indices are reserved for escapeIndex -			b3Assert(triangleIndex>=0); +			b3Assert(triangleIndex >= 0);  			b3QuantizedBvhNode node; -			b3Vector3	aabbMin,aabbMax; -			aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); -			aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));  +			b3Vector3 aabbMin, aabbMax; +			aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); +			aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT));  			aabbMin.setMin(triangle[0]);  			aabbMax.setMax(triangle[0]);  			aabbMin.setMin(triangle[1]); @@ -131,59 +126,52 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized  				aabbMin.setZ(aabbMin.getZ() - MIN_AABB_HALF_DIMENSION);  			} -			m_optimizedTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0); -			m_optimizedTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1); +			m_optimizedTree->quantize(&node.m_quantizedAabbMin[0], aabbMin, 0); +			m_optimizedTree->quantize(&node.m_quantizedAabbMax[0], aabbMax, 1); -			node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex; +			node.m_escapeIndexOrTriangleIndex = (partId << (31 - MAX_NUM_PARTS_IN_BITS)) | triangleIndex;  			m_triangleNodes.push_back(node);  		}  	}; -	 -  	int numLeafNodes = 0; -	  	if (m_useQuantization)  	{ -  		//initialize quantization values -		setQuantizationValues(bvhAabbMin,bvhAabbMax); +		setQuantizationValues(bvhAabbMin, bvhAabbMax); -		QuantizedNodeTriangleCallback	callback(m_quantizedLeafNodes,this); +		QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes, this); -	 -		triangles->InternalProcessAllTriangles(&callback,m_bvhAabbMin,m_bvhAabbMax); +		triangles->InternalProcessAllTriangles(&callback, m_bvhAabbMin, m_bvhAabbMax);  		//now we have an array of leafnodes in m_leafNodes  		numLeafNodes = m_quantizedLeafNodes.size(); - -		m_quantizedContiguousNodes.resize(2*numLeafNodes); - - -	} else +		m_quantizedContiguousNodes.resize(2 * numLeafNodes); +	} +	else  	{ -		NodeTriangleCallback	callback(m_leafNodes); +		NodeTriangleCallback callback(m_leafNodes); -		b3Vector3 aabbMin=b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); -		b3Vector3 aabbMax=b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); +		b3Vector3 aabbMin = b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); +		b3Vector3 aabbMax = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); -		triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax); +		triangles->InternalProcessAllTriangles(&callback, aabbMin, aabbMax);  		//now we have an array of leafnodes in m_leafNodes  		numLeafNodes = m_leafNodes.size(); -		m_contiguousNodes.resize(2*numLeafNodes); +		m_contiguousNodes.resize(2 * numLeafNodes);  	}  	m_curNodeIndex = 0; -	buildTree(0,numLeafNodes); +	buildTree(0, numLeafNodes);  	///if the entire tree is small then subtree size, we need to create a header info for the tree -	if(m_useQuantization && !m_SubtreeHeaders.size()) +	if (m_useQuantization && !m_SubtreeHeaders.size())  	{  		b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();  		subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); @@ -199,37 +187,29 @@ void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantized  	m_leafNodes.clear();  } - - - -void	b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) +void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax)  {  	if (m_useQuantization)  	{ +		setQuantizationValues(aabbMin, aabbMax); -		setQuantizationValues(aabbMin,aabbMax); - -		updateBvhNodes(meshInterface,0,m_curNodeIndex,0); +		updateBvhNodes(meshInterface, 0, m_curNodeIndex, 0);  		///now update all subtree headers  		int i; -		for (i=0;i<m_SubtreeHeaders.size();i++) +		for (i = 0; i < m_SubtreeHeaders.size(); i++)  		{  			b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];  			subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]);  		} - -	} else +	} +	else  	{ -  	}  } - - - -void	b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) +void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax)  {  	//incrementally initialize quantization values  	b3Assert(m_useQuantization); @@ -244,147 +224,135 @@ void	b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b  	///we should update all quantization values, using updateBvhNodes(meshInterface);  	///but we only update chunks that overlap the given aabb -	 -	unsigned short	quantizedQueryAabbMin[3]; -	unsigned short	quantizedQueryAabbMax[3]; -	quantize(&quantizedQueryAabbMin[0],aabbMin,0); -	quantize(&quantizedQueryAabbMax[0],aabbMax,1); +	unsigned short quantizedQueryAabbMin[3]; +	unsigned short quantizedQueryAabbMax[3]; + +	quantize(&quantizedQueryAabbMin[0], aabbMin, 0); +	quantize(&quantizedQueryAabbMax[0], aabbMax, 1);  	int i; -	for (i=0;i<this->m_SubtreeHeaders.size();i++) +	for (i = 0; i < this->m_SubtreeHeaders.size(); i++)  	{  		b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];  		//PCK: unsigned instead of bool -		unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); +		unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax);  		if (overlap != 0)  		{ -			updateBvhNodes(meshInterface,subtree.m_rootNodeIndex,subtree.m_rootNodeIndex+subtree.m_subtreeSize,i); +			updateBvhNodes(meshInterface, subtree.m_rootNodeIndex, subtree.m_rootNodeIndex + subtree.m_subtreeSize, i);  			subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]);  		}  	} -	  } -void	b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index) +void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int firstNode, int endNode, int index)  {  	(void)index;  	b3Assert(m_useQuantization); -	int curNodeSubPart=-1; +	int curNodeSubPart = -1;  	//get access info to trianglemesh data -		const unsigned char *vertexbase = 0; -		int numverts = 0; -		PHY_ScalarType type = PHY_INTEGER; -		int stride = 0; -		const unsigned char *indexbase = 0; -		int indexstride = 0; -		int numfaces = 0; -		PHY_ScalarType indicestype = PHY_INTEGER; - -		b3Vector3	triangleVerts[3]; -		b3Vector3	aabbMin,aabbMax; -		const b3Vector3& meshScaling = meshInterface->getScaling(); -		 -		int i; -		for (i=endNode-1;i>=firstNode;i--) +	const unsigned char* vertexbase = 0; +	int numverts = 0; +	PHY_ScalarType type = PHY_INTEGER; +	int stride = 0; +	const unsigned char* indexbase = 0; +	int indexstride = 0; +	int numfaces = 0; +	PHY_ScalarType indicestype = PHY_INTEGER; + +	b3Vector3 triangleVerts[3]; +	b3Vector3 aabbMin, aabbMax; +	const b3Vector3& meshScaling = meshInterface->getScaling(); + +	int i; +	for (i = endNode - 1; i >= firstNode; i--) +	{ +		b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; +		if (curNode.isLeafNode())  		{ +			//recalc aabb from triangle data +			int nodeSubPart = curNode.getPartId(); +			int nodeTriangleIndex = curNode.getTriangleIndex(); +			if (nodeSubPart != curNodeSubPart) +			{ +				if (curNodeSubPart >= 0) +					meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); +				meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart); +				curNodeSubPart = nodeSubPart; +				b3Assert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT); +			} +			//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, -			b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; -			if (curNode.isLeafNode()) +			unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride); + +			for (int j = 2; j >= 0; j--)  			{ -				//recalc aabb from triangle data -				int nodeSubPart = curNode.getPartId(); -				int nodeTriangleIndex = curNode.getTriangleIndex(); -				if (nodeSubPart != curNodeSubPart) +				int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j]; +				if (type == PHY_FLOAT)  				{ -					if (curNodeSubPart >= 0) -						meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); -					meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,	type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart); - -					curNodeSubPart = nodeSubPart; -					b3Assert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT); +					float* graphicsbase = (float*)(vertexbase + graphicsindex * stride); +					triangleVerts[j] = b3MakeVector3( +						graphicsbase[0] * meshScaling.getX(), +						graphicsbase[1] * meshScaling.getY(), +						graphicsbase[2] * meshScaling.getZ());  				} -				//triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, - -				unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride); -				 -				 -				for (int j=2;j>=0;j--) +				else  				{ -					 -					int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j]; -					if (type == PHY_FLOAT) -					{ -						float* graphicsbase = (float*)(vertexbase+graphicsindex*stride); -						triangleVerts[j] = b3MakeVector3( -							graphicsbase[0]*meshScaling.getX(), -							graphicsbase[1]*meshScaling.getY(), -							graphicsbase[2]*meshScaling.getZ()); -					} -					else -					{ -						double* graphicsbase = (double*)(vertexbase+graphicsindex*stride); -						triangleVerts[j] = b3MakeVector3( b3Scalar(graphicsbase[0]*meshScaling.getX()), b3Scalar(graphicsbase[1]*meshScaling.getY()), b3Scalar(graphicsbase[2]*meshScaling.getZ())); -					} +					double* graphicsbase = (double*)(vertexbase + graphicsindex * stride); +					triangleVerts[j] = b3MakeVector3(b3Scalar(graphicsbase[0] * meshScaling.getX()), b3Scalar(graphicsbase[1] * meshScaling.getY()), b3Scalar(graphicsbase[2] * meshScaling.getZ()));  				} +			} +			aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); +			aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); +			aabbMin.setMin(triangleVerts[0]); +			aabbMax.setMax(triangleVerts[0]); +			aabbMin.setMin(triangleVerts[1]); +			aabbMax.setMax(triangleVerts[1]); +			aabbMin.setMin(triangleVerts[2]); +			aabbMax.setMax(triangleVerts[2]); + +			quantize(&curNode.m_quantizedAabbMin[0], aabbMin, 0); +			quantize(&curNode.m_quantizedAabbMax[0], aabbMax, 1); +		} +		else +		{ +			//combine aabb from both children -				 -				aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); -				aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT));  -				aabbMin.setMin(triangleVerts[0]); -				aabbMax.setMax(triangleVerts[0]); -				aabbMin.setMin(triangleVerts[1]); -				aabbMax.setMax(triangleVerts[1]); -				aabbMin.setMin(triangleVerts[2]); -				aabbMax.setMax(triangleVerts[2]); - -				quantize(&curNode.m_quantizedAabbMin[0],aabbMin,0); -				quantize(&curNode.m_quantizedAabbMax[0],aabbMax,1); -				 -			} else -			{ -				//combine aabb from both children +			b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i + 1]; -				b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1]; -				 -				b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] : -					&m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()]; -				 +			b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i + 2] : &m_quantizedContiguousNodes[i + 1 + leftChildNode->getEscapeIndex()]; +			{ +				for (int i = 0; i < 3; i++)  				{ -					for (int i=0;i<3;i++) -					{ -						curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; -						if (curNode.m_quantizedAabbMin[i]>rightChildNode->m_quantizedAabbMin[i]) -							curNode.m_quantizedAabbMin[i]=rightChildNode->m_quantizedAabbMin[i]; - -						curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; -						if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) -							curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; -					} +					curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; +					if (curNode.m_quantizedAabbMin[i] > rightChildNode->m_quantizedAabbMin[i]) +						curNode.m_quantizedAabbMin[i] = rightChildNode->m_quantizedAabbMin[i]; + +					curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; +					if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) +						curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i];  				}  			} -  		} +	} -		if (curNodeSubPart >= 0) -			meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); - -		 +	if (curNodeSubPart >= 0) +		meshInterface->unLockReadOnlyVertexBase(curNodeSubPart);  }  ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)  { -	b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian); -	 +	b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer, i_dataBufferSize, i_swapEndian); +  	//we don't add additional data so just do a static upcast  	return static_cast<b3OptimizedBvh*>(bvh);  } diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h index 0272ef83bf..1286552939 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h @@ -22,44 +22,35 @@ subject to the following restrictions:  class b3StridingMeshInterface; -  ///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface. -B3_ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh +B3_ATTRIBUTE_ALIGNED16(class) +b3OptimizedBvh : public b3QuantizedBvh  { -	  public:  	B3_DECLARE_ALIGNED_ALLOCATOR();  protected: -  public: -  	b3OptimizedBvh();  	virtual ~b3OptimizedBvh(); -	void	build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); +	void build(b3StridingMeshInterface * triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); -	void	refit(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin,const b3Vector3& aabbMax); +	void refit(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); -	void	refitPartial(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin, const b3Vector3& aabbMax); +	void refitPartial(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); -	void	updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index); +	void updateBvhNodes(b3StridingMeshInterface * meshInterface, int firstNode, int endNode, int index);  	/// Data buffer MUST be 16 byte aligned -	virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const +	virtual bool serializeInPlace(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const  	{ -		return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian); - +		return b3QuantizedBvh::serialize(o_alignedDataBuffer, i_dataBufferSize, i_swapEndian);  	}  	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -	static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); - - +	static b3OptimizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);  }; - -#endif //B3_OPTIMIZED_BVH_H - - +#endif  //B3_OPTIMIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp index 52027e1118..9a448495f3 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp @@ -17,46 +17,40 @@ subject to the following restrictions:  #include "Bullet3Geometry/b3AabbUtil.h" -  #define RAYAABB2 -b3QuantizedBvh::b3QuantizedBvh() :  -					m_bulletVersion(B3_BULLET_VERSION), -					m_useQuantization(false),  -					m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) -					//m_traversalMode(TRAVERSAL_STACKLESS) -					//m_traversalMode(TRAVERSAL_RECURSIVE) -					,m_subtreeHeaderCount(0) //PCK: add this line +b3QuantizedBvh::b3QuantizedBvh() : m_bulletVersion(B3_BULLET_VERSION), +								   m_useQuantization(false), +								   m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) +								   //m_traversalMode(TRAVERSAL_STACKLESS) +								   //m_traversalMode(TRAVERSAL_RECURSIVE) +								   , +								   m_subtreeHeaderCount(0)  //PCK: add this line  { -	m_bvhAabbMin.setValue(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY); -	m_bvhAabbMax.setValue(B3_INFINITY,B3_INFINITY,B3_INFINITY); +	m_bvhAabbMin.setValue(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY); +	m_bvhAabbMax.setValue(B3_INFINITY, B3_INFINITY, B3_INFINITY);  } - - - -  void b3QuantizedBvh::buildInternal()  {  	///assumes that caller filled in the m_quantizedLeafNodes  	m_useQuantization = true;  	int numLeafNodes = 0; -	 +  	if (m_useQuantization)  	{  		//now we have an array of leafnodes in m_leafNodes  		numLeafNodes = m_quantizedLeafNodes.size(); -		m_quantizedContiguousNodes.resize(2*numLeafNodes); - +		m_quantizedContiguousNodes.resize(2 * numLeafNodes);  	}  	m_curNodeIndex = 0; -	buildTree(0,numLeafNodes); +	buildTree(0, numLeafNodes);  	///if the entire tree is small then subtree size, we need to create a header info for the tree -	if(m_useQuantization && !m_SubtreeHeaders.size()) +	if (m_useQuantization && !m_SubtreeHeaders.size())  	{  		b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();  		subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); @@ -72,35 +66,27 @@ void b3QuantizedBvh::buildInternal()  	m_leafNodes.clear();  } - -  ///just for debugging, to visualize the individual patches/subtrees  #ifdef DEBUG_PATCH_COLORS -b3Vector3 color[4]= -{ -	b3Vector3(1,0,0), -	b3Vector3(0,1,0), -	b3Vector3(0,0,1), -	b3Vector3(0,1,1) -}; -#endif //DEBUG_PATCH_COLORS - - +b3Vector3 color[4] = +	{ +		b3Vector3(1, 0, 0), +		b3Vector3(0, 1, 0), +		b3Vector3(0, 0, 1), +		b3Vector3(0, 1, 1)}; +#endif  //DEBUG_PATCH_COLORS -void	b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin) +void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin)  {  	//enlarge the AABB to avoid division by zero when initializing the quantization values -	b3Vector3 clampValue =b3MakeVector3(quantizationMargin,quantizationMargin,quantizationMargin); +	b3Vector3 clampValue = b3MakeVector3(quantizationMargin, quantizationMargin, quantizationMargin);  	m_bvhAabbMin = bvhAabbMin - clampValue;  	m_bvhAabbMax = bvhAabbMax + clampValue;  	b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; -	m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0),b3Scalar(65533.0),b3Scalar(65533.0)) / aabbSize; +	m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0), b3Scalar(65533.0), b3Scalar(65533.0)) / aabbSize;  	m_useQuantization = true;  } - - -  b3QuantizedBvh::~b3QuantizedBvh()  {  } @@ -108,104 +94,100 @@ b3QuantizedBvh::~b3QuantizedBvh()  #ifdef DEBUG_TREE_BUILDING  int gStackDepth = 0;  int gMaxStackDepth = 0; -#endif //DEBUG_TREE_BUILDING +#endif  //DEBUG_TREE_BUILDING -void	b3QuantizedBvh::buildTree	(int startIndex,int endIndex) +void b3QuantizedBvh::buildTree(int startIndex, int endIndex)  {  #ifdef DEBUG_TREE_BUILDING  	gStackDepth++;  	if (gStackDepth > gMaxStackDepth)  		gMaxStackDepth = gStackDepth; -#endif //DEBUG_TREE_BUILDING - +#endif  //DEBUG_TREE_BUILDING  	int splitAxis, splitIndex, i; -	int numIndices =endIndex-startIndex; +	int numIndices = endIndex - startIndex;  	int curIndex = m_curNodeIndex; -	b3Assert(numIndices>0); +	b3Assert(numIndices > 0); -	if (numIndices==1) +	if (numIndices == 1)  	{  #ifdef DEBUG_TREE_BUILDING  		gStackDepth--; -#endif //DEBUG_TREE_BUILDING -		 -		assignInternalNodeFromLeafNode(m_curNodeIndex,startIndex); +#endif  //DEBUG_TREE_BUILDING + +		assignInternalNodeFromLeafNode(m_curNodeIndex, startIndex);  		m_curNodeIndex++; -		return;	 +		return;  	}  	//calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. -	 -	splitAxis = calcSplittingAxis(startIndex,endIndex); -	splitIndex = sortAndCalcSplittingIndex(startIndex,endIndex,splitAxis); +	splitAxis = calcSplittingAxis(startIndex, endIndex); + +	splitIndex = sortAndCalcSplittingIndex(startIndex, endIndex, splitAxis);  	int internalNodeIndex = m_curNodeIndex; -	 +  	//set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value.  	//the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values -	setInternalNodeAabbMin(m_curNodeIndex,m_bvhAabbMax);//can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization -	setInternalNodeAabbMax(m_curNodeIndex,m_bvhAabbMin);//can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization -	 -	 -	for (i=startIndex;i<endIndex;i++) +	setInternalNodeAabbMin(m_curNodeIndex, m_bvhAabbMax);  //can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization +	setInternalNodeAabbMax(m_curNodeIndex, m_bvhAabbMin);  //can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization + +	for (i = startIndex; i < endIndex; i++)  	{ -		mergeInternalNodeAabb(m_curNodeIndex,getAabbMin(i),getAabbMax(i)); +		mergeInternalNodeAabb(m_curNodeIndex, getAabbMin(i), getAabbMax(i));  	}  	m_curNodeIndex++; -	  	//internalNode->m_escapeIndex; -	 +  	int leftChildNodexIndex = m_curNodeIndex;  	//build left child tree -	buildTree(startIndex,splitIndex); +	buildTree(startIndex, splitIndex);  	int rightChildNodexIndex = m_curNodeIndex;  	//build right child tree -	buildTree(splitIndex,endIndex); +	buildTree(splitIndex, endIndex);  #ifdef DEBUG_TREE_BUILDING  	gStackDepth--; -#endif //DEBUG_TREE_BUILDING +#endif  //DEBUG_TREE_BUILDING  	int escapeIndex = m_curNodeIndex - curIndex;  	if (m_useQuantization)  	{  		//escapeIndex is the number of nodes of this subtree -		const int sizeQuantizedNode =sizeof(b3QuantizedBvhNode); +		const int sizeQuantizedNode = sizeof(b3QuantizedBvhNode);  		const int treeSizeInBytes = escapeIndex * sizeQuantizedNode;  		if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES)  		{ -			updateSubtreeHeaders(leftChildNodexIndex,rightChildNodexIndex); +			updateSubtreeHeaders(leftChildNodexIndex, rightChildNodexIndex);  		} -	} else +	} +	else  	{ -  	} -	setInternalNodeEscapeIndex(internalNodeIndex,escapeIndex); - +	setInternalNodeEscapeIndex(internalNodeIndex, escapeIndex);  } -void	b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex) +void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex)  {  	b3Assert(m_useQuantization);  	b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex];  	int leftSubTreeSize = leftChildNode.isLeafNode() ? 1 : leftChildNode.getEscapeIndex(); -	int leftSubTreeSizeInBytes =  leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); -	 +	int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); +  	b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex];  	int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); -	int rightSubTreeSizeInBytes =  rightSubTreeSize *  static_cast<int>(sizeof(b3QuantizedBvhNode)); +	int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); -	if(leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) +	if (leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES)  	{  		b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();  		subtree.setAabbFromQuantizeNode(leftChildNode); @@ -213,7 +195,7 @@ void	b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChild  		subtree.m_subtreeSize = leftSubTreeSize;  	} -	if(rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) +	if (rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES)  	{  		b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand();  		subtree.setAabbFromQuantizeNode(rightChildNode); @@ -225,32 +207,31 @@ void	b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChild  	m_subtreeHeaderCount = m_SubtreeHeaders.size();  } - -int	b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis) +int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis)  {  	int i; -	int splitIndex =startIndex; +	int splitIndex = startIndex;  	int numIndices = endIndex - startIndex;  	b3Scalar splitValue; -	b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); -	for (i=startIndex;i<endIndex;i++) +	b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); +	for (i = startIndex; i < endIndex; i++)  	{ -		b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); -		means+=center; +		b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); +		means += center;  	} -	means *= (b3Scalar(1.)/(b3Scalar)numIndices); -	 +	means *= (b3Scalar(1.) / (b3Scalar)numIndices); +  	splitValue = means[splitAxis]; -	 +  	//sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'. -	for (i=startIndex;i<endIndex;i++) +	for (i = startIndex; i < endIndex; i++)  	{ -		b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); +		b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i));  		if (center[splitAxis] > splitValue)  		{  			//swap -			swapLeafNodes(i,splitIndex); +			swapLeafNodes(i, splitIndex);  			splitIndex++;  		}  	} @@ -260,56 +241,53 @@ int	b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int sp  	//unbalanced1 is unsafe: it can cause stack overflows  	//bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); -	//unbalanced2 should work too: always use center (perfect balanced trees)	 +	//unbalanced2 should work too: always use center (perfect balanced trees)  	//bool unbalanced2 = true;  	//this should be safe too: -	int rangeBalancedIndices = numIndices/3; -	bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices))); -	 +	int rangeBalancedIndices = numIndices / 3; +	bool unbalanced = ((splitIndex <= (startIndex + rangeBalancedIndices)) || (splitIndex >= (endIndex - 1 - rangeBalancedIndices))); +  	if (unbalanced)  	{ -		splitIndex = startIndex+ (numIndices>>1); +		splitIndex = startIndex + (numIndices >> 1);  	} -	bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex)); +	bool unbal = (splitIndex == startIndex) || (splitIndex == (endIndex));  	(void)unbal;  	b3Assert(!unbal);  	return splitIndex;  } - -int	b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex) +int b3QuantizedBvh::calcSplittingAxis(int startIndex, int endIndex)  {  	int i; -	b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); -	b3Vector3 variance=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); -	int numIndices = endIndex-startIndex; +	b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); +	b3Vector3 variance = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); +	int numIndices = endIndex - startIndex; -	for (i=startIndex;i<endIndex;i++) +	for (i = startIndex; i < endIndex; i++)  	{ -		b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); -		means+=center; +		b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); +		means += center;  	} -	means *= (b3Scalar(1.)/(b3Scalar)numIndices); -		 -	for (i=startIndex;i<endIndex;i++) +	means *= (b3Scalar(1.) / (b3Scalar)numIndices); + +	for (i = startIndex; i < endIndex; i++)  	{ -		b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); -		b3Vector3 diff2 = center-means; +		b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); +		b3Vector3 diff2 = center - means;  		diff2 = diff2 * diff2;  		variance += diff2;  	} -	variance *= (b3Scalar(1.)/	((b3Scalar)numIndices-1)	); -	 +	variance *= (b3Scalar(1.) / ((b3Scalar)numIndices - 1)); +  	return variance.maxAxis();  } - - -void	b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const  {  	//either choose recursive traversal (walkTree) or stackless (walkStacklessTree) @@ -318,38 +296,37 @@ void	b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallb  		///quantize query AABB  		unsigned short int quantizedQueryAabbMin[3];  		unsigned short int quantizedQueryAabbMax[3]; -		quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); -		quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); +		quantizeWithClamp(quantizedQueryAabbMin, aabbMin, 0); +		quantizeWithClamp(quantizedQueryAabbMax, aabbMax, 1);  		switch (m_traversalMode)  		{ -		case TRAVERSAL_STACKLESS: -				walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,0,m_curNodeIndex); -			break; -		case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: -				walkStacklessQuantizedTreeCacheFriendly(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); -			break; -		case TRAVERSAL_RECURSIVE: +			case TRAVERSAL_STACKLESS: +				walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, 0, m_curNodeIndex); +				break; +			case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: +				walkStacklessQuantizedTreeCacheFriendly(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); +				break; +			case TRAVERSAL_RECURSIVE:  			{  				const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0]; -				walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); +				walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax);  			}  			break; -		default: -			//unsupported -			b3Assert(0); +			default: +				//unsupported +				b3Assert(0);  		} -	} else +	} +	else  	{ -		walkStacklessTree(nodeCallback,aabbMin,aabbMax); +		walkStacklessTree(nodeCallback, aabbMin, aabbMax);  	}  } -  static int b3s_maxIterations = 0; - -void	b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const  {  	b3Assert(!m_useQuantization); @@ -363,24 +340,25 @@ void	b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const  	while (curIndex < m_curNodeIndex)  	{  		//catch bugs in tree data -		b3Assert (walkIterations < m_curNodeIndex); +		b3Assert(walkIterations < m_curNodeIndex);  		walkIterations++; -		aabbOverlap = b3TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); +		aabbOverlap = b3TestAabbAgainstAabb2(aabbMin, aabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg);  		isLeafNode = rootNode->m_escapeIndex == -1; -		 +  		//PCK: unsigned instead of bool  		if (isLeafNode && (aabbOverlap != 0))  		{ -			nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); -		}  -		 +			nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); +		} +  		//PCK: unsigned instead of bool  		if ((aabbOverlap != 0) || isLeafNode)  		{  			rootNode++;  			curIndex++; -		} else +		} +		else  		{  			escapeIndex = rootNode->m_escapeIndex;  			rootNode += escapeIndex; @@ -389,7 +367,6 @@ void	b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const  	}  	if (b3s_maxIterations < walkIterations)  		b3s_maxIterations = walkIterations; -  }  /* @@ -413,39 +390,38 @@ void	b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback  }  */ -void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const  {  	b3Assert(m_useQuantization); -	 +  	bool isLeafNode;  	//PCK: unsigned instead of bool  	unsigned aabbOverlap;  	//PCK: unsigned instead of bool -	aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,currentNode->m_quantizedAabbMin,currentNode->m_quantizedAabbMax); +	aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, currentNode->m_quantizedAabbMin, currentNode->m_quantizedAabbMax);  	isLeafNode = currentNode->isLeafNode(); -		 +  	//PCK: unsigned instead of bool  	if (aabbOverlap != 0)  	{  		if (isLeafNode)  		{ -			nodeCallback->processNode(currentNode->getPartId(),currentNode->getTriangleIndex()); -		} else +			nodeCallback->processNode(currentNode->getPartId(), currentNode->getTriangleIndex()); +		} +		else  		{  			//process left and right children -			const b3QuantizedBvhNode* leftChildNode = currentNode+1; -			walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); +			const b3QuantizedBvhNode* leftChildNode = currentNode + 1; +			walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); -			const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex(); -			walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); +			const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode + 1 : leftChildNode + leftChildNode->getEscapeIndex(); +			walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax);  		} -	}		 +	}  } - - -void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const +void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const  {  	b3Assert(!m_useQuantization); @@ -454,11 +430,11 @@ void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall  	int walkIterations = 0;  	bool isLeafNode;  	//PCK: unsigned instead of bool -	unsigned aabbOverlap=0; -	unsigned rayBoxOverlap=0; +	unsigned aabbOverlap = 0; +	unsigned rayBoxOverlap = 0;  	b3Scalar lambda_max = 1.0; -	 -		/* Quick pruning by quantized box */ + +	/* Quick pruning by quantized box */  	b3Vector3 rayAabbMin = raySource;  	b3Vector3 rayAabbMax = raySource;  	rayAabbMin.setMin(rayTarget); @@ -469,15 +445,15 @@ void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall  	rayAabbMax += aabbMax;  #ifdef RAYAABB2 -	b3Vector3 rayDir = (rayTarget-raySource); -	rayDir.normalize (); -	lambda_max = rayDir.dot(rayTarget-raySource); +	b3Vector3 rayDir = (rayTarget - raySource); +	rayDir.normalize(); +	lambda_max = rayDir.dot(rayTarget - raySource);  	///what about division by zero? --> just set rayDirection[i] to 1.0  	b3Vector3 rayDirectionInverse;  	rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0];  	rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1];  	rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; -	unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; +	unsigned int sign[3] = {rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0};  #endif  	b3Vector3 bounds[2]; @@ -486,7 +462,7 @@ void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall  	{  		b3Scalar param = 1.0;  		//catch bugs in tree data -		b3Assert (walkIterations < m_curNodeIndex); +		b3Assert(walkIterations < m_curNodeIndex);  		walkIterations++; @@ -496,34 +472,35 @@ void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall  		bounds[0] -= aabbMax;  		bounds[1] -= aabbMin; -		aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); +		aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin, rayAabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg);  		//perhaps profile if it is worth doing the aabbOverlap test first  #ifdef RAYAABB2 -			///careful with this check: need to check division by zero (above) and fix the unQuantize method -			///thanks Joerg/hiker for the reproduction case! -			///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 -		rayBoxOverlap = aabbOverlap ? b3RayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; +		///careful with this check: need to check division by zero (above) and fix the unQuantize method +		///thanks Joerg/hiker for the reproduction case! +		///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 +		rayBoxOverlap = aabbOverlap ? b3RayAabb2(raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false;  #else  		b3Vector3 normal; -		rayBoxOverlap = b3RayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal); +		rayBoxOverlap = b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal);  #endif  		isLeafNode = rootNode->m_escapeIndex == -1; -		 +  		//PCK: unsigned instead of bool  		if (isLeafNode && (rayBoxOverlap != 0))  		{ -			nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); -		}  -		 +			nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); +		} +  		//PCK: unsigned instead of bool  		if ((rayBoxOverlap != 0) || isLeafNode)  		{  			rootNode++;  			curIndex++; -		} else +		} +		else  		{  			escapeIndex = rootNode->m_escapeIndex;  			rootNode += escapeIndex; @@ -532,15 +509,12 @@ void	b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCall  	}  	if (b3s_maxIterations < walkIterations)  		b3s_maxIterations = walkIterations; -  } - - -void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const +void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const  {  	b3Assert(m_useQuantization); -	 +  	int curIndex = startNodeIndex;  	int walkIterations = 0;  	int subTreeSize = endNodeIndex - startNodeIndex; @@ -548,7 +522,7 @@ void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*  	const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];  	int escapeIndex; -	 +  	bool isLeafNode;  	//PCK: unsigned instead of bool  	unsigned boxBoxOverlap = 0; @@ -557,14 +531,14 @@ void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*  	b3Scalar lambda_max = 1.0;  #ifdef RAYAABB2 -	b3Vector3 rayDirection = (rayTarget-raySource); -	rayDirection.normalize (); -	lambda_max = rayDirection.dot(rayTarget-raySource); +	b3Vector3 rayDirection = (rayTarget - raySource); +	rayDirection.normalize(); +	lambda_max = rayDirection.dot(rayTarget - raySource);  	///what about division by zero? --> just set rayDirection[i] to 1.0  	rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0];  	rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1];  	rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2]; -	unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; +	unsigned int sign[3] = {rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0};  #endif  	/* Quick pruning by quantized box */ @@ -579,37 +553,36 @@ void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*  	unsigned short int quantizedQueryAabbMin[3];  	unsigned short int quantizedQueryAabbMax[3]; -	quantizeWithClamp(quantizedQueryAabbMin,rayAabbMin,0); -	quantizeWithClamp(quantizedQueryAabbMax,rayAabbMax,1); +	quantizeWithClamp(quantizedQueryAabbMin, rayAabbMin, 0); +	quantizeWithClamp(quantizedQueryAabbMax, rayAabbMax, 1);  	while (curIndex < endNodeIndex)  	{ -  //#define VISUALLY_ANALYZE_BVH 1  #ifdef VISUALLY_ANALYZE_BVH  		//some code snippet to debugDraw aabb, to visually analyze bvh structure  		static int drawPatch = 0;  		//need some global access to a debugDrawer  		extern b3IDebugDraw* debugDrawerPtr; -		if (curIndex==drawPatch) +		if (curIndex == drawPatch)  		{ -			b3Vector3 aabbMin,aabbMax; +			b3Vector3 aabbMin, aabbMax;  			aabbMin = unQuantize(rootNode->m_quantizedAabbMin);  			aabbMax = unQuantize(rootNode->m_quantizedAabbMax); -			b3Vector3	color(1,0,0); -			debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); +			b3Vector3 color(1, 0, 0); +			debugDrawerPtr->drawAabb(aabbMin, aabbMax, color);  		} -#endif//VISUALLY_ANALYZE_BVH +#endif  //VISUALLY_ANALYZE_BVH  		//catch bugs in tree data -		b3Assert (walkIterations < subTreeSize); +		b3Assert(walkIterations < subTreeSize);  		walkIterations++;  		//PCK: unsigned instead of bool  		// only interested if this is closer than any previous hit  		b3Scalar param = 1.0;  		rayBoxOverlap = 0; -		boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); +		boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax);  		isLeafNode = rootNode->isLeafNode();  		if (boxBoxOverlap)  		{ @@ -634,24 +607,25 @@ void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*  			///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858  			//B3_PROFILE("b3RayAabb2"); -			rayBoxOverlap = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); -			 +			rayBoxOverlap = b3RayAabb2(raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); +  #else -			rayBoxOverlap = true;//b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); +			rayBoxOverlap = true;  //b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal);  #endif  		} -		 +  		if (isLeafNode && rayBoxOverlap)  		{ -			nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); +			nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex());  		} -		 +  		//PCK: unsigned instead of bool  		if ((rayBoxOverlap != 0) || isLeafNode)  		{  			rootNode++;  			curIndex++; -		} else +		} +		else  		{  			escapeIndex = rootNode->getEscapeIndex();  			rootNode += escapeIndex; @@ -660,13 +634,12 @@ void	b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback*  	}  	if (b3s_maxIterations < walkIterations)  		b3s_maxIterations = walkIterations; -  } -void	b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const +void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const  {  	b3Assert(m_useQuantization); -	 +  	int curIndex = startNodeIndex;  	int walkIterations = 0;  	int subTreeSize = endNodeIndex - startNodeIndex; @@ -674,49 +647,49 @@ void	b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallb  	const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex];  	int escapeIndex; -	 +  	bool isLeafNode;  	//PCK: unsigned instead of bool  	unsigned aabbOverlap;  	while (curIndex < endNodeIndex)  	{ -  //#define VISUALLY_ANALYZE_BVH 1  #ifdef VISUALLY_ANALYZE_BVH  		//some code snippet to debugDraw aabb, to visually analyze bvh structure  		static int drawPatch = 0;  		//need some global access to a debugDrawer  		extern b3IDebugDraw* debugDrawerPtr; -		if (curIndex==drawPatch) +		if (curIndex == drawPatch)  		{ -			b3Vector3 aabbMin,aabbMax; +			b3Vector3 aabbMin, aabbMax;  			aabbMin = unQuantize(rootNode->m_quantizedAabbMin);  			aabbMax = unQuantize(rootNode->m_quantizedAabbMax); -			b3Vector3	color(1,0,0); -			debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); +			b3Vector3 color(1, 0, 0); +			debugDrawerPtr->drawAabb(aabbMin, aabbMax, color);  		} -#endif//VISUALLY_ANALYZE_BVH +#endif  //VISUALLY_ANALYZE_BVH  		//catch bugs in tree data -		b3Assert (walkIterations < subTreeSize); +		b3Assert(walkIterations < subTreeSize);  		walkIterations++;  		//PCK: unsigned instead of bool -		aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); +		aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax);  		isLeafNode = rootNode->isLeafNode(); -		 +  		if (isLeafNode && aabbOverlap)  		{ -			nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); -		}  -		 +			nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex()); +		} +  		//PCK: unsigned instead of bool  		if ((aabbOverlap != 0) || isLeafNode)  		{  			rootNode++;  			curIndex++; -		} else +		} +		else  		{  			escapeIndex = rootNode->getEscapeIndex();  			rootNode += escapeIndex; @@ -725,40 +698,36 @@ void	b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallb  	}  	if (b3s_maxIterations < walkIterations)  		b3s_maxIterations = walkIterations; -  }  //This traversal can be called from Playstation 3 SPU -void	b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const  {  	b3Assert(m_useQuantization);  	int i; - -	for (i=0;i<this->m_SubtreeHeaders.size();i++) +	for (i = 0; i < this->m_SubtreeHeaders.size(); i++)  	{  		const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i];  		//PCK: unsigned instead of bool -		unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); +		unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax);  		if (overlap != 0)  		{ -			walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, -				subtree.m_rootNodeIndex, -				subtree.m_rootNodeIndex+subtree.m_subtreeSize); +			walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, +									   subtree.m_rootNodeIndex, +									   subtree.m_rootNodeIndex + subtree.m_subtreeSize);  		}  	}  } - -void	b3QuantizedBvh::reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const +void b3QuantizedBvh::reportRayOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const  { -	reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,b3MakeVector3(0,0,0),b3MakeVector3(0,0,0)); +	reportBoxCastOverlappingNodex(nodeCallback, raySource, rayTarget, b3MakeVector3(0, 0, 0), b3MakeVector3(0, 0, 0));  } - -void	b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const  {  	//always use stackless @@ -782,31 +751,31 @@ void	b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCa  		reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax);  	}  	*/ -  } - -void	b3QuantizedBvh::swapLeafNodes(int i,int splitIndex) +void b3QuantizedBvh::swapLeafNodes(int i, int splitIndex)  {  	if (m_useQuantization)  	{ -			b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; -			m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; -			m_quantizedLeafNodes[splitIndex] = tmp; -	} else +		b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; +		m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; +		m_quantizedLeafNodes[splitIndex] = tmp; +	} +	else  	{ -			b3OptimizedBvhNode tmp = m_leafNodes[i]; -			m_leafNodes[i] = m_leafNodes[splitIndex]; -			m_leafNodes[splitIndex] = tmp; +		b3OptimizedBvhNode tmp = m_leafNodes[i]; +		m_leafNodes[i] = m_leafNodes[splitIndex]; +		m_leafNodes[splitIndex] = tmp;  	}  } -void	b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex) +void b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex)  {  	if (m_useQuantization)  	{  		m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; -	} else +	} +	else  	{  		m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex];  	} @@ -823,11 +792,10 @@ static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1;  static const unsigned BVH_ALIGNMENT_BLOCKS = 2;  #endif -  unsigned int b3QuantizedBvh::getAlignmentSerializationPadding()  {  	// I changed this to 0 since the extra padding is not needed or used. -	return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; +	return 0;  //BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT;  }  unsigned b3QuantizedBvh::calculateSerializeBufferSize() const @@ -841,12 +809,12 @@ unsigned b3QuantizedBvh::calculateSerializeBufferSize() const  	return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode);  } -bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const +bool b3QuantizedBvh::serialize(void* o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const  {  	b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size());  	m_subtreeHeaderCount = m_SubtreeHeaders.size(); -/*	if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) +	/*	if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))  	{  		///check alignedment for buffer?  		b3Assert(0); @@ -854,7 +822,7 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  	}  */ -	b3QuantizedBvh *targetBvh = (b3QuantizedBvh *)o_alignedDataBuffer; +	b3QuantizedBvh* targetBvh = (b3QuantizedBvh*)o_alignedDataBuffer;  	// construct the class so the virtual function table, etc will be set up  	// Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor @@ -864,10 +832,9 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  	{  		targetBvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(m_curNodeIndex)); - -		b3SwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin); -		b3SwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax); -		b3SwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization); +		b3SwapVector3Endian(m_bvhAabbMin, targetBvh->m_bvhAabbMin); +		b3SwapVector3Endian(m_bvhAabbMax, targetBvh->m_bvhAabbMax); +		b3SwapVector3Endian(m_bvhQuantization, targetBvh->m_bvhQuantization);  		targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode);  		targetBvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(m_subtreeHeaderCount)); @@ -884,12 +851,12 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  	targetBvh->m_useQuantization = m_useQuantization; -	unsigned char *nodeData = (unsigned char *)targetBvh; +	unsigned char* nodeData = (unsigned char*)targetBvh;  	nodeData += sizeof(b3QuantizedBvh); -	 -	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + +	unsigned sizeToAdd = 0;  //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;  	nodeData += sizeToAdd; -	 +  	int nodeCount = m_curNodeIndex;  	if (m_useQuantization) @@ -915,7 +882,6 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  		{  			for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++)  			{ -	  				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0];  				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1];  				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; @@ -925,8 +891,6 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2];  				targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; - -  			}  		}  		nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; @@ -972,7 +936,7 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  		targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0);  	} -	sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; +	sizeToAdd = 0;  //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;  	nodeData += sizeToAdd;  	// Now serialize the subtree headers @@ -1027,14 +991,13 @@ bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBuffe  	return true;  } -b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +b3QuantizedBvh* b3QuantizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian)  { - -	if (i_alignedDataBuffer == NULL)// || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) +	if (i_alignedDataBuffer == NULL)  // || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0))  	{  		return NULL;  	} -	b3QuantizedBvh *bvh = (b3QuantizedBvh *)i_alignedDataBuffer; +	b3QuantizedBvh* bvh = (b3QuantizedBvh*)i_alignedDataBuffer;  	if (i_swapEndian)  	{ @@ -1056,12 +1019,12 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un  		return NULL;  	} -	unsigned char *nodeData = (unsigned char *)bvh; +	unsigned char* nodeData = (unsigned char*)bvh;  	nodeData += sizeof(b3QuantizedBvh); -	 -	unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + +	unsigned sizeToAdd = 0;  //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;  	nodeData += sizeToAdd; -	 +  	int nodeCount = bvh->m_curNodeIndex;  	// Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor @@ -1099,7 +1062,7 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un  			{  				b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg);  				b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); -				 +  				bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex));  				bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart));  				bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); @@ -1108,7 +1071,7 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un  		nodeData += sizeof(b3OptimizedBvhNode) * nodeCount;  	} -	sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; +	sizeToAdd = 0;  //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK;  	nodeData += sizeToAdd;  	// Now serialize the subtree headers @@ -1134,13 +1097,11 @@ b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, un  }  // Constructor that prevents b3Vector3's default constructor from being called -b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) : -m_bvhAabbMin(self.m_bvhAabbMin), -m_bvhAabbMax(self.m_bvhAabbMax), -m_bvhQuantization(self.m_bvhQuantization), -m_bulletVersion(B3_BULLET_VERSION) +b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh& self, bool /* ownsMemory */) : m_bvhAabbMin(self.m_bvhAabbMin), +																			  m_bvhAabbMax(self.m_bvhAabbMax), +																			  m_bvhQuantization(self.m_bvhQuantization), +																			  m_bulletVersion(B3_BULLET_VERSION)  { -  }  void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData) @@ -1150,8 +1111,8 @@ void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedB  	m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization);  	m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; -	m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0; -	 +	m_useQuantization = quantizedBvhFloatData.m_useQuantization != 0; +  	{  		int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes;  		m_contiguousNodes.resize(numElem); @@ -1160,7 +1121,7 @@ void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedB  		{  			b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{  				m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg);  				m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); @@ -1174,11 +1135,11 @@ void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedB  	{  		int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes;  		m_quantizedContiguousNodes.resize(numElem); -		 +  		if (numElem)  		{  			b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{  				m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;  				m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; @@ -1192,16 +1153,16 @@ void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedB  	}  	m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode); -	 +  	{  		int numElem = quantizedBvhFloatData.m_numSubtreeHeaders;  		m_SubtreeHeaders.resize(numElem);  		if (numElem)  		{  			b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{ -				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; +				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0];  				m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];  				m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];  				m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; @@ -1221,8 +1182,8 @@ void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantize  	m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization);  	m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; -	m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0; -	 +	m_useQuantization = quantizedBvhDoubleData.m_useQuantization != 0; +  	{  		int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes;  		m_contiguousNodes.resize(numElem); @@ -1231,7 +1192,7 @@ void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantize  		{  			b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{  				m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg);  				m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); @@ -1245,11 +1206,11 @@ void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantize  	{  		int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes;  		m_quantizedContiguousNodes.resize(numElem); -		 +  		if (numElem)  		{  			b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{  				m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex;  				m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; @@ -1263,16 +1224,16 @@ void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantize  	}  	m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode); -	 +  	{  		int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders;  		m_SubtreeHeaders.resize(numElem);  		if (numElem)  		{  			b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; -			for (int i=0;i<numElem;i++,memPtr++) +			for (int i = 0; i < numElem; i++, memPtr++)  			{ -				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; +				m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0];  				m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1];  				m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2];  				m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; @@ -1283,19 +1244,11 @@ void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantize  			}  		}  	} -  } - -  ///fills the dataBuffer and returns the struct name (and 0 on failure) -const char*	b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const +const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const  {  	b3Assert(0);  	return 0;  } - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h index 63c523c758..48b41abcad 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h @@ -22,11 +22,11 @@ class b3Serializer;  #ifdef DEBUG_CHECK_DEQUANTIZATION  #ifdef __SPU__  #define printf spu_printf -#endif //__SPU__ +#endif  //__SPU__  #include <stdio.h>  #include <stdlib.h> -#endif //DEBUG_CHECK_DEQUANTIZATION +#endif  //DEBUG_CHECK_DEQUANTIZATION  #include "Bullet3Common/b3Vector3.h"  #include "Bullet3Common/b3AlignedAllocator.h" @@ -44,13 +44,10 @@ class b3Serializer;  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h"  #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" - -  //http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp -  //Note: currently we have 16 bytes per quantized node -#define MAX_SUBTREE_SIZE_IN_BYTES  2048 +#define MAX_SUBTREE_SIZE_IN_BYTES 2048  // 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one  // actually) triangles each (since the sign bit is reserved @@ -58,7 +55,8 @@ class b3Serializer;  ///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.  ///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -B3_ATTRIBUTE_ALIGNED16	(struct) b3QuantizedBvhNode : public b3QuantizedBvhNodeData +B3_ATTRIBUTE_ALIGNED16(struct) +b3QuantizedBvhNode : public b3QuantizedBvhNodeData  {  	B3_DECLARE_ALIGNED_ALLOCATOR(); @@ -72,48 +70,48 @@ B3_ATTRIBUTE_ALIGNED16	(struct) b3QuantizedBvhNode : public b3QuantizedBvhNodeDa  		b3Assert(!isLeafNode());  		return -m_escapeIndexOrTriangleIndex;  	} -	int	getTriangleIndex() const +	int getTriangleIndex() const  	{  		b3Assert(isLeafNode()); -		unsigned int x=0; -		unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); +		unsigned int x = 0; +		unsigned int y = (~(x & 0)) << (31 - MAX_NUM_PARTS_IN_BITS);  		// Get only the lower bits where the triangle index is stored -		return (m_escapeIndexOrTriangleIndex&~(y)); +		return (m_escapeIndexOrTriangleIndex & ~(y));  	} -	int	getPartId() const +	int getPartId() const  	{  		b3Assert(isLeafNode());  		// Get only the highest bits where the part index is stored -		return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); +		return (m_escapeIndexOrTriangleIndex >> (31 - MAX_NUM_PARTS_IN_BITS));  	} -} -; +};  /// b3OptimizedBvhNode contains both internal and leaf node information.  /// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. -B3_ATTRIBUTE_ALIGNED16 (struct) b3OptimizedBvhNode +B3_ATTRIBUTE_ALIGNED16(struct) +b3OptimizedBvhNode  {  	B3_DECLARE_ALIGNED_ALLOCATOR();  	//32 bytes -	b3Vector3	m_aabbMinOrg; -	b3Vector3	m_aabbMaxOrg; +	b3Vector3 m_aabbMinOrg; +	b3Vector3 m_aabbMaxOrg;  	//4 -	int	m_escapeIndex; +	int m_escapeIndex;  	//8  	//for child nodes -	int	m_subPart; -	int	m_triangleIndex; +	int m_subPart; +	int m_triangleIndex; -//pad the size to 64 bytes -	char	m_padding[20]; +	//pad the size to 64 bytes +	char m_padding[20];  }; -  ///b3BvhSubtreeInfo provides info to gather a subtree of limited size -B3_ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo : public b3BvhSubtreeInfoData +B3_ATTRIBUTE_ALIGNED16(class) +b3BvhSubtreeInfo : public b3BvhSubtreeInfoData  {  public:  	B3_DECLARE_ALIGNED_ALLOCATOR(); @@ -123,8 +121,7 @@ public:  		//memset(&m_padding[0], 0, sizeof(m_padding));  	} - -	void	setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode) +	void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode)  	{  		m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0];  		m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; @@ -133,14 +130,12 @@ public:  		m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1];  		m_quantizedAabbMax[2] = quantizedNode.m_quantizedAabbMax[2];  	} -} -; - +};  class b3NodeOverlapCallback  {  public: -	virtual ~b3NodeOverlapCallback() {}; +	virtual ~b3NodeOverlapCallback(){};  	virtual void processNode(int subPart, int triangleIndex) = 0;  }; @@ -148,18 +143,16 @@ public:  #include "Bullet3Common/b3AlignedAllocator.h"  #include "Bullet3Common/b3AlignedObjectArray.h" - -  ///for code readability: -typedef b3AlignedObjectArray<b3OptimizedBvhNode>	NodeArray; -typedef b3AlignedObjectArray<b3QuantizedBvhNode>	QuantizedNodeArray; -typedef b3AlignedObjectArray<b3BvhSubtreeInfo>		BvhSubtreeInfoArray; - +typedef b3AlignedObjectArray<b3OptimizedBvhNode> NodeArray; +typedef b3AlignedObjectArray<b3QuantizedBvhNode> QuantizedNodeArray; +typedef b3AlignedObjectArray<b3BvhSubtreeInfo> BvhSubtreeInfoArray;  ///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU.  ///It is used by the b3BvhTriangleMeshShape as midphase  ///It is recommended to use quantization for better performance and lower memory requirements. -B3_ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh +B3_ATTRIBUTE_ALIGNED16(class) +b3QuantizedBvh  {  public:  	enum b3TraversalMode @@ -169,56 +162,48 @@ public:  		TRAVERSAL_RECURSIVE  	}; - - - -	b3Vector3			m_bvhAabbMin; -	b3Vector3			m_bvhAabbMax; -	b3Vector3			m_bvhQuantization; +	b3Vector3 m_bvhAabbMin; +	b3Vector3 m_bvhAabbMax; +	b3Vector3 m_bvhQuantization;  protected: -	int					m_bulletVersion;	//for serialization versioning. It could also be used to detect endianess. +	int m_bulletVersion;  //for serialization versioning. It could also be used to detect endianess. -	int					m_curNodeIndex; +	int m_curNodeIndex;  	//quantization data -	bool				m_useQuantization; +	bool m_useQuantization; +	NodeArray m_leafNodes; +	NodeArray m_contiguousNodes; +	QuantizedNodeArray m_quantizedLeafNodes; +	QuantizedNodeArray m_quantizedContiguousNodes; - -	NodeArray			m_leafNodes; -	NodeArray			m_contiguousNodes; -	QuantizedNodeArray	m_quantizedLeafNodes; -	QuantizedNodeArray	m_quantizedContiguousNodes; -	 -	b3TraversalMode	m_traversalMode; -	BvhSubtreeInfoArray		m_SubtreeHeaders; +	b3TraversalMode m_traversalMode; +	BvhSubtreeInfoArray m_SubtreeHeaders;  	//This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray  	mutable int m_subtreeHeaderCount; -	 - - -  	///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!)  	///this might be refactored into a virtual, it is usually not calculated at run-time -	void	setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin) +	void setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin)  	{  		if (m_useQuantization)  		{ -			quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] ,aabbMin,0); -		} else +			quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0], aabbMin, 0); +		} +		else  		{  			m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; -  		}  	} -	void	setInternalNodeAabbMax(int nodeIndex,const b3Vector3& aabbMax) +	void setInternalNodeAabbMax(int nodeIndex, const b3Vector3& aabbMax)  	{  		if (m_useQuantization)  		{ -			quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0],aabbMax,1); -		} else +			quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0], aabbMax, 1); +		} +		else  		{  			m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax;  		} @@ -232,115 +217,102 @@ protected:  		}  		//non-quantized  		return m_leafNodes[nodeIndex].m_aabbMinOrg; -  	}  	b3Vector3 getAabbMax(int nodeIndex) const  	{  		if (m_useQuantization)  		{  			return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); -		}  +		}  		//non-quantized  		return m_leafNodes[nodeIndex].m_aabbMaxOrg; -		  	} -	 -	void	setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) +	void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex)  	{  		if (m_useQuantization)  		{  			m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; -		}  +		}  		else  		{  			m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex;  		} -  	} -	void mergeInternalNodeAabb(int nodeIndex,const b3Vector3& newAabbMin,const b3Vector3& newAabbMax)  +	void mergeInternalNodeAabb(int nodeIndex, const b3Vector3& newAabbMin, const b3Vector3& newAabbMax)  	{  		if (m_useQuantization)  		{  			unsigned short int quantizedAabbMin[3];  			unsigned short int quantizedAabbMax[3]; -			quantize(quantizedAabbMin,newAabbMin,0); -			quantize(quantizedAabbMax,newAabbMax,1); -			for (int i=0;i<3;i++) +			quantize(quantizedAabbMin, newAabbMin, 0); +			quantize(quantizedAabbMax, newAabbMax, 1); +			for (int i = 0; i < 3; i++)  			{  				if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i])  					m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i];  				if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i])  					m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; -  			} -		} else +		} +		else  		{  			//non-quantized  			m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); -			m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax);		 +			m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax);  		}  	} -	void	swapLeafNodes(int firstIndex,int secondIndex); +	void swapLeafNodes(int firstIndex, int secondIndex); -	void	assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex); +	void assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex);  protected: +	void buildTree(int startIndex, int endIndex); -	 - -	void	buildTree	(int startIndex,int endIndex); +	int calcSplittingAxis(int startIndex, int endIndex); -	int	calcSplittingAxis(int startIndex,int endIndex); +	int sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis); -	int	sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis); -	 -	void	walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; +	void walkStacklessTree(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; -	void	walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; -	void	walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const; -	void	walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; +	void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const; +	void walkStacklessQuantizedTree(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const; +	void walkStacklessTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const;  	///tree traversal designed for small-memory processors like PS3 SPU -	void	walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; +	void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const;  	///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal -	void	walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; +	void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const;  	///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal -	void	walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA,const b3QuantizedBvhNode* treeNodeB,b3NodeOverlapCallback* nodeCallback) const; -	 - - +	void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA, const b3QuantizedBvhNode* treeNodeB, b3NodeOverlapCallback* nodeCallback) const; -	void	updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex); +	void updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex);  public: -	  	B3_DECLARE_ALIGNED_ALLOCATOR();  	b3QuantizedBvh();  	virtual ~b3QuantizedBvh(); -	  	///***************************************** expert/internal use only ************************* -	void	setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); -	QuantizedNodeArray&	getLeafNodeArray() {			return	m_quantizedLeafNodes;	} +	void setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin = b3Scalar(1.0)); +	QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; }  	///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized -	void	buildInternal(); +	void buildInternal();  	///***************************************** expert/internal use only ************************* -	void	reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; -	void	reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; -	void	reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; +	void reportAabbOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; +	void reportRayOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; +	void reportBoxCastOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; -		B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point,int isMax) const +	B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point, int isMax) const  	{ -  		b3Assert(m_useQuantization);  		b3Assert(point.getX() <= m_bvhAabbMax.getX()); @@ -357,122 +329,114 @@ public:  		///@todo: double-check this  		if (isMax)  		{ -			out[0] = (unsigned short) (((unsigned short)(v.getX()+b3Scalar(1.)) | 1)); -			out[1] = (unsigned short) (((unsigned short)(v.getY()+b3Scalar(1.)) | 1)); -			out[2] = (unsigned short) (((unsigned short)(v.getZ()+b3Scalar(1.)) | 1)); -		} else +			out[0] = (unsigned short)(((unsigned short)(v.getX() + b3Scalar(1.)) | 1)); +			out[1] = (unsigned short)(((unsigned short)(v.getY() + b3Scalar(1.)) | 1)); +			out[2] = (unsigned short)(((unsigned short)(v.getZ() + b3Scalar(1.)) | 1)); +		} +		else  		{ -			out[0] = (unsigned short) (((unsigned short)(v.getX()) & 0xfffe)); -			out[1] = (unsigned short) (((unsigned short)(v.getY()) & 0xfffe)); -			out[2] = (unsigned short) (((unsigned short)(v.getZ()) & 0xfffe)); +			out[0] = (unsigned short)(((unsigned short)(v.getX()) & 0xfffe)); +			out[1] = (unsigned short)(((unsigned short)(v.getY()) & 0xfffe)); +			out[2] = (unsigned short)(((unsigned short)(v.getZ()) & 0xfffe));  		} -  #ifdef DEBUG_CHECK_DEQUANTIZATION  		b3Vector3 newPoint = unQuantize(out);  		if (isMax)  		{  			if (newPoint.getX() < point.getX())  			{ -				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); +				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX());  			}  			if (newPoint.getY() < point.getY())  			{ -				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); +				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY());  			}  			if (newPoint.getZ() < point.getZ())  			{ - -				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); +				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ());  			} -		} else +		} +		else  		{  			if (newPoint.getX() > point.getX())  			{ -				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); +				printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX());  			}  			if (newPoint.getY() > point.getY())  			{ -				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); +				printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY());  			}  			if (newPoint.getZ() > point.getZ())  			{ -				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); +				printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ());  			}  		} -#endif //DEBUG_CHECK_DEQUANTIZATION - +#endif  //DEBUG_CHECK_DEQUANTIZATION  	} - -	B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2,int isMax) const +	B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2, int isMax) const  	{ -  		b3Assert(m_useQuantization);  		b3Vector3 clampedPoint(point2);  		clampedPoint.setMax(m_bvhAabbMin);  		clampedPoint.setMin(m_bvhAabbMax); -		quantize(out,clampedPoint,isMax); - +		quantize(out, clampedPoint, isMax);  	} -	 -	B3_FORCE_INLINE b3Vector3	unQuantize(const unsigned short* vecIn) const + +	B3_FORCE_INLINE b3Vector3 unQuantize(const unsigned short* vecIn) const  	{ -			b3Vector3	vecOut; -			vecOut.setValue( +		b3Vector3 vecOut; +		vecOut.setValue(  			(b3Scalar)(vecIn[0]) / (m_bvhQuantization.getX()),  			(b3Scalar)(vecIn[1]) / (m_bvhQuantization.getY()),  			(b3Scalar)(vecIn[2]) / (m_bvhQuantization.getZ())); -			vecOut += m_bvhAabbMin; -			return vecOut; +		vecOut += m_bvhAabbMin; +		return vecOut;  	}  	///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. -	void	setTraversalMode(b3TraversalMode	traversalMode) +	void setTraversalMode(b3TraversalMode traversalMode)  	{  		m_traversalMode = traversalMode;  	} - -	B3_FORCE_INLINE QuantizedNodeArray&	getQuantizedNodeArray() -	{	 -		return	m_quantizedContiguousNodes; +	B3_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() +	{ +		return m_quantizedContiguousNodes;  	} - -	B3_FORCE_INLINE BvhSubtreeInfoArray&	getSubtreeInfoArray() +	B3_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray()  	{  		return m_SubtreeHeaders;  	} -//////////////////////////////////////////////////////////////////// +	////////////////////////////////////////////////////////////////////  	/////Calculate space needed to store BVH for serialization  	unsigned calculateSerializeBufferSize() const;  	/// Data buffer MUST be 16 byte aligned -	virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; +	virtual bool serialize(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const;  	///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -	static b3QuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); +	static b3QuantizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian);  	static unsigned int getAlignmentSerializationPadding(); -////////////////////////////////////////////////////////////////////// +	////////////////////////////////////////////////////////////////////// -	 -	virtual	int	calculateSerializeBufferSizeNew() const; +	virtual int calculateSerializeBufferSizeNew() const;  	///fills the dataBuffer and returns the struct name (and 0 on failure) -	virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const; +	virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; -	virtual	void deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData); +	virtual void deSerializeFloat(struct b3QuantizedBvhFloatData & quantizedBvhFloatData); -	virtual	void deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData); +	virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData & quantizedBvhDoubleData); - -//////////////////////////////////////////////////////////////////// +	////////////////////////////////////////////////////////////////////  	B3_FORCE_INLINE bool isQuantized()  	{ @@ -483,74 +447,65 @@ private:  	// Special "copy" constructor that allows for in-place deserialization  	// Prevents b3Vector3's default constructor from being called, but doesn't inialize much else  	// ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) -	b3QuantizedBvh(b3QuantizedBvh &other, bool ownsMemory); - -} -; - +	b3QuantizedBvh(b3QuantizedBvh & other, bool ownsMemory); +};  struct b3OptimizedBvhNodeFloatData  { -	b3Vector3FloatData	m_aabbMinOrg; -	b3Vector3FloatData	m_aabbMaxOrg; -	int	m_escapeIndex; -	int	m_subPart; -	int	m_triangleIndex; +	b3Vector3FloatData m_aabbMinOrg; +	b3Vector3FloatData m_aabbMaxOrg; +	int m_escapeIndex; +	int m_subPart; +	int m_triangleIndex;  	char m_pad[4];  };  struct b3OptimizedBvhNodeDoubleData  { -	b3Vector3DoubleData	m_aabbMinOrg; -	b3Vector3DoubleData	m_aabbMaxOrg; -	int	m_escapeIndex; -	int	m_subPart; -	int	m_triangleIndex; -	char	m_pad[4]; +	b3Vector3DoubleData m_aabbMinOrg; +	b3Vector3DoubleData m_aabbMaxOrg; +	int m_escapeIndex; +	int m_subPart; +	int m_triangleIndex; +	char m_pad[4];  }; - - -struct	b3QuantizedBvhFloatData +struct b3QuantizedBvhFloatData  { -	b3Vector3FloatData			m_bvhAabbMin; -	b3Vector3FloatData			m_bvhAabbMax; -	b3Vector3FloatData			m_bvhQuantization; -	int					m_curNodeIndex; -	int					m_useQuantization; -	int					m_numContiguousLeafNodes; -	int					m_numQuantizedContiguousNodes; -	b3OptimizedBvhNodeFloatData	*m_contiguousNodesPtr; -	b3QuantizedBvhNodeData		*m_quantizedContiguousNodesPtr; -	b3BvhSubtreeInfoData	*m_subTreeInfoPtr; -	int					m_traversalMode; -	int					m_numSubtreeHeaders; -	 +	b3Vector3FloatData m_bvhAabbMin; +	b3Vector3FloatData m_bvhAabbMax; +	b3Vector3FloatData m_bvhQuantization; +	int m_curNodeIndex; +	int m_useQuantization; +	int m_numContiguousLeafNodes; +	int m_numQuantizedContiguousNodes; +	b3OptimizedBvhNodeFloatData* m_contiguousNodesPtr; +	b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; +	b3BvhSubtreeInfoData* m_subTreeInfoPtr; +	int m_traversalMode; +	int m_numSubtreeHeaders;  }; -struct	b3QuantizedBvhDoubleData +struct b3QuantizedBvhDoubleData  { -	b3Vector3DoubleData			m_bvhAabbMin; -	b3Vector3DoubleData			m_bvhAabbMax; -	b3Vector3DoubleData			m_bvhQuantization; -	int							m_curNodeIndex; -	int							m_useQuantization; -	int							m_numContiguousLeafNodes; -	int							m_numQuantizedContiguousNodes; -	b3OptimizedBvhNodeDoubleData	*m_contiguousNodesPtr; -	b3QuantizedBvhNodeData			*m_quantizedContiguousNodesPtr; - -	int							m_traversalMode; -	int							m_numSubtreeHeaders; -	b3BvhSubtreeInfoData		*m_subTreeInfoPtr; +	b3Vector3DoubleData m_bvhAabbMin; +	b3Vector3DoubleData m_bvhAabbMax; +	b3Vector3DoubleData m_bvhQuantization; +	int m_curNodeIndex; +	int m_useQuantization; +	int m_numContiguousLeafNodes; +	int m_numQuantizedContiguousNodes; +	b3OptimizedBvhNodeDoubleData* m_contiguousNodesPtr; +	b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; + +	int m_traversalMode; +	int m_numSubtreeHeaders; +	b3BvhSubtreeInfoData* m_subTreeInfoPtr;  }; - -B3_FORCE_INLINE	int	b3QuantizedBvh::calculateSerializeBufferSizeNew() const +B3_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const  {  	return sizeof(b3QuantizedBvhData);  } - - -#endif //B3_QUANTIZED_BVH_H +#endif  //B3_QUANTIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp index 4d97f7f62b..6b0c941f23 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp @@ -15,35 +15,32 @@ subject to the following restrictions:  #include "b3StridingMeshInterface.h" -  b3StridingMeshInterface::~b3StridingMeshInterface()  { -  } - -void	b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const  {  	(void)aabbMin;  	(void)aabbMax;  	int numtotalphysicsverts = 0; -	int part,graphicssubparts = getNumSubParts(); -	const unsigned char * vertexbase; -	const unsigned char * indexbase; +	int part, graphicssubparts = getNumSubParts(); +	const unsigned char* vertexbase; +	const unsigned char* indexbase;  	int indexstride;  	PHY_ScalarType type;  	PHY_ScalarType gfxindextype; -	int stride,numverts,numtriangles; +	int stride, numverts, numtriangles;  	int gfxindex;  	b3Vector3 triangle[3];  	b3Vector3 meshScaling = getScaling();  	///if the number of parts is big, the performance might drop due to the innerloop switch on indextype -	for (part=0;part<graphicssubparts ;part++) +	for (part = 0; part < graphicssubparts; part++)  	{ -		getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part); -		numtotalphysicsverts+=numtriangles*3; //upper bound +		getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numtriangles, gfxindextype, part); +		numtotalphysicsverts += numtriangles * 3;  //upper bound  		///unlike that developers want to pass in double-precision meshes in single-precision Bullet build  		///so disable this feature by default @@ -51,143 +48,141 @@ void	b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleInde  		switch (type)  		{ -		case PHY_FLOAT: -		 { - -			 float* graphicsbase; - -			 switch (gfxindextype) -			 { -			 case PHY_INTEGER: -				 { -					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++) -					 { -						 unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); -						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); -						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); -						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); -						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 callback->internalProcessTriangleIndex(triangle,part,gfxindex); -					 } -					 break; -				 } -			 case PHY_SHORT: -				 { -					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++) -					 { -						 unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); -						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); -						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); -						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); -						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 callback->internalProcessTriangleIndex(triangle,part,gfxindex); -					 } -					 break; -				 } -			case PHY_UCHAR: -				 { -					 for (gfxindex=0;gfxindex<numtriangles;gfxindex++) -					 { -						 unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); -						 graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); -						 triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); -						 triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); -						 triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),	graphicsbase[2]*meshScaling.getZ()); -						 callback->internalProcessTriangleIndex(triangle,part,gfxindex); -					 } -					 break; -				 } -			 default: -				 b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); -			 } -			 break; -		 } - -		case PHY_DOUBLE: +			case PHY_FLOAT: +			{ +				float* graphicsbase; + +				switch (gfxindextype) +				{ +					case PHY_INTEGER: +					{ +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) +						{ +							unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); +							graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex); +						} +						break; +					} +					case PHY_SHORT: +					{ +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) +						{ +							unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); +							graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex); +						} +						break; +					} +					case PHY_UCHAR: +					{ +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) +						{ +							unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); +							graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex); +						} +						break; +					} +					default: +						b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); +				} +				break; +			} + +			case PHY_DOUBLE:  			{  				double* graphicsbase;  				switch (gfxindextype)  				{ -				case PHY_INTEGER: +					case PHY_INTEGER:  					{ -						for (gfxindex=0;gfxindex<numtriangles;gfxindex++) +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++)  						{ -							unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); -							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); -							triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); -							triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); -							triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							callback->internalProcessTriangleIndex(triangle,part,gfxindex); +							unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); +							graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex);  						}  						break;  					} -				case PHY_SHORT: +					case PHY_SHORT:  					{ -						for (gfxindex=0;gfxindex<numtriangles;gfxindex++) +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++)  						{ -							unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); -							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); -							triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); -							triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); -							triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							callback->internalProcessTriangleIndex(triangle,part,gfxindex); +							unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); +							graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex);  						}  						break;  					} -				case PHY_UCHAR: +					case PHY_UCHAR:  					{ -						for (gfxindex=0;gfxindex<numtriangles;gfxindex++) +						for (gfxindex = 0; gfxindex < numtriangles; gfxindex++)  						{ -							unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); -							graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); -							triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); -							triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); -							triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),  (b3Scalar)graphicsbase[2]*meshScaling.getZ()); -							callback->internalProcessTriangleIndex(triangle,part,gfxindex); +							unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); +							graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); +							triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); +							triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); +							triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); +							callback->internalProcessTriangleIndex(triangle, part, gfxindex);  						}  						break;  					} -				default: -					b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); +					default: +						b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT));  				}  				break;  			} -		default: -			b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); +			default: +				b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE));  		}  		unLockReadOnlyVertexBase(part);  	}  } -void	b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax) +void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin, b3Vector3& aabbMax)  { - -	struct	AabbCalculationCallback : public b3InternalTriangleIndexCallback +	struct AabbCalculationCallback : public b3InternalTriangleIndexCallback  	{ -		b3Vector3	m_aabbMin; -		b3Vector3	m_aabbMax; +		b3Vector3 m_aabbMin; +		b3Vector3 m_aabbMax;  		AabbCalculationCallback()  		{ -			m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); -			m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); +			m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); +			m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT));  		} -		virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int  triangleIndex) +		virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex)  		{  			(void)partId;  			(void)triangleIndex; @@ -202,13 +197,11 @@ void	b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vecto  	};  	//first calculate the total aabb for all triangles -	AabbCalculationCallback	aabbCallback; -	aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); -	aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); -	InternalProcessAllTriangles(&aabbCallback,aabbMin,aabbMax); +	AabbCalculationCallback aabbCallback; +	aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); +	aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); +	InternalProcessAllTriangles(&aabbCallback, aabbMin, aabbMax);  	aabbMin = aabbCallback.m_aabbMin;  	aabbMax = aabbCallback.m_aabbMax;  } - - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h index 9513f68f77..087b30f3e6 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h @@ -20,148 +20,139 @@ subject to the following restrictions:  #include "b3TriangleCallback.h"  //#include "b3ConcaveShape.h" - -enum  	PHY_ScalarType {  -  PHY_FLOAT, PHY_DOUBLE, PHY_INTEGER, PHY_SHORT,  -  PHY_FIXEDPOINT88, PHY_UCHAR  +enum PHY_ScalarType +{ +	PHY_FLOAT, +	PHY_DOUBLE, +	PHY_INTEGER, +	PHY_SHORT, +	PHY_FIXEDPOINT88, +	PHY_UCHAR  }; -  ///	The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes.  /// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.  /// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory. -B3_ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface +B3_ATTRIBUTE_ALIGNED16(class) +b3StridingMeshInterface  { -	protected: -	 -		b3Vector3 m_scaling; - -	public: -		B3_DECLARE_ALIGNED_ALLOCATOR(); -		 -		b3StridingMeshInterface() :m_scaling(b3MakeVector3(b3Scalar(1.),b3Scalar(1.),b3Scalar(1.))) -		{ - -		} - -		virtual ~b3StridingMeshInterface(); - - - -		virtual void	InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; - -		///brute force method to calculate aabb -		void	calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax); - -		/// get read and write access to a subpart of a triangle mesh -		/// this subpart has a continuous array of vertices and indices -		/// in this way the mesh can be handled as chunks of memory with striding -		/// very similar to OpenGL vertexarray support -		/// make a call to unLockVertexBase when the read and write access is finished	 -		virtual void	getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0)=0; -		 -		virtual void	getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const=0; -	 -		/// unLockVertexBase finishes the access to a subpart of the triangle mesh -		/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished -		virtual void	unLockVertexBase(int subpart)=0; - -		virtual void	unLockReadOnlyVertexBase(int subpart) const=0; - - -		/// getNumSubParts returns the number of seperate subparts -		/// each subpart has a continuous array of vertices and indices -		virtual int		getNumSubParts() const=0; - -		virtual void	preallocateVertices(int numverts)=0; -		virtual void	preallocateIndices(int numindices)=0; - -		virtual bool	hasPremadeAabb() const { return false; } -		virtual void	setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const -                { -                        (void) aabbMin; -                        (void) aabbMax; -                } -		virtual void	getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const -        { -            (void) aabbMin; -            (void) aabbMax; -        } - -		const b3Vector3&	getScaling() const { -			return m_scaling; -		} -		void	setScaling(const b3Vector3& scaling) -		{ -			m_scaling = scaling; -		} - -		virtual	int	calculateSerializeBufferSize() const; - -		///fills the dataBuffer and returns the struct name (and 0 on failure) -		//virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const; - - +protected: +	b3Vector3 m_scaling; + +public: +	B3_DECLARE_ALIGNED_ALLOCATOR(); + +	b3StridingMeshInterface() : m_scaling(b3MakeVector3(b3Scalar(1.), b3Scalar(1.), b3Scalar(1.))) +	{ +	} + +	virtual ~b3StridingMeshInterface(); + +	virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback * callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; + +	///brute force method to calculate aabb +	void calculateAabbBruteForce(b3Vector3 & aabbMin, b3Vector3 & aabbMax); + +	/// get read and write access to a subpart of a triangle mesh +	/// this subpart has a continuous array of vertices and indices +	/// in this way the mesh can be handled as chunks of memory with striding +	/// very similar to OpenGL vertexarray support +	/// make a call to unLockVertexBase when the read and write access is finished +	virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) = 0; + +	virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const = 0; + +	/// unLockVertexBase finishes the access to a subpart of the triangle mesh +	/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished +	virtual void unLockVertexBase(int subpart) = 0; + +	virtual void unLockReadOnlyVertexBase(int subpart) const = 0; + +	/// getNumSubParts returns the number of seperate subparts +	/// each subpart has a continuous array of vertices and indices +	virtual int getNumSubParts() const = 0; + +	virtual void preallocateVertices(int numverts) = 0; +	virtual void preallocateIndices(int numindices) = 0; + +	virtual bool hasPremadeAabb() const { return false; } +	virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const +	{ +		(void)aabbMin; +		(void)aabbMax; +	} +	virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const +	{ +		(void)aabbMin; +		(void)aabbMax; +	} + +	const b3Vector3& getScaling() const +	{ +		return m_scaling; +	} +	void setScaling(const b3Vector3& scaling) +	{ +		m_scaling = scaling; +	} + +	virtual int calculateSerializeBufferSize() const; + +	///fills the dataBuffer and returns the struct name (and 0 on failure) +	//virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;  }; -struct	b3IntIndexData +struct b3IntIndexData  { -	int	m_value; +	int m_value;  }; -struct	b3ShortIntIndexData +struct b3ShortIntIndexData  {  	short m_value;  	char m_pad[2];  }; -struct	b3ShortIntIndexTripletData +struct b3ShortIntIndexTripletData  { -	short	m_values[3]; -	char	m_pad[2]; +	short m_values[3]; +	char m_pad[2];  }; -struct	b3CharIndexTripletData +struct b3CharIndexTripletData  {  	unsigned char m_values[3]; -	char	m_pad; +	char m_pad;  }; -  ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct	b3MeshPartData +struct b3MeshPartData  { -	b3Vector3FloatData			*m_vertices3f; -	b3Vector3DoubleData			*m_vertices3d; +	b3Vector3FloatData* m_vertices3f; +	b3Vector3DoubleData* m_vertices3d; -	b3IntIndexData				*m_indices32; -	b3ShortIntIndexTripletData	*m_3indices16; -	b3CharIndexTripletData		*m_3indices8; +	b3IntIndexData* m_indices32; +	b3ShortIntIndexTripletData* m_3indices16; +	b3CharIndexTripletData* m_3indices8; -	b3ShortIntIndexData			*m_indices16;//backwards compatibility +	b3ShortIntIndexData* m_indices16;  //backwards compatibility -	int                     m_numTriangles;//length of m_indices = m_numTriangles -	int                     m_numVertices; +	int m_numTriangles;  //length of m_indices = m_numTriangles +	int m_numVertices;  }; -  ///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct	b3StridingMeshInterfaceData +struct b3StridingMeshInterfaceData  { -	b3MeshPartData	*m_meshPartsPtr; -	b3Vector3FloatData	m_scaling; -	int	m_numMeshParts; +	b3MeshPartData* m_meshPartsPtr; +	b3Vector3FloatData m_scaling; +	int m_numMeshParts;  	char m_padding[4];  }; - - - -B3_FORCE_INLINE	int	b3StridingMeshInterface::calculateSerializeBufferSize() const +B3_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const  {  	return sizeof(b3StridingMeshInterfaceData);  } - - -#endif //B3_STRIDING_MESHINTERFACE_H +#endif  //B3_STRIDING_MESHINTERFACE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h index d073ee57c3..9ca1e22949 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h @@ -6,33 +6,29 @@  #include "Bullet3Common/b3AlignedObjectArray.h"  #include "b3VectorFloat4.h" -  struct b3GjkPairDetector; - - -inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull,  -	const b3AlignedObjectArray<b3Vector3>& verticesA, b3Scalar margin) +inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, +												 const b3AlignedObjectArray<b3Vector3>& verticesA, b3Scalar margin)  { -	b3Vector3 supVec = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); +	b3Vector3 supVec = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));  	b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); -    // Here we take advantage of dot(a, b*c) = dot(a*b, c).  Note: This is true mathematically, but not numerically.  -    if( 0 < hull->m_numVertices ) -    { -        const b3Vector3 scaled = supportVec; -		int index = (int) scaled.maxDot( &verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot);  -        return verticesA[hull->m_vertexOffset+index]; -    } - -    return supVec; +	// Here we take advantage of dot(a, b*c) = dot(a*b, c).  Note: This is true mathematically, but not numerically. +	if (0 < hull->m_numVertices) +	{ +		const b3Vector3 scaled = supportVec; +		int index = (int)scaled.maxDot(&verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot); +		return verticesA[hull->m_vertexOffset + index]; +	} +	return supVec;  } -inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull,  -	const b3AlignedObjectArray<b3Vector3>& verticesA) +inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, +													const b3AlignedObjectArray<b3Vector3>& verticesA)  { -	return localGetSupportVertexWithMargin(supportVec,hull,verticesA,0.f); +	return localGetSupportVertexWithMargin(supportVec, hull, verticesA, 0.f);  } -#endif //B3_SUPPORT_MAPPINGS_H +#endif  //B3_SUPPORT_MAPPINGS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp index 9066451884..3908c6de89 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp @@ -17,12 +17,8 @@ subject to the following restrictions:  b3TriangleCallback::~b3TriangleCallback()  { -	  } -  b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback()  { -  } - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h index 3059fa4f21..a0fd3e7ac7 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h @@ -18,13 +18,11 @@ subject to the following restrictions:  #include "Bullet3Common/b3Vector3.h" -  ///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.  ///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as  b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape.  class b3TriangleCallback  {  public: -  	virtual ~b3TriangleCallback();  	virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0;  }; @@ -32,11 +30,8 @@ public:  class b3InternalTriangleIndexCallback  {  public: -  	virtual ~b3InternalTriangleIndexCallback(); -	virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int  triangleIndex) = 0; +	virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) = 0;  }; - - -#endif //B3_TRIANGLE_CALLBACK_H +#endif  //B3_TRIANGLE_CALLBACK_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp index a0f59babbe..73faadbdd0 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp @@ -15,81 +15,76 @@ subject to the following restrictions:  #include "b3TriangleIndexVertexArray.h" -b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride) -: m_hasAabb(0) +b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride) +	: m_hasAabb(0)  {  	b3IndexedMesh mesh;  	mesh.m_numTriangles = numTriangles; -	mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase; +	mesh.m_triangleIndexBase = (const unsigned char*)triangleIndexBase;  	mesh.m_triangleIndexStride = triangleIndexStride;  	mesh.m_numVertices = numVertices; -	mesh.m_vertexBase = (const unsigned char *)vertexBase; +	mesh.m_vertexBase = (const unsigned char*)vertexBase;  	mesh.m_vertexStride = vertexStride;  	addIndexedMesh(mesh); -  }  b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray()  { -  } -void	b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) +void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart)  { -	b3Assert(subpart< getNumSubParts() ); +	b3Assert(subpart < getNumSubParts());  	b3IndexedMesh& mesh = m_indexedMeshes[subpart];  	numverts = mesh.m_numVertices; -	(*vertexbase) = (unsigned char *) mesh.m_vertexBase; +	(*vertexbase) = (unsigned char*)mesh.m_vertexBase; -   type = mesh.m_vertexType; +	type = mesh.m_vertexType;  	vertexStride = mesh.m_vertexStride;  	numfaces = mesh.m_numTriangles; -	(*indexbase) = (unsigned char *)mesh.m_triangleIndexBase; +	(*indexbase) = (unsigned char*)mesh.m_triangleIndexBase;  	indexstride = mesh.m_triangleIndexStride;  	indicestype = mesh.m_indexType;  } -void	b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const +void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart) const  {  	const b3IndexedMesh& mesh = m_indexedMeshes[subpart];  	numverts = mesh.m_numVertices; -	(*vertexbase) = (const unsigned char *)mesh.m_vertexBase; +	(*vertexbase) = (const unsigned char*)mesh.m_vertexBase; + +	type = mesh.m_vertexType; -   type = mesh.m_vertexType; -     	vertexStride = mesh.m_vertexStride;  	numfaces = mesh.m_numTriangles; -	(*indexbase) = (const unsigned char *)mesh.m_triangleIndexBase; +	(*indexbase) = (const unsigned char*)mesh.m_triangleIndexBase;  	indexstride = mesh.m_triangleIndexStride;  	indicestype = mesh.m_indexType;  } -bool	b3TriangleIndexVertexArray::hasPremadeAabb() const +bool b3TriangleIndexVertexArray::hasPremadeAabb() const  {  	return (m_hasAabb == 1);  } - -void	b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const +void b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const  {  	m_aabbMin = aabbMin;  	m_aabbMax = aabbMax; -	m_hasAabb = 1; // this is intentionally an int see notes in header +	m_hasAabb = 1;  // this is intentionally an int see notes in header  } -void	b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const +void b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax) const  {  	*aabbMin = m_aabbMin;  	*aabbMax = m_aabbMax;  } - - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h index d26b2893bc..b6ceb8df10 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h @@ -20,62 +20,59 @@ subject to the following restrictions:  #include "Bullet3Common/b3AlignedObjectArray.h"  #include "Bullet3Common/b3Scalar.h" -  ///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh.  ///Instead of the number of indices, we pass the number of triangles. -B3_ATTRIBUTE_ALIGNED16( struct)	b3IndexedMesh +B3_ATTRIBUTE_ALIGNED16(struct) +b3IndexedMesh  {  	B3_DECLARE_ALIGNED_ALLOCATOR(); -   int                     m_numTriangles; -   const unsigned char *   m_triangleIndexBase; -   // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) -   int                     m_triangleIndexStride; -   int                     m_numVertices; -   const unsigned char *   m_vertexBase; -   // Size of a vertex, in bytes -   int                     m_vertexStride; - -   // The index type is set when adding an indexed mesh to the -   // b3TriangleIndexVertexArray, do not set it manually -   PHY_ScalarType m_indexType; - -   // The vertex type has a default type similar to Bullet's precision mode (float or double) -   // but can be set manually if you for example run Bullet with double precision but have -   // mesh data in single precision.. -   PHY_ScalarType m_vertexType; - - -   b3IndexedMesh() -	   :m_indexType(PHY_INTEGER), +	int m_numTriangles; +	const unsigned char* m_triangleIndexBase; +	// Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) +	int m_triangleIndexStride; +	int m_numVertices; +	const unsigned char* m_vertexBase; +	// Size of a vertex, in bytes +	int m_vertexStride; + +	// The index type is set when adding an indexed mesh to the +	// b3TriangleIndexVertexArray, do not set it manually +	PHY_ScalarType m_indexType; + +	// The vertex type has a default type similar to Bullet's precision mode (float or double) +	// but can be set manually if you for example run Bullet with double precision but have +	// mesh data in single precision.. +	PHY_ScalarType m_vertexType; + +	b3IndexedMesh() +		: m_indexType(PHY_INTEGER),  #ifdef B3_USE_DOUBLE_PRECISION -      m_vertexType(PHY_DOUBLE) -#else // B3_USE_DOUBLE_PRECISION -      m_vertexType(PHY_FLOAT) -#endif // B3_USE_DOUBLE_PRECISION -      { -      } -} -; - +		  m_vertexType(PHY_DOUBLE) +#else   // B3_USE_DOUBLE_PRECISION +		  m_vertexType(PHY_FLOAT) +#endif  // B3_USE_DOUBLE_PRECISION +	{ +	} +}; -typedef b3AlignedObjectArray<b3IndexedMesh>	IndexedMeshArray; +typedef b3AlignedObjectArray<b3IndexedMesh> IndexedMeshArray;  ///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays.  ///Additional meshes can be added using addIndexedMesh  ///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays.  ///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray. -B3_ATTRIBUTE_ALIGNED16( class) b3TriangleIndexVertexArray : public b3StridingMeshInterface +B3_ATTRIBUTE_ALIGNED16(class) +b3TriangleIndexVertexArray : public b3StridingMeshInterface  {  protected: -	IndexedMeshArray	m_indexedMeshes; +	IndexedMeshArray m_indexedMeshes;  	int m_pad[2]; -	mutable int m_hasAabb; // using int instead of bool to maintain alignment +	mutable int m_hasAabb;  // using int instead of bool to maintain alignment  	mutable b3Vector3 m_aabbMin;  	mutable b3Vector3 m_aabbMax;  public: -  	B3_DECLARE_ALIGNED_ALLOCATOR();  	b3TriangleIndexVertexArray() : m_hasAabb(0) @@ -85,49 +82,47 @@ public:  	virtual ~b3TriangleIndexVertexArray();  	//just to be backwards compatible -	b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride); -	 -	void	addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) +	b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride); + +	void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER)  	{  		m_indexedMeshes.push_back(mesh); -		m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType; +		m_indexedMeshes[m_indexedMeshes.size() - 1].m_indexType = indexType;  	} -	 -	 -	virtual void	getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0); -	virtual void	getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const; +	virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0); + +	virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const;  	/// unLockVertexBase finishes the access to a subpart of the triangle mesh  	/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished -	virtual void	unLockVertexBase(int subpart) {(void)subpart;} +	virtual void unLockVertexBase(int subpart) { (void)subpart; } -	virtual void	unLockReadOnlyVertexBase(int subpart) const {(void)subpart;} +	virtual void unLockReadOnlyVertexBase(int subpart) const { (void)subpart; }  	/// getNumSubParts returns the number of seperate subparts  	/// each subpart has a continuous array of vertices and indices -	virtual int		getNumSubParts() const {  +	virtual int getNumSubParts() const +	{  		return (int)m_indexedMeshes.size();  	} -	IndexedMeshArray&	getIndexedMeshArray() +	IndexedMeshArray& getIndexedMeshArray()  	{  		return m_indexedMeshes;  	} -	const IndexedMeshArray&	getIndexedMeshArray() const +	const IndexedMeshArray& getIndexedMeshArray() const  	{  		return m_indexedMeshes;  	} -	virtual void	preallocateVertices(int numverts){(void) numverts;} -	virtual void	preallocateIndices(int numindices){(void) numindices;} - -	virtual bool	hasPremadeAabb() const; -	virtual void	setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const; -	virtual void	getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const; +	virtual void preallocateVertices(int numverts) { (void)numverts; } +	virtual void preallocateIndices(int numindices) { (void)numindices; } -} -; +	virtual bool hasPremadeAabb() const; +	virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; +	virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const; +}; -#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H +#endif  //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h index f6f65f7719..5cc4b5a626 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h @@ -7,5 +7,4 @@  #define float4 b3Vector3  //#define make_float4(x,y,z,w) b3Vector4(x,y,z,w) - -#endif //B3_VECTOR_FLOAT4_H +#endif  //B3_VECTOR_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp index cf3d5ef49d..dae61d4581 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp @@ -23,26 +23,24 @@ subject to the following restrictions:  */ -  #include "b3VoronoiSimplexSolver.h" -#define VERTA  0 -#define VERTB  1 -#define VERTC  2 -#define VERTD  3 +#define VERTA 0 +#define VERTB 1 +#define VERTC 2 +#define VERTD 3  #define B3_CATCH_DEGENERATE_TETRAHEDRON 1 -void	b3VoronoiSimplexSolver::removeVertex(int index) +void b3VoronoiSimplexSolver::removeVertex(int index)  { -	 -	b3Assert(m_numVertices>0); +	b3Assert(m_numVertices > 0);  	m_numVertices--;  	m_simplexVectorW[index] = m_simplexVectorW[m_numVertices];  	m_simplexPointsP[index] = m_simplexPointsP[m_numVertices];  	m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices];  } -void	b3VoronoiSimplexSolver::reduceVertices (const b3UsageBitfield& usedVerts) +void b3VoronoiSimplexSolver::reduceVertices(const b3UsageBitfield& usedVerts)  {  	if ((numVertices() >= 4) && (!usedVerts.usedVertexD))  		removeVertex(3); @@ -52,29 +50,22 @@ void	b3VoronoiSimplexSolver::reduceVertices (const b3UsageBitfield& usedVerts)  	if ((numVertices() >= 2) && (!usedVerts.usedVertexB))  		removeVertex(1); -	 +  	if ((numVertices() >= 1) && (!usedVerts.usedVertexA))  		removeVertex(0); -  } - - - -  //clear the simplex, remove all the vertices  void b3VoronoiSimplexSolver::reset()  {  	m_cachedValidClosest = false;  	m_numVertices = 0;  	m_needsUpdate = true; -	m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); +	m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT));  	m_cachedBC.reset();  } - - -	//add a vertex +//add a vertex  void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q)  {  	m_lastW = w; @@ -87,9 +78,8 @@ void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, c  	m_numVertices++;  } -bool	b3VoronoiSimplexSolver::updateClosestVectorAndPoints() +bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints()  { -	  	if (m_needsUpdate)  	{  		m_cachedBC.reset(); @@ -98,127 +88,131 @@ bool	b3VoronoiSimplexSolver::updateClosestVectorAndPoints()  		switch (numVertices())  		{ -		case 0: +			case 0:  				m_cachedValidClosest = false;  				break; -		case 1: +			case 1:  			{  				m_cachedP1 = m_simplexPointsP[0];  				m_cachedP2 = m_simplexPointsQ[0]; -				m_cachedV = m_cachedP1-m_cachedP2; //== m_simplexVectorW[0] +				m_cachedV = m_cachedP1 - m_cachedP2;  //== m_simplexVectorW[0]  				m_cachedBC.reset(); -				m_cachedBC.setBarycentricCoordinates(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); +				m_cachedBC.setBarycentricCoordinates(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));  				m_cachedValidClosest = m_cachedBC.isValid();  				break;  			}; -		case 2: +			case 2:  			{ -			//closest point origin from line segment -					const b3Vector3& from = m_simplexVectorW[0]; -					const b3Vector3& to = m_simplexVectorW[1]; -					b3Vector3 nearest; - -					b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); -					b3Vector3 diff = p - from; -					b3Vector3 v = to - from; -					b3Scalar t = v.dot(diff); -					 -					if (t > 0) { -						b3Scalar dotVV = v.dot(v); -						if (t < dotVV) { -							t /= dotVV; -							diff -= t*v; -							m_cachedBC.m_usedVertices.usedVertexA = true; -							m_cachedBC.m_usedVertices.usedVertexB = true; -						} else { -							t = 1; -							diff -= v; -							//reduce to 1 point -							m_cachedBC.m_usedVertices.usedVertexB = true; -						} -					} else +				//closest point origin from line segment +				const b3Vector3& from = m_simplexVectorW[0]; +				const b3Vector3& to = m_simplexVectorW[1]; +				b3Vector3 nearest; + +				b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); +				b3Vector3 diff = p - from; +				b3Vector3 v = to - from; +				b3Scalar t = v.dot(diff); + +				if (t > 0) +				{ +					b3Scalar dotVV = v.dot(v); +					if (t < dotVV)  					{ -						t = 0; -						//reduce to 1 point +						t /= dotVV; +						diff -= t * v;  						m_cachedBC.m_usedVertices.usedVertexA = true; +						m_cachedBC.m_usedVertices.usedVertexB = true; +					} +					else +					{ +						t = 1; +						diff -= v; +						//reduce to 1 point +						m_cachedBC.m_usedVertices.usedVertexB = true;  					} -					m_cachedBC.setBarycentricCoordinates(1-t,t); -					nearest = from + t*v; +				} +				else +				{ +					t = 0; +					//reduce to 1 point +					m_cachedBC.m_usedVertices.usedVertexA = true; +				} +				m_cachedBC.setBarycentricCoordinates(1 - t, t); +				nearest = from + t * v; -					m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); -					m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); -					m_cachedV = m_cachedP1 - m_cachedP2; -					 -					reduceVertices(m_cachedBC.m_usedVertices); +				m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); +				m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); +				m_cachedV = m_cachedP1 - m_cachedP2; -					m_cachedValidClosest = m_cachedBC.isValid(); -					break; +				reduceVertices(m_cachedBC.m_usedVertices); + +				m_cachedValidClosest = m_cachedBC.isValid(); +				break;  			} -		case 3:  -			{  -				//closest point origin from triangle  -				b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.));  +			case 3: +			{ +				//closest point origin from triangle +				b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); -				const b3Vector3& a = m_simplexVectorW[0];  -				const b3Vector3& b = m_simplexVectorW[1];  -				const b3Vector3& c = m_simplexVectorW[2];  +				const b3Vector3& a = m_simplexVectorW[0]; +				const b3Vector3& b = m_simplexVectorW[1]; +				const b3Vector3& c = m_simplexVectorW[2]; -				closestPtPointTriangle(p,a,b,c,m_cachedBC);  -				m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] +  -				m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] +  -				m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2];  +				closestPtPointTriangle(p, a, b, c, m_cachedBC); +				m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + +							 m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + +							 m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; -				m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] +  -				m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] +  -				m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2];  +				m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + +							 m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + +							 m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; -				m_cachedV = m_cachedP1-m_cachedP2;  +				m_cachedV = m_cachedP1 - m_cachedP2; -				reduceVertices (m_cachedBC.m_usedVertices);  -				m_cachedValidClosest = m_cachedBC.isValid();  +				reduceVertices(m_cachedBC.m_usedVertices); +				m_cachedValidClosest = m_cachedBC.isValid(); -				break;  +				break;  			} -		case 4: +			case 4:  			{ +				b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); -				 -				b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); -				  				const b3Vector3& a = m_simplexVectorW[0];  				const b3Vector3& b = m_simplexVectorW[1];  				const b3Vector3& c = m_simplexVectorW[2];  				const b3Vector3& d = m_simplexVectorW[3]; -				bool hasSeperation = closestPtPointTetrahedron(p,a,b,c,d,m_cachedBC); +				bool hasSeperation = closestPtPointTetrahedron(p, a, b, c, d, m_cachedBC);  				if (hasSeperation)  				{ -  					m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + -						m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + -						m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + -						m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3]; +								 m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + +								 m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + +								 m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3];  					m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + -						m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + -						m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + -						m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; +								 m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + +								 m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + +								 m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; -					m_cachedV = m_cachedP1-m_cachedP2; -					reduceVertices (m_cachedBC.m_usedVertices); -				} else +					m_cachedV = m_cachedP1 - m_cachedP2; +					reduceVertices(m_cachedBC.m_usedVertices); +				} +				else  				{ -//					printf("sub distance got penetration\n"); +					//					printf("sub distance got penetration\n");  					if (m_cachedBC.m_degenerate)  					{  						m_cachedValidClosest = false; -					} else +					} +					else  					{  						m_cachedValidClosest = true;  						//degenerate case == false, penetration = true + zero -						m_cachedV.setValue(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); +						m_cachedV.setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.));  					}  					break;  				} @@ -228,7 +222,7 @@ bool	b3VoronoiSimplexSolver::updateClosestVectorAndPoints()  				//closest point origin from tetrahedron  				break;  			} -		default: +			default:  			{  				m_cachedValidClosest = false;  			} @@ -236,7 +230,6 @@ bool	b3VoronoiSimplexSolver::updateClosestVectorAndPoints()  	}  	return m_cachedValidClosest; -  }  //return/calculate the closest vertex @@ -247,13 +240,11 @@ bool b3VoronoiSimplexSolver::closest(b3Vector3& v)  	return succes;  } - -  b3Scalar b3VoronoiSimplexSolver::maxVertex()  {  	int i, numverts = numVertices();  	b3Scalar maxV = b3Scalar(0.); -	for (i=0;i<numverts;i++) +	for (i = 0; i < numverts; i++)  	{  		b3Scalar curLen2 = m_simplexVectorW[i].length2();  		if (maxV < curLen2) @@ -262,13 +253,11 @@ b3Scalar b3VoronoiSimplexSolver::maxVertex()  	return maxV;  } - - -	//return the current simplex -int b3VoronoiSimplexSolver::getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const +//return the current simplex +int b3VoronoiSimplexSolver::getSimplex(b3Vector3* pBuf, b3Vector3* qBuf, b3Vector3* yBuf) const  {  	int i; -	for (i=0;i<numVertices();i++) +	for (i = 0; i < numVertices(); i++)  	{  		yBuf[i] = m_simplexVectorW[i];  		pBuf[i] = m_simplexPointsP[i]; @@ -277,20 +266,17 @@ int b3VoronoiSimplexSolver::getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vecto  	return numVertices();  } - - -  bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w)  {  	bool found = false;  	int i, numverts = numVertices();  	//b3Scalar maxV = b3Scalar(0.); -	 +  	//w is in the current (reduced) simplex -	for (i=0;i<numverts;i++) +	for (i = 0; i < numverts; i++)  	{  #ifdef BT_USE_EQUAL_VERTEX_THRESHOLD -		if ( m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold) +		if (m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold)  #else  		if (m_simplexVectorW[i] == w)  #endif @@ -300,199 +286,190 @@ bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w)  	//check in case lastW is already removed  	if (w == m_lastW)  		return true; -    	 +  	return found;  } -void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v)  +void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v)  {  	v = m_cachedV;  } - -bool b3VoronoiSimplexSolver::emptySimplex() const  +bool b3VoronoiSimplexSolver::emptySimplex() const  {  	return (numVertices() == 0); -  } -void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2)  +void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2)  {  	updateClosestVectorAndPoints();  	p1 = m_cachedP1;  	p2 = m_cachedP2; -  } - - - -bool	b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result) +bool b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result)  {  	result.m_usedVertices.reset(); -    // Check if P in vertex region outside A -    b3Vector3 ab = b - a; -    b3Vector3 ac = c - a; -    b3Vector3 ap = p - a; -    b3Scalar d1 = ab.dot(ap); -    b3Scalar d2 = ac.dot(ap); -    if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0))  +	// Check if P in vertex region outside A +	b3Vector3 ab = b - a; +	b3Vector3 ac = c - a; +	b3Vector3 ap = p - a; +	b3Scalar d1 = ab.dot(ap); +	b3Scalar d2 = ac.dot(ap); +	if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0))  	{  		result.m_closestPointOnSimplex = a;  		result.m_usedVertices.usedVertexA = true; -		result.setBarycentricCoordinates(1,0,0); -		return true;// a; // barycentric coordinates (1,0,0) +		result.setBarycentricCoordinates(1, 0, 0); +		return true;  // a; // barycentric coordinates (1,0,0)  	} -    // Check if P in vertex region outside B -    b3Vector3 bp = p - b; -    b3Scalar d3 = ab.dot(bp); -    b3Scalar d4 = ac.dot(bp); -    if (d3 >= b3Scalar(0.0) && d4 <= d3)  +	// Check if P in vertex region outside B +	b3Vector3 bp = p - b; +	b3Scalar d3 = ab.dot(bp); +	b3Scalar d4 = ac.dot(bp); +	if (d3 >= b3Scalar(0.0) && d4 <= d3)  	{  		result.m_closestPointOnSimplex = b;  		result.m_usedVertices.usedVertexB = true; -		result.setBarycentricCoordinates(0,1,0); +		result.setBarycentricCoordinates(0, 1, 0); -		return true; // b; // barycentric coordinates (0,1,0) +		return true;  // b; // barycentric coordinates (0,1,0)  	} -    // Check if P in edge region of AB, if so return projection of P onto AB -    b3Scalar vc = d1*d4 - d3*d2; -    if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) { -        b3Scalar v = d1 / (d1 - d3); +	// Check if P in edge region of AB, if so return projection of P onto AB +	b3Scalar vc = d1 * d4 - d3 * d2; +	if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) +	{ +		b3Scalar v = d1 / (d1 - d3);  		result.m_closestPointOnSimplex = a + v * ab;  		result.m_usedVertices.usedVertexA = true;  		result.m_usedVertices.usedVertexB = true; -		result.setBarycentricCoordinates(1-v,v,0); +		result.setBarycentricCoordinates(1 - v, v, 0);  		return true; -        //return a + v * ab; // barycentric coordinates (1-v,v,0) -    } - -    // Check if P in vertex region outside C -    b3Vector3 cp = p - c; -    b3Scalar d5 = ab.dot(cp); -    b3Scalar d6 = ac.dot(cp); -    if (d6 >= b3Scalar(0.0) && d5 <= d6)  +		//return a + v * ab; // barycentric coordinates (1-v,v,0) +	} + +	// Check if P in vertex region outside C +	b3Vector3 cp = p - c; +	b3Scalar d5 = ab.dot(cp); +	b3Scalar d6 = ac.dot(cp); +	if (d6 >= b3Scalar(0.0) && d5 <= d6)  	{  		result.m_closestPointOnSimplex = c;  		result.m_usedVertices.usedVertexC = true; -		result.setBarycentricCoordinates(0,0,1); -		return true;//c; // barycentric coordinates (0,0,1) +		result.setBarycentricCoordinates(0, 0, 1); +		return true;  //c; // barycentric coordinates (0,0,1)  	} -    // Check if P in edge region of AC, if so return projection of P onto AC -    b3Scalar vb = d5*d2 - d1*d6; -    if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) { -        b3Scalar w = d2 / (d2 - d6); +	// Check if P in edge region of AC, if so return projection of P onto AC +	b3Scalar vb = d5 * d2 - d1 * d6; +	if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) +	{ +		b3Scalar w = d2 / (d2 - d6);  		result.m_closestPointOnSimplex = a + w * ac;  		result.m_usedVertices.usedVertexA = true;  		result.m_usedVertices.usedVertexC = true; -		result.setBarycentricCoordinates(1-w,0,w); +		result.setBarycentricCoordinates(1 - w, 0, w);  		return true; -        //return a + w * ac; // barycentric coordinates (1-w,0,w) -    } +		//return a + w * ac; // barycentric coordinates (1-w,0,w) +	} + +	// Check if P in edge region of BC, if so return projection of P onto BC +	b3Scalar va = d3 * d6 - d5 * d4; +	if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) +	{ +		b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); -    // Check if P in edge region of BC, if so return projection of P onto BC -    b3Scalar va = d3*d6 - d5*d4; -    if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) { -        b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); -		  		result.m_closestPointOnSimplex = b + w * (c - b);  		result.m_usedVertices.usedVertexB = true;  		result.m_usedVertices.usedVertexC = true; -		result.setBarycentricCoordinates(0,1-w,w); -		return true;		 -       // return b + w * (c - b); // barycentric coordinates (0,1-w,w) -    } - -    // P inside face region. Compute Q through its barycentric coordinates (u,v,w) -    b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); -    b3Scalar v = vb * denom; -    b3Scalar w = vc * denom; -     +		result.setBarycentricCoordinates(0, 1 - w, w); +		return true; +		// return b + w * (c - b); // barycentric coordinates (0,1-w,w) +	} + +	// P inside face region. Compute Q through its barycentric coordinates (u,v,w) +	b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); +	b3Scalar v = vb * denom; +	b3Scalar w = vc * denom; +  	result.m_closestPointOnSimplex = a + ab * v + ac * w;  	result.m_usedVertices.usedVertexA = true;  	result.m_usedVertices.usedVertexB = true;  	result.m_usedVertices.usedVertexC = true; -	result.setBarycentricCoordinates(1-v-w,v,w); -	 -	return true; -//	return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w +	result.setBarycentricCoordinates(1 - v - w, v, w); +	return true; +	//	return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w  } - - - -  /// Test if point p and d lie on opposite sides of plane through abc  int b3VoronoiSimplexSolver::pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d)  { -	b3Vector3 normal = (b-a).cross(c-a); +	b3Vector3 normal = (b - a).cross(c - a); -    b3Scalar signp = (p - a).dot(normal); // [AP AB AC] -    b3Scalar signd = (d - a).dot( normal); // [AD AB AC] +	b3Scalar signp = (p - a).dot(normal);  // [AP AB AC] +	b3Scalar signd = (d - a).dot(normal);  // [AD AB AC]  #ifdef B3_CATCH_DEGENERATE_TETRAHEDRON  #ifdef BT_USE_DOUBLE_PRECISION -if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8))) +	if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8)))  	{  		return -1;  	}  #else  	if (signd * signd < (b3Scalar(1e-4) * b3Scalar(1e-4)))  	{ -//		printf("affine dependent/degenerate\n");// +		//		printf("affine dependent/degenerate\n");//  		return -1;  	}  #endif  #endif  	// Points on opposite sides if expression signs are opposite -    return signp * signd < b3Scalar(0.); +	return signp * signd < b3Scalar(0.);  } - -bool	b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult) +bool b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult)  {  	b3SubSimplexClosestResult tempResult; -    // Start out assuming point inside all halfspaces, so closest to itself +	// Start out assuming point inside all halfspaces, so closest to itself  	finalResult.m_closestPointOnSimplex = p;  	finalResult.m_usedVertices.reset(); -    finalResult.m_usedVertices.usedVertexA = true; +	finalResult.m_usedVertices.usedVertexA = true;  	finalResult.m_usedVertices.usedVertexB = true;  	finalResult.m_usedVertices.usedVertexC = true;  	finalResult.m_usedVertices.usedVertexD = true; -    int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d); +	int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d);  	int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b); -  	int	pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); -	int	pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); - -   if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) -   { -	   finalResult.m_degenerate = true; -	   return false; -   } +	int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); +	int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); -   if (!pointOutsideABC  && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) -	 { -		 return false; -	 } +	if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) +	{ +		finalResult.m_degenerate = true; +		return false; +	} +	if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) +	{ +		return false; +	} -    b3Scalar bestSqDist = FLT_MAX; -    // If point outside face abc then compute closest point on abc -	if (pointOutsideABC)  +	b3Scalar bestSqDist = FLT_MAX; +	// If point outside face abc then compute closest point on abc +	if (pointOutsideABC)  	{ -        closestPtPointTriangle(p, a, b, c,tempResult); +		closestPtPointTriangle(p, a, b, c, tempResult);  		b3Vector3 q = tempResult.m_closestPointOnSimplex; -		 -        b3Scalar sqDist = (q - p).dot( q - p); -        // Update best closest point if (squared) distance is less than current best -        if (sqDist < bestSqDist) { + +		b3Scalar sqDist = (q - p).dot(q - p); +		// Update best closest point if (squared) distance is less than current best +		if (sqDist < bestSqDist) +		{  			bestSqDist = sqDist;  			finalResult.m_closestPointOnSimplex = q;  			//convert result bitmask! @@ -501,25 +478,22 @@ bool	b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const  			finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB;  			finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC;  			finalResult.setBarycentricCoordinates( -					tempResult.m_barycentricCoords[VERTA], -					tempResult.m_barycentricCoords[VERTB], -					tempResult.m_barycentricCoords[VERTC], -					0 -			); - +				tempResult.m_barycentricCoords[VERTA], +				tempResult.m_barycentricCoords[VERTB], +				tempResult.m_barycentricCoords[VERTC], +				0);  		} -    } -   +	}  	// Repeat test for face acd -	if (pointOutsideACD)  +	if (pointOutsideACD)  	{ -        closestPtPointTriangle(p, a, c, d,tempResult); +		closestPtPointTriangle(p, a, c, d, tempResult);  		b3Vector3 q = tempResult.m_closestPointOnSimplex;  		//convert result bitmask! -        b3Scalar sqDist = (q - p).dot( q - p); -        if (sqDist < bestSqDist)  +		b3Scalar sqDist = (q - p).dot(q - p); +		if (sqDist < bestSqDist)  		{  			bestSqDist = sqDist;  			finalResult.m_closestPointOnSimplex = q; @@ -529,52 +503,46 @@ bool	b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const  			finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB;  			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC;  			finalResult.setBarycentricCoordinates( -					tempResult.m_barycentricCoords[VERTA], -					0, -					tempResult.m_barycentricCoords[VERTB], -					tempResult.m_barycentricCoords[VERTC] -			); - +				tempResult.m_barycentricCoords[VERTA], +				0, +				tempResult.m_barycentricCoords[VERTB], +				tempResult.m_barycentricCoords[VERTC]);  		} -    } -    // Repeat test for face adb +	} +	// Repeat test for face adb -	  	if (pointOutsideADB)  	{ -		closestPtPointTriangle(p, a, d, b,tempResult); +		closestPtPointTriangle(p, a, d, b, tempResult);  		b3Vector3 q = tempResult.m_closestPointOnSimplex;  		//convert result bitmask! -        b3Scalar sqDist = (q - p).dot( q - p); -        if (sqDist < bestSqDist)  +		b3Scalar sqDist = (q - p).dot(q - p); +		if (sqDist < bestSqDist)  		{  			bestSqDist = sqDist;  			finalResult.m_closestPointOnSimplex = q;  			finalResult.m_usedVertices.reset();  			finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA;  			finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC; -			 +  			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;  			finalResult.setBarycentricCoordinates( -					tempResult.m_barycentricCoords[VERTA], -					tempResult.m_barycentricCoords[VERTC], -					0, -					tempResult.m_barycentricCoords[VERTB] -			); - +				tempResult.m_barycentricCoords[VERTA], +				tempResult.m_barycentricCoords[VERTC], +				0, +				tempResult.m_barycentricCoords[VERTB]);  		} -    } -    // Repeat test for face bdc -     +	} +	// Repeat test for face bdc  	if (pointOutsideBDC)  	{ -        closestPtPointTriangle(p, b, d, c,tempResult); +		closestPtPointTriangle(p, b, d, c, tempResult);  		b3Vector3 q = tempResult.m_closestPointOnSimplex;  		//convert result bitmask! -        b3Scalar sqDist = (q - p).dot( q - p); -        if (sqDist < bestSqDist)  +		b3Scalar sqDist = (q - p).dot(q - p); +		if (sqDist < bestSqDist)  		{  			bestSqDist = sqDist;  			finalResult.m_closestPointOnSimplex = q; @@ -585,25 +553,22 @@ bool	b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const  			finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB;  			finalResult.setBarycentricCoordinates( -					0, -					tempResult.m_barycentricCoords[VERTA], -					tempResult.m_barycentricCoords[VERTC], -					tempResult.m_barycentricCoords[VERTB] -			); - +				0, +				tempResult.m_barycentricCoords[VERTA], +				tempResult.m_barycentricCoords[VERTC], +				tempResult.m_barycentricCoords[VERTB]);  		} -    } +	}  	//help! we ended up full ! -	 +  	if (finalResult.m_usedVertices.usedVertexA &&  		finalResult.m_usedVertices.usedVertexB &&  		finalResult.m_usedVertices.usedVertexC && -		finalResult.m_usedVertices.usedVertexD)  +		finalResult.m_usedVertices.usedVertexD)  	{  		return true;  	} -    return true; +	return true;  } - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h index a6e27667d8..b40b169978 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h @@ -13,22 +13,19 @@ subject to the following restrictions:  3. This notice may not be removed or altered from any source distribution.  */ - -  #ifndef B3_VORONOI_SIMPLEX_SOLVER_H  #define B3_VORONOI_SIMPLEX_SOLVER_H  #include "Bullet3Common/b3Vector3.h" -  #define VORONOI_SIMPLEX_MAX_VERTS 5  ///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure  //#define BT_USE_EQUAL_VERTEX_THRESHOLD  #define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f - -struct b3UsageBitfield{ +struct b3UsageBitfield +{  	b3UsageBitfield()  	{  		reset(); @@ -41,137 +38,127 @@ struct b3UsageBitfield{  		usedVertexC = false;  		usedVertexD = false;  	} -	unsigned short usedVertexA	: 1; -	unsigned short usedVertexB	: 1; -	unsigned short usedVertexC	: 1; -	unsigned short usedVertexD	: 1; -	unsigned short unused1		: 1; -	unsigned short unused2		: 1; -	unsigned short unused3		: 1; -	unsigned short unused4		: 1; +	unsigned short usedVertexA : 1; +	unsigned short usedVertexB : 1; +	unsigned short usedVertexC : 1; +	unsigned short usedVertexD : 1; +	unsigned short unused1 : 1; +	unsigned short unused2 : 1; +	unsigned short unused3 : 1; +	unsigned short unused4 : 1;  }; - -struct	b3SubSimplexClosestResult +struct b3SubSimplexClosestResult  { -	b3Vector3	m_closestPointOnSimplex; +	b3Vector3 m_closestPointOnSimplex;  	//MASK for m_usedVertices -	//stores the simplex vertex-usage, using the MASK,  +	//stores the simplex vertex-usage, using the MASK,  	// if m_usedVertices & MASK then the related vertex is used -	b3UsageBitfield	m_usedVertices; -	b3Scalar	m_barycentricCoords[4]; +	b3UsageBitfield m_usedVertices; +	b3Scalar m_barycentricCoords[4];  	bool m_degenerate; -	void	reset() +	void reset()  	{  		m_degenerate = false;  		setBarycentricCoordinates();  		m_usedVertices.reset();  	} -	bool	isValid() +	bool isValid()  	{  		bool valid = (m_barycentricCoords[0] >= b3Scalar(0.)) && -			(m_barycentricCoords[1] >= b3Scalar(0.)) && -			(m_barycentricCoords[2] >= b3Scalar(0.)) && -			(m_barycentricCoords[3] >= b3Scalar(0.)); - +					 (m_barycentricCoords[1] >= b3Scalar(0.)) && +					 (m_barycentricCoords[2] >= b3Scalar(0.)) && +					 (m_barycentricCoords[3] >= b3Scalar(0.));  		return valid;  	} -	void	setBarycentricCoordinates(b3Scalar a=b3Scalar(0.),b3Scalar b=b3Scalar(0.),b3Scalar c=b3Scalar(0.),b3Scalar d=b3Scalar(0.)) +	void setBarycentricCoordinates(b3Scalar a = b3Scalar(0.), b3Scalar b = b3Scalar(0.), b3Scalar c = b3Scalar(0.), b3Scalar d = b3Scalar(0.))  	{  		m_barycentricCoords[0] = a;  		m_barycentricCoords[1] = b;  		m_barycentricCoords[2] = c;  		m_barycentricCoords[3] = d;  	} -  };  /// b3VoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin.  /// Can be used with GJK, as an alternative to Johnson distance algorithm. -B3_ATTRIBUTE_ALIGNED16(class) b3VoronoiSimplexSolver  +B3_ATTRIBUTE_ALIGNED16(class) +b3VoronoiSimplexSolver  {  public: -  	B3_DECLARE_ALIGNED_ALLOCATOR(); -	int	m_numVertices; - -	b3Vector3	m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; -	b3Vector3	m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; -	b3Vector3	m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; +	int m_numVertices; -	 +	b3Vector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; +	b3Vector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; +	b3Vector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; -	b3Vector3	m_cachedP1; -	b3Vector3	m_cachedP2; -	b3Vector3	m_cachedV; -	b3Vector3	m_lastW; -	 -	b3Scalar	m_equalVertexThreshold; -	bool		m_cachedValidClosest; +	b3Vector3 m_cachedP1; +	b3Vector3 m_cachedP2; +	b3Vector3 m_cachedV; +	b3Vector3 m_lastW; +	b3Scalar m_equalVertexThreshold; +	bool m_cachedValidClosest;  	b3SubSimplexClosestResult m_cachedBC; -	bool	m_needsUpdate; -	 -	void	removeVertex(int index); -	void	reduceVertices (const b3UsageBitfield& usedVerts); -	bool	updateClosestVectorAndPoints(); +	bool m_needsUpdate; -	bool	closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); -	int		pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); -	bool	closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result); +	void removeVertex(int index); +	void reduceVertices(const b3UsageBitfield& usedVerts); +	bool updateClosestVectorAndPoints(); -public: +	bool closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); +	int pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); +	bool closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result); +public:  	b3VoronoiSimplexSolver() -		:  m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) +		: m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD)  	{  	} -	 void reset(); - -	 void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); +	void reset(); -	 void	setEqualVertexThreshold(b3Scalar threshold) -	 { -		 m_equalVertexThreshold = threshold; -	 } +	void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); -	 b3Scalar	getEqualVertexThreshold() const -	 { -		 return m_equalVertexThreshold; -	 } +	void setEqualVertexThreshold(b3Scalar threshold) +	{ +		m_equalVertexThreshold = threshold; +	} -	 bool closest(b3Vector3& v); +	b3Scalar getEqualVertexThreshold() const +	{ +		return m_equalVertexThreshold; +	} -	 b3Scalar maxVertex(); +	bool closest(b3Vector3 & v); -	 bool fullSimplex() const -	 { -		 return (m_numVertices == 4); -	 } +	b3Scalar maxVertex(); -	 int getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const; +	bool fullSimplex() const +	{ +		return (m_numVertices == 4); +	} -	 bool inSimplex(const b3Vector3& w); -	 -	 void backup_closest(b3Vector3& v) ; +	int getSimplex(b3Vector3 * pBuf, b3Vector3 * qBuf, b3Vector3 * yBuf) const; -	 bool emptySimplex() const ; +	bool inSimplex(const b3Vector3& w); -	 void compute_points(b3Vector3& p1, b3Vector3& p2) ; +	void backup_closest(b3Vector3 & v); -	 int numVertices() const  -	 { -		 return m_numVertices; -	 } +	bool emptySimplex() const; +	void compute_points(b3Vector3 & p1, b3Vector3 & p2); +	int numVertices() const +	{ +		return m_numVertices; +	}  }; -#endif //B3_VORONOI_SIMPLEX_SOLVER_H - +#endif  //B3_VORONOI_SIMPLEX_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h index 4b3b49eae8..f1df8a6970 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -1,258 +1,257 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* bvhTraversalKernelCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"typedef unsigned int u32;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} btQuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeaf(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} btBvhSubtreeInfo;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	int m_numChildShapes;\n" -"	int blaat2;\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"int testQuantizedAabbAgainstQuantizedAabb(\n" -"								const unsigned short int* aabbMin1,\n" -"								const unsigned short int* aabbMax1,\n" -"								const unsigned short int* aabbMin2,\n" -"								const unsigned short int* aabbMax2)\n" -"{\n" -"	//int overlap = 1;\n" -"	if (aabbMin1[0] > aabbMax2[0])\n" -"		return 0;\n" -"	if (aabbMax1[0] < aabbMin2[0])\n" -"		return 0;\n" -"	if (aabbMin1[1] > aabbMax2[1])\n" -"		return 0;\n" -"	if (aabbMax1[1] < aabbMin2[1])\n" -"		return 0;\n" -"	if (aabbMin1[2] > aabbMax2[2])\n" -"		return 0;\n" -"	if (aabbMax1[2] < aabbMin2[2])\n" -"		return 0;\n" -"	return 1;\n" -"	//overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" -"	//overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" -"	//overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" -"	//return overlap;\n" -"}\n" -"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" -"{\n" -"	float4 clampedPoint = max(point2,bvhAabbMin);\n" -"	clampedPoint = min (clampedPoint, bvhAabbMax);\n" -"	float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" -"	if (isMax)\n" -"	{\n" -"		out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" -"		out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" -"		out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" -"	} else\n" -"	{\n" -"		out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" -"		out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" -"		out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" -"	}\n" -"}\n" -"// work-in-progress\n" -"__kernel void   bvhTraversalKernel( __global const int4* pairs, \n" -"									__global const BodyData* rigidBodies, \n" -"									__global const btCollidableGpu* collidables,\n" -"									__global btAabbCL* aabbs,\n" -"									__global int4* concavePairsOut,\n" -"									__global volatile int* numConcavePairsOut,\n" -"									__global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" -"									__global const btQuantizedBvhNode* quantizedNodesRoot,\n" -"									__global const b3BvhInfo* bvhInfos,\n" -"									int numPairs,\n" -"									int maxNumConcavePairsCapacity)\n" -"{\n" -"	int id = get_global_id(0);\n" -"	if (id>=numPairs)\n" -"		return;\n" -"	\n" -"	int bodyIndexA = pairs[id].x;\n" -"	int bodyIndexB = pairs[id].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"	//once the broadphase avoids static-static pairs, we can remove this test\n" -"	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"	{\n" -"		return;\n" -"	}\n" -"		\n" -"	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" -"		return;\n" -"	int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		\n" -"	if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" -"		shapeTypeB!=SHAPE_SPHERE	&&\n" -"		shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" -"		)\n" -"		return;\n" -"	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" -"	float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" -"	float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" -"	float4 bvhQuantization = bvhInfo.m_quantization;\n" -"	int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" -"	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" -"	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" -"	\n" -"	unsigned short int quantizedQueryAabbMin[3];\n" -"	unsigned short int quantizedQueryAabbMax[3];\n" -"	quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -"	quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -"	\n" -"	for (int i=0;i<numSubtreeHeaders;i++)\n" -"	{\n" -"		btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" -"				\n" -"		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" -"		if (overlap != 0)\n" -"		{\n" -"			int startNodeIndex = subtree.m_rootNodeIndex;\n" -"			int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" -"			int curIndex = startNodeIndex;\n" -"			int escapeIndex;\n" -"			int isLeafNode;\n" -"			int aabbOverlap;\n" -"			while (curIndex < endNodeIndex)\n" -"			{\n" -"				btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" -"				aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" -"				isLeafNode = isLeaf(&rootNode);\n" -"				if (aabbOverlap)\n" -"				{\n" -"					if (isLeafNode)\n" -"					{\n" -"						int triangleIndex = getTriangleIndex(&rootNode);\n" -"						if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"						{\n" -"								int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"								int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" -"								for (int b=0;b<numChildrenB;b++)\n" -"								{\n" -"									if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" -"									{\n" -"										int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"										int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" -"										concavePairsOut[pairIdx+b] = newPair;\n" -"									}\n" -"								}\n" -"						} else\n" -"						{\n" -"							int pairIdx = atomic_inc(numConcavePairsOut);\n" -"							if (pairIdx<maxNumConcavePairsCapacity)\n" -"							{\n" -"								int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" -"								concavePairsOut[pairIdx] = newPair;\n" -"							}\n" -"						}\n" -"					} \n" -"					curIndex++;\n" -"				} else\n" -"				{\n" -"					if (isLeafNode)\n" -"					{\n" -"						curIndex++;\n" -"					} else\n" -"					{\n" -"						escapeIndex = getEscapeIndex(&rootNode);\n" -"						curIndex += escapeIndex;\n" -"					}\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"}\n" -; +static const char* bvhTraversalKernelCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define SHAPE_SPHERE 7\n" +	"typedef unsigned int u32;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} btQuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeaf(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} btBvhSubtreeInfo;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	int m_numChildShapes;\n" +	"	int blaat2;\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"int testQuantizedAabbAgainstQuantizedAabb(\n" +	"								const unsigned short int* aabbMin1,\n" +	"								const unsigned short int* aabbMax1,\n" +	"								const unsigned short int* aabbMin2,\n" +	"								const unsigned short int* aabbMax2)\n" +	"{\n" +	"	//int overlap = 1;\n" +	"	if (aabbMin1[0] > aabbMax2[0])\n" +	"		return 0;\n" +	"	if (aabbMax1[0] < aabbMin2[0])\n" +	"		return 0;\n" +	"	if (aabbMin1[1] > aabbMax2[1])\n" +	"		return 0;\n" +	"	if (aabbMax1[1] < aabbMin2[1])\n" +	"		return 0;\n" +	"	if (aabbMin1[2] > aabbMax2[2])\n" +	"		return 0;\n" +	"	if (aabbMax1[2] < aabbMin2[2])\n" +	"		return 0;\n" +	"	return 1;\n" +	"	//overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" +	"	//overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" +	"	//overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" +	"	//return overlap;\n" +	"}\n" +	"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" +	"{\n" +	"	float4 clampedPoint = max(point2,bvhAabbMin);\n" +	"	clampedPoint = min (clampedPoint, bvhAabbMax);\n" +	"	float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" +	"	if (isMax)\n" +	"	{\n" +	"		out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" +	"		out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" +	"		out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" +	"	} else\n" +	"	{\n" +	"		out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" +	"		out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" +	"		out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" +	"	}\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   bvhTraversalKernel( __global const int4* pairs, \n" +	"									__global const BodyData* rigidBodies, \n" +	"									__global const btCollidableGpu* collidables,\n" +	"									__global btAabbCL* aabbs,\n" +	"									__global int4* concavePairsOut,\n" +	"									__global volatile int* numConcavePairsOut,\n" +	"									__global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" +	"									__global const btQuantizedBvhNode* quantizedNodesRoot,\n" +	"									__global const b3BvhInfo* bvhInfos,\n" +	"									int numPairs,\n" +	"									int maxNumConcavePairsCapacity)\n" +	"{\n" +	"	int id = get_global_id(0);\n" +	"	if (id>=numPairs)\n" +	"		return;\n" +	"	\n" +	"	int bodyIndexA = pairs[id].x;\n" +	"	int bodyIndexB = pairs[id].y;\n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"	//once the broadphase avoids static-static pairs, we can remove this test\n" +	"	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"	{\n" +	"		return;\n" +	"	}\n" +	"		\n" +	"	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" +	"		return;\n" +	"	int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		\n" +	"	if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" +	"		shapeTypeB!=SHAPE_SPHERE	&&\n" +	"		shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" +	"		)\n" +	"		return;\n" +	"	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" +	"	float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" +	"	float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" +	"	float4 bvhQuantization = bvhInfo.m_quantization;\n" +	"	int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" +	"	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" +	"	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" +	"	\n" +	"	unsigned short int quantizedQueryAabbMin[3];\n" +	"	unsigned short int quantizedQueryAabbMax[3];\n" +	"	quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +	"	quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +	"	\n" +	"	for (int i=0;i<numSubtreeHeaders;i++)\n" +	"	{\n" +	"		btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" +	"				\n" +	"		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" +	"		if (overlap != 0)\n" +	"		{\n" +	"			int startNodeIndex = subtree.m_rootNodeIndex;\n" +	"			int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" +	"			int curIndex = startNodeIndex;\n" +	"			int escapeIndex;\n" +	"			int isLeafNode;\n" +	"			int aabbOverlap;\n" +	"			while (curIndex < endNodeIndex)\n" +	"			{\n" +	"				btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" +	"				aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" +	"				isLeafNode = isLeaf(&rootNode);\n" +	"				if (aabbOverlap)\n" +	"				{\n" +	"					if (isLeafNode)\n" +	"					{\n" +	"						int triangleIndex = getTriangleIndex(&rootNode);\n" +	"						if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"						{\n" +	"								int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"								int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" +	"								for (int b=0;b<numChildrenB;b++)\n" +	"								{\n" +	"									if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" +	"									{\n" +	"										int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"										int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" +	"										concavePairsOut[pairIdx+b] = newPair;\n" +	"									}\n" +	"								}\n" +	"						} else\n" +	"						{\n" +	"							int pairIdx = atomic_inc(numConcavePairsOut);\n" +	"							if (pairIdx<maxNumConcavePairsCapacity)\n" +	"							{\n" +	"								int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" +	"								concavePairsOut[pairIdx] = newPair;\n" +	"							}\n" +	"						}\n" +	"					} \n" +	"					curIndex++;\n" +	"				} else\n" +	"				{\n" +	"					if (isLeafNode)\n" +	"					{\n" +	"						curIndex++;\n" +	"					} else\n" +	"					{\n" +	"						escapeIndex = getEscapeIndex(&rootNode);\n" +	"						curIndex += escapeIndex;\n" +	"					}\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h index 7ed4b382c3..74959a931c 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h @@ -1,1446 +1,1445 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* mprKernelsCL= \ -"/***\n" -" * ---------------------------------\n" -" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" -" *\n" -" *  This file was ported from mpr.c file, part of libccd.\n" -" *  The Minkoski Portal Refinement implementation was ported \n" -" *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" -" *  at http://github.com/erwincoumans/bullet3\n" -" *\n" -" *  Distributed under the OSI-approved BSD License (the \"License\");\n" -" *  see <http://www.opensource.org/licenses/bsd-license.php>.\n" -" *  This software is distributed WITHOUT ANY WARRANTY; without even the\n" -" *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" -" *  See the License for more information.\n" -" */\n" -"#ifndef B3_MPR_PENETRATION_H\n" -"#define B3_MPR_PENETRATION_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -"	b3Float4				m_pos;\n" -"	b3Quat					m_quat;\n" -"	b3Float4				m_linVel;\n" -"	b3Float4				m_angVel;\n" -"	int 					m_collidableIdx;\n" -"	float 				m_invMass;\n" -"	float 				m_restituitionCoeff;\n" -"	float 				m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -"	b3Mat3x3 m_invInertiaWorld;\n" -"	b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -"	\n" -"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -"	b3Float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"	int m_unusedPadding1;\n" -"	int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -"	b3Float4		m_localCenter;\n" -"	b3Float4		m_extents;\n" -"	b3Float4		mC;\n" -"	b3Float4		mE;\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -"	SHAPE_HEIGHT_FIELD=1,\n" -"	SHAPE_CONVEX_HULL=3,\n" -"	SHAPE_PLANE=4,\n" -"	SHAPE_CONCAVE_TRIMESH=5,\n" -"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -"	SHAPE_SPHERE=7,\n" -"	MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -"	b3Float4	m_childPosition;\n" -"	b3Quat		m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -"	int m_bodyIndexA;\n" -"	int m_bodyIndexB;\n" -"//	int	m_pairType;\n" -"	int m_childShapeIndexA;\n" -"	int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define B3_MPR_SQRT sqrt\n" -"#endif\n" -"#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" -"#define B3_MPR_FABS fabs\n" -"#define B3_MPR_TOLERANCE 1E-6f\n" -"#define B3_MPR_MAX_ITERATIONS 1000\n" -"struct _b3MprSupport_t \n" -"{\n" -"    b3Float4 v;  //!< Support point in minkowski sum\n" -"    b3Float4 v1; //!< Support point in obj1\n" -"    b3Float4 v2; //!< Support point in obj2\n" -"};\n" -"typedef struct _b3MprSupport_t b3MprSupport_t;\n" -"struct _b3MprSimplex_t \n" -"{\n" -"    b3MprSupport_t ps[4];\n" -"    int last; //!< index of last added point\n" -"};\n" -"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" -"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" -"{\n" -"    return &s->ps[idx];\n" -"}\n" -"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" -"{\n" -"    s->last = size - 1;\n" -"}\n" -"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" -"{\n" -"    return s->last + 1;\n" -"}\n" -"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" -"{\n" -"    // here is no check on boundaries\n" -"    return &s->ps[idx];\n" -"}\n" -"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" -"{\n" -"    *d = *s;\n" -"}\n" -"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" -"{\n" -"    b3MprSupportCopy(s->ps + pos, a);\n" -"}\n" -"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" -"{\n" -"    b3MprSupport_t supp;\n" -"    b3MprSupportCopy(&supp, &s->ps[pos1]);\n" -"    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" -"    b3MprSupportCopy(&s->ps[pos2], &supp);\n" -"}\n" -"inline int b3MprIsZero(float val)\n" -"{\n" -"    return B3_MPR_FABS(val) < FLT_EPSILON;\n" -"}\n" -"inline int b3MprEq(float _a, float _b)\n" -"{\n" -"    float ab;\n" -"    float a, b;\n" -"    ab = B3_MPR_FABS(_a - _b);\n" -"    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" -"        return 1;\n" -"    a = B3_MPR_FABS(_a);\n" -"    b = B3_MPR_FABS(_b);\n" -"    if (b > a){\n" -"        return ab < FLT_EPSILON * b;\n" -"    }else{\n" -"        return ab < FLT_EPSILON * a;\n" -"    }\n" -"}\n" -"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" -"{\n" -"    return b3MprEq((*a).x, (*b).x)\n" -"            && b3MprEq((*a).y, (*b).y)\n" -"            && b3MprEq((*a).z, (*b).z);\n" -"}\n" -"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, 	b3ConstArray(b3Float4) verticesA)\n" -"{\n" -"	b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -"	float maxDot = -B3_LARGE_FLOAT;\n" -"    if( 0 < hull->m_numVertices )\n" -"    {\n" -"        const b3Float4 scaled = supportVec;\n" -"		int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -"        return verticesA[hull->m_vertexOffset+index];\n" -"    }\n" -"    return supVec;\n" -"}\n" -"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"														const b3Float4* _dir, b3Float4* outp, int logme)\n" -"{\n" -"	//dir is in worldspace, move to local space\n" -"	\n" -"	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" -"	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" -"	\n" -"	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" -"	\n" -"	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" -"	\n" -"	//find local support vertex\n" -"	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" -"	\n" -"	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" -"	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" -"	\n" -"	b3Float4 pInA;\n" -"	if (logme)\n" -"	{\n" -"		b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -"		float maxDot = -B3_LARGE_FLOAT;\n" -"		if( 0 < hull->m_numVertices )\n" -"		{\n" -"			const b3Float4 scaled = localDir;\n" -"			int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -"			pInA = cpuVertices[hull->m_vertexOffset+index];\n" -"			\n" -"		}\n" -"	} else\n" -"	{\n" -"		pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" -"	}\n" -"	//move vertex to world space\n" -"	*outp = b3TransformPoint(pInA,pos,orn);\n" -"	\n" -"}\n" -"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													const b3Float4* _dir, b3MprSupport_t *supp)\n" -"{\n" -"    b3Float4 dir;\n" -"	dir = *_dir;\n" -"	b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" -"    dir = *_dir*-1.f;\n" -"	b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" -"    supp->v = supp->v1 - supp->v2;\n" -"}\n" -"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" -"{\n" -"    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" -"	center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" -"    center->v = center->v1 - center->v2;\n" -"}\n" -"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" -"{\n" -"	(*v).x = x;\n" -"	(*v).y = y;\n" -"	(*v).z = z;\n" -"	(*v).w = 0.f;\n" -"}\n" -"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"    (*v).x += (*w).x;\n" -"    (*v).y += (*w).y;\n" -"    (*v).z += (*w).z;\n" -"}\n" -"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"    *v = *w;\n" -"}\n" -"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" -"{\n" -"    *d *= k;\n" -"}\n" -"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"    float dot;\n" -"	dot = b3Dot3F4(*a,*b);\n" -"    return dot;\n" -"}\n" -"inline float b3MprVec3Len2(const b3Float4 *v)\n" -"{\n" -"    return b3MprVec3Dot(v, v);\n" -"}\n" -"inline void b3MprVec3Normalize(b3Float4 *d)\n" -"{\n" -"    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" -"    b3MprVec3Scale(d, k);\n" -"}\n" -"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"	*d = b3Cross3(*a,*b);\n" -"	\n" -"}\n" -"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"	*d = *v - *w;\n" -"}\n" -"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" -"{\n" -"    b3Float4 v2v1, v3v1;\n" -"    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b3MprVec3Cross(dir, &v2v1, &v3v1);\n" -"    b3MprVec3Normalize(dir);\n" -"}\n" -"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" -"                                       const b3Float4 *dir)\n" -"{\n" -"    float dot;\n" -"    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -"    return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" -"                                     const b3MprSupport_t *v4,\n" -"                                     const b3Float4 *dir)\n" -"{\n" -"    float dv1, dv2, dv3, dv4;\n" -"    float dot1, dot2, dot3;\n" -"    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" -"    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" -"    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" -"    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" -"    dv4 = b3MprVec3Dot(&v4->v, dir);\n" -"    dot1 = dv4 - dv1;\n" -"    dot2 = dv4 - dv2;\n" -"    dot3 = dv4 - dv3;\n" -"    dot1 = B3_MPR_FMIN(dot1, dot2);\n" -"    dot1 = B3_MPR_FMIN(dot1, dot3);\n" -"    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" -"}\n" -"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n" -"                                         const b3MprSupport_t *v4,\n" -"                                         const b3Float4 *dir)\n" -"{\n" -"    float dot;\n" -"    dot = b3MprVec3Dot(&v4->v, dir);\n" -"    return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" -"                              const b3MprSupport_t *v4)\n" -"{\n" -"    float dot;\n" -"    b3Float4 v4v0;\n" -"    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" -"    if (dot > 0.f){\n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" -"        if (dot > 0.f){\n" -"            b3MprSimplexSet(portal, 1, v4);\n" -"        }else{\n" -"            b3MprSimplexSet(portal, 3, v4);\n" -"        }\n" -"    }else{\n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" -"        if (dot > 0.f){\n" -"            b3MprSimplexSet(portal, 2, v4);\n" -"        }else{\n" -"            b3MprSimplexSet(portal, 1, v4);\n" -"        }\n" -"    }\n" -"}\n" -"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													__global int*	hasSepAxis,\n" -"													b3MprSimplex_t *portal)\n" -"{\n" -"    b3Float4 dir, va, vb;\n" -"    float dot;\n" -"    int cont;\n" -"	\n" -"	\n" -"    // vertex 0 is center of portal\n" -"    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" -"    // vertex 0 is center of portal\n" -"    b3MprSimplexSetSize(portal, 1);\n" -"	\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" -"        // Portal's center lies on origin (0,0,0) => we know that objects\n" -"        // intersect but we would need to know penetration info.\n" -"        // So move center little bit...\n" -"        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" -"        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" -"    }\n" -"    // vertex 1 = support in direction of origin\n" -"    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Scale(&dir, -1.f);\n" -"    b3MprVec3Normalize(&dir);\n" -"    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" -"    b3MprSimplexSetSize(portal, 2);\n" -"    // test if origin isn't outside of v1\n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" -"	\n" -"    if (b3MprIsZero(dot) || dot < 0.f)\n" -"        return -1;\n" -"    // vertex 2\n" -"    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" -"        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" -"            // origin lies on v1\n" -"            return 1;\n" -"        }else{\n" -"            // origin lies on v0-v1 segment\n" -"            return 2;\n" -"        }\n" -"    }\n" -"    b3MprVec3Normalize(&dir);\n" -"	 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" -"    \n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" -"    if (b3MprIsZero(dot) || dot < 0.f)\n" -"        return -1;\n" -"    b3MprSimplexSetSize(portal, 3);\n" -"    // vertex 3 direction\n" -"    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                     &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -"                     &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Cross(&dir, &va, &vb);\n" -"    b3MprVec3Normalize(&dir);\n" -"    // it is better to form portal faces to be oriented \"outside\" origin\n" -"    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -"    if (dot > 0.f){\n" -"        b3MprSimplexSwap(portal, 1, 2);\n" -"        b3MprVec3Scale(&dir, -1.f);\n" -"    }\n" -"    while (b3MprSimplexSize(portal) < 4){\n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" -"        \n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" -"        if (b3MprIsZero(dot) || dot < 0.f)\n" -"            return -1;\n" -"        cont = 0;\n" -"        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" -"        // continue\n" -"        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                          &b3MprSimplexPoint(portal, 3)->v);\n" -"        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -"        if (dot < 0.f && !b3MprIsZero(dot)){\n" -"            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" -"            cont = 1;\n" -"        }\n" -"        if (!cont){\n" -"            // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" -"            // continue\n" -"            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" -"                              &b3MprSimplexPoint(portal, 2)->v);\n" -"            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -"            if (dot < 0.f && !b3MprIsZero(dot)){\n" -"                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" -"                cont = 1;\n" -"            }\n" -"        }\n" -"        if (cont){\n" -"            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                             &b3MprSimplexPoint(portal, 0)->v);\n" -"            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -"                             &b3MprSimplexPoint(portal, 0)->v);\n" -"            b3MprVec3Cross(&dir, &va, &vb);\n" -"            b3MprVec3Normalize(&dir);\n" -"        }else{\n" -"            b3MprSimplexSetSize(portal, 4);\n" -"        }\n" -"    }\n" -"    return 0;\n" -"}\n" -"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													b3MprSimplex_t *portal)\n" -"{\n" -"    b3Float4 dir;\n" -"    b3MprSupport_t v4;\n" -"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -"    //while (1)\n" -"	{\n" -"        // compute direction outside the portal (from v0 throught v1,v2,v3\n" -"        // face)\n" -"        b3PortalDir(portal, &dir);\n" -"        // test if origin is inside the portal\n" -"        if (portalEncapsulesOrigin(portal, &dir))\n" -"            return 0;\n" -"        // get next support point\n" -"        \n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" -"        // test if v4 can expand portal to contain origin and if portal\n" -"        // expanding doesn't reach given tolerance\n" -"        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" -"                || portalReachTolerance(portal, &v4, &dir))\n" -"		{\n" -"            return -1;\n" -"        }\n" -"        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" -"        // difference (direction from v0).\n" -"        b3ExpandPortal(portal, &v4);\n" -"    }\n" -"    return -1;\n" -"}\n" -"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" -"{\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    b3Float4 dir;\n" -"    size_t i;\n" -"    float b[4], sum, inv;\n" -"    b3Float4 vec, p1, p2;\n" -"    b3PortalDir(portal, &dir);\n" -"    // use barycentric coordinates of tetrahedron to find origin\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -"                       &b3MprSimplexPoint(portal, 2)->v);\n" -"    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -"                       &b3MprSimplexPoint(portal, 2)->v);\n" -"    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -"	sum = b[0] + b[1] + b[2] + b[3];\n" -"    if (b3MprIsZero(sum) || sum < 0.f){\n" -"		b[0] = 0.f;\n" -"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -"                           &b3MprSimplexPoint(portal, 3)->v);\n" -"        b[1] = b3MprVec3Dot(&vec, &dir);\n" -"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -"                           &b3MprSimplexPoint(portal, 1)->v);\n" -"        b[2] = b3MprVec3Dot(&vec, &dir);\n" -"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -"                           &b3MprSimplexPoint(portal, 2)->v);\n" -"        b[3] = b3MprVec3Dot(&vec, &dir);\n" -"		sum = b[1] + b[2] + b[3];\n" -"	}\n" -"	inv = 1.f / sum;\n" -"    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" -"    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" -"    for (i = 0; i < 4; i++){\n" -"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" -"        b3MprVec3Scale(&vec, b[i]);\n" -"        b3MprVec3Add(&p1, &vec);\n" -"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" -"        b3MprVec3Scale(&vec, b[i]);\n" -"        b3MprVec3Add(&p2, &vec);\n" -"    }\n" -"    b3MprVec3Scale(&p1, inv);\n" -"    b3MprVec3Scale(&p2, inv);\n" -"    b3MprVec3Copy(pos, &p1);\n" -"    b3MprVec3Add(pos, &p2);\n" -"    b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"    b3Float4 ab;\n" -"    b3MprVec3Sub2(&ab, a, b);\n" -"    return b3MprVec3Len2(&ab);\n" -"}\n" -"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" -"                                                  const b3Float4 *x0,\n" -"                                                  const b3Float4 *b,\n" -"                                                  b3Float4 *witness)\n" -"{\n" -"    // The computation comes from solving equation of segment:\n" -"    //      S(t) = x0 + t.d\n" -"    //          where - x0 is initial point of segment\n" -"    //                - d is direction of segment from x0 (|d| > 0)\n" -"    //                - t belongs to <0, 1> interval\n" -"    // \n" -"    // Than, distance from a segment to some point P can be expressed:\n" -"    //      D(t) = |x0 + t.d - P|^2\n" -"    //          which is distance from any point on segment. Minimization\n" -"    //          of this function brings distance from P to segment.\n" -"    // Minimization of D(t) leads to simple quadratic equation that's\n" -"    // solving is straightforward.\n" -"    //\n" -"    // Bonus of this method is witness point for free.\n" -"    float dist, t;\n" -"    b3Float4 d, a;\n" -"    // direction of segment\n" -"    b3MprVec3Sub2(&d, b, x0);\n" -"    // precompute vector from P to x0\n" -"    b3MprVec3Sub2(&a, x0, P);\n" -"    t  = -1.f * b3MprVec3Dot(&a, &d);\n" -"    t /= b3MprVec3Len2(&d);\n" -"    if (t < 0.f || b3MprIsZero(t)){\n" -"        dist = b3MprVec3Dist2(x0, P);\n" -"        if (witness)\n" -"            b3MprVec3Copy(witness, x0);\n" -"    }else if (t > 1.f || b3MprEq(t, 1.f)){\n" -"        dist = b3MprVec3Dist2(b, P);\n" -"        if (witness)\n" -"            b3MprVec3Copy(witness, b);\n" -"    }else{\n" -"        if (witness){\n" -"            b3MprVec3Copy(witness, &d);\n" -"            b3MprVec3Scale(witness, t);\n" -"            b3MprVec3Add(witness, x0);\n" -"            dist = b3MprVec3Dist2(witness, P);\n" -"        }else{\n" -"            // recycling variables\n" -"            b3MprVec3Scale(&d, t);\n" -"            b3MprVec3Add(&d, &a);\n" -"            dist = b3MprVec3Len2(&d);\n" -"        }\n" -"    }\n" -"    return dist;\n" -"}\n" -"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" -"                                const b3Float4 *x0, const b3Float4 *B,\n" -"                                const b3Float4 *C,\n" -"                                b3Float4 *witness)\n" -"{\n" -"    // Computation comes from analytic expression for triangle (x0, B, C)\n" -"    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" -"    // Then equation for distance is:\n" -"    //      D(s, t) = | T(s, t) - P |^2\n" -"    // This leads to minimization of quadratic function of two variables.\n" -"    // The solution from is taken only if s is between 0 and 1, t is\n" -"    // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" -"    // computed.\n" -"    b3Float4 d1, d2, a;\n" -"    float u, v, w, p, q, r;\n" -"    float s, t, dist, dist2;\n" -"    b3Float4 witness2;\n" -"    b3MprVec3Sub2(&d1, B, x0);\n" -"    b3MprVec3Sub2(&d2, C, x0);\n" -"    b3MprVec3Sub2(&a, x0, P);\n" -"    u = b3MprVec3Dot(&a, &a);\n" -"    v = b3MprVec3Dot(&d1, &d1);\n" -"    w = b3MprVec3Dot(&d2, &d2);\n" -"    p = b3MprVec3Dot(&a, &d1);\n" -"    q = b3MprVec3Dot(&a, &d2);\n" -"    r = b3MprVec3Dot(&d1, &d2);\n" -"    s = (q * r - w * p) / (w * v - r * r);\n" -"    t = (-s * r - q) / w;\n" -"    if ((b3MprIsZero(s) || s > 0.f)\n" -"            && (b3MprEq(s, 1.f) || s < 1.f)\n" -"            && (b3MprIsZero(t) || t > 0.f)\n" -"            && (b3MprEq(t, 1.f) || t < 1.f)\n" -"            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" -"        if (witness){\n" -"            b3MprVec3Scale(&d1, s);\n" -"            b3MprVec3Scale(&d2, t);\n" -"            b3MprVec3Copy(witness, x0);\n" -"            b3MprVec3Add(witness, &d1);\n" -"            b3MprVec3Add(witness, &d2);\n" -"            dist = b3MprVec3Dist2(witness, P);\n" -"        }else{\n" -"            dist  = s * s * v;\n" -"            dist += t * t * w;\n" -"            dist += 2.f * s * t * r;\n" -"            dist += 2.f * s * p;\n" -"            dist += 2.f * t * q;\n" -"            dist += u;\n" -"        }\n" -"    }else{\n" -"        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" -"        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" -"        if (dist2 < dist){\n" -"            dist = dist2;\n" -"            if (witness)\n" -"                b3MprVec3Copy(witness, &witness2);\n" -"        }\n" -"        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" -"        if (dist2 < dist){\n" -"            dist = dist2;\n" -"            if (witness)\n" -"                b3MprVec3Copy(witness, &witness2);\n" -"        }\n" -"    }\n" -"    return dist;\n" -"}\n" -"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"                       b3MprSimplex_t *portal,\n" -"                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n" -"{\n" -"    b3Float4 dir;\n" -"    b3MprSupport_t v4;\n" -"    unsigned long iterations;\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    iterations = 1UL;\n" -"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -"    //while (1)\n" -"	{\n" -"        // compute portal direction and obtain next support point\n" -"        b3PortalDir(portal, &dir);\n" -"        \n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" -"        // reached tolerance -> find penetration info\n" -"        if (portalReachTolerance(portal, &v4, &dir)\n" -"                || iterations ==B3_MPR_MAX_ITERATIONS)\n" -"		{\n" -"            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" -"            *depth = B3_MPR_SQRT(*depth);\n" -"			\n" -"			if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" -"			{\n" -"				\n" -"				*pdir = dir;\n" -"			} \n" -"			b3MprVec3Normalize(pdir);\n" -"			\n" -"            // barycentric coordinates:\n" -"            b3FindPos(portal, pos);\n" -"            return;\n" -"        }\n" -"        b3ExpandPortal(portal, &v4);\n" -"        iterations++;\n" -"    }\n" -"}\n" -"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -"    // Touching contact on portal's v1 - so depth is zero and direction\n" -"    // is unimportant and pos can be guessed\n" -"    *depth = 0.f;\n" -"    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"	b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" -"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -"    b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" -"                              float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -"    \n" -"    // Origin lies on v0-v1 segment.\n" -"    // Depth is distance to v1, direction also and position must be\n" -"    // computed\n" -"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -"    b3MprVec3Scale(pos, 0.5f);\n" -"    \n" -"    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -"    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" -"    b3MprVec3Normalize(dir);\n" -"}\n" -"inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" -"					b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" -"					b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"					b3ConstArray(b3Collidable_t)	cpuCollidables,\n" -"					b3ConstArray(b3Float4)	cpuVertices,\n" -"					__global b3Float4* sepAxis,\n" -"					__global int*	hasSepAxis,\n" -"					float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" -"{\n" -"	\n" -"	 b3MprSimplex_t portal;\n" -"	 \n" -"//	if (!hasSepAxis[pairIndex])\n" -"	//	return -1;\n" -"	\n" -"	hasSepAxis[pairIndex] = 0;\n" -"	 int res;\n" -"    // Phase 1: Portal discovery\n" -"    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" -"	\n" -"	  \n" -"	//sepAxis[pairIndex] = *pdir;//or -dir?\n" -"	switch (res)\n" -"	{\n" -"	case 0:\n" -"		{\n" -"			// Phase 2: Portal refinement\n" -"		\n" -"			res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" -"			if (res < 0)\n" -"				return -1;\n" -"			// Phase 3. Penetration info\n" -"			b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" -"			hasSepAxis[pairIndex] = 1;\n" -"			sepAxis[pairIndex] = -*dirOut;\n" -"			break;\n" -"		}\n" -"	case 1:\n" -"		{\n" -"			 // Touching contact on portal's v1.\n" -"			b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" -"			break;\n" -"		}\n" -"	case 2:\n" -"		{\n" -"			\n" -"			b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" -"			break;\n" -"		}\n" -"	default:\n" -"		{\n" -"			hasSepAxis[pairIndex]=0;\n" -"			//if (res < 0)\n" -"			//{\n" -"				// Origin isn't inside portal - no collision.\n" -"				return -1;\n" -"			//}\n" -"		}\n" -"	};\n" -"	\n" -"	return 0;\n" -"};\n" -"#endif //B3_MPR_PENETRATION_H\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" -"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#ifdef cl_ext_atomic_counters_32\n" -"	#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"	#define counter32_t volatile __global int*\n" -"#endif\n" -"__kernel void   mprPenetrationKernel( __global int4* pairs,\n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global float4* separatingNormals,\n" -"																					__global int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numPairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"		float depthOut;\n" -"		b3Float4 dirOut;\n" -"		b3Float4 posOut;\n" -"		int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" -"		\n" -"		\n" -"		\n" -"		\n" -"		if (res==0)\n" -"		{\n" -"			//add a contact\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"			if (dstIdx<contactCapacity)\n" -"			{\n" -"				pairs[pairIndex].z = dstIdx;\n" -"				__global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" -"				c->m_worldNormalOnB = -dirOut;//normal;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				int bodyA = pairs[pairIndex].x;\n" -"				int bodyB = pairs[pairIndex].y;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -"				c->m_childIndexA = -1;\n" -"				c->m_childIndexB = -1;\n" -"				//for (int i=0;i<nContacts;i++)\n" -"				posOut.w = -depthOut;\n" -"				c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" -"				GET_NPOINTS(*c) = 1;//nContacts;\n" -"			}\n" -"		}\n" -"	}\n" -"}\n" -"typedef float4 Quaternion;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"bool findSeparatingAxisUnitSphere(	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices,\n" -"	__global const float4* unitSphereDirections,\n" -"	int numUnitSphereDirections,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test unit sphere directions\n" -"	for (int i=0;i<numUnitSphereDirections;i++)\n" -"	{\n" -"		float4 crossje;\n" -"		crossje = unitSphereDirections[i];	\n" -"		if (dot3F4(DeltaC2,crossje)>0)\n" -"			crossje *= -1.f;\n" -"		{\n" -"			float dist;\n" -"			bool result = true;\n" -"			float Min0,Max0;\n" -"			float Min1,Max1;\n" -"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"		\n" -"			if(Max0<Min1 || Max1<Min0)\n" -"				return false;\n" -"		\n" -"			float d0 = Max0 - Min1;\n" -"			float d1 = Max1 - Min0;\n" -"			dist = d0<d1 ? d0:d1;\n" -"			result = true;\n" -"	\n" -"			if(dist<*dmin)\n" -"			{\n" -"				*dmin = dist;\n" -"				*sep = crossje;\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* unitSphereDirections,\n" -"																					__global  float4* separatingNormals,\n" -"																					__global  int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					int numUnitSphereDirections,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"	\n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"	\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"			\n" -"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	\n" -"			float dmin = dmins[i];\n" -"	\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			posA.w = 0.f;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			posB.w = 0.f;\n" -"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 c0 = transform(&c0local, &posA, &ornA);\n" -"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"			float4 c1 = transform(&c1local,&posB,&ornB);\n" -"			const float4 DeltaC2 = c0 - c1;\n" -"			float4 sepNormal = separatingNormals[i];\n" -"			\n" -"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -"			if (numEdgeEdgeDirections>numUnitSphereDirections)\n" -"			{\n" -"				bool sepEE = findSeparatingAxisUnitSphere(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																										posB,ornB,\n" -"																										DeltaC2,\n" -"																										vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" -"				if (!sepEE)\n" -"				{\n" -"					hasSeparatingAxis[i] = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis[i] = 1;\n" -"					separatingNormals[i] = sepNormal;\n" -"				}\n" -"			}\n" -"		}		//if (hasSeparatingAxis[i])\n" -"	}//(i<numPairs)\n" -"}\n" -; +static const char* mprKernelsCL = +	"/***\n" +	" * ---------------------------------\n" +	" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" +	" *\n" +	" *  This file was ported from mpr.c file, part of libccd.\n" +	" *  The Minkoski Portal Refinement implementation was ported \n" +	" *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" +	" *  at http://github.com/erwincoumans/bullet3\n" +	" *\n" +	" *  Distributed under the OSI-approved BSD License (the \"License\");\n" +	" *  see <http://www.opensource.org/licenses/bsd-license.php>.\n" +	" *  This software is distributed WITHOUT ANY WARRANTY; without even the\n" +	" *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" +	" *  See the License for more information.\n" +	" */\n" +	"#ifndef B3_MPR_PENETRATION_H\n" +	"#define B3_MPR_PENETRATION_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_RIGIDBODY_DATA_H\n" +	"#define B3_RIGIDBODY_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +	"struct b3RigidBodyData\n" +	"{\n" +	"	b3Float4				m_pos;\n" +	"	b3Quat					m_quat;\n" +	"	b3Float4				m_linVel;\n" +	"	b3Float4				m_angVel;\n" +	"	int 					m_collidableIdx;\n" +	"	float 				m_invMass;\n" +	"	float 				m_restituitionCoeff;\n" +	"	float 				m_frictionCoeff;\n" +	"};\n" +	"typedef struct b3InertiaData b3InertiaData_t;\n" +	"struct b3InertiaData\n" +	"{\n" +	"	b3Mat3x3 m_invInertiaWorld;\n" +	"	b3Mat3x3 m_initInvInertia;\n" +	"};\n" +	"#endif //B3_RIGIDBODY_DATA_H\n" +	"	\n" +	"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"typedef struct b3GpuFace b3GpuFace_t;\n" +	"struct b3GpuFace\n" +	"{\n" +	"	b3Float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"	int m_unusedPadding1;\n" +	"	int m_unusedPadding2;\n" +	"};\n" +	"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +	"struct b3ConvexPolyhedronData\n" +	"{\n" +	"	b3Float4		m_localCenter;\n" +	"	b3Float4		m_extents;\n" +	"	b3Float4		mC;\n" +	"	b3Float4		mE;\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"};\n" +	"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_COLLIDABLE_H\n" +	"#define B3_COLLIDABLE_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"enum b3ShapeTypes\n" +	"{\n" +	"	SHAPE_HEIGHT_FIELD=1,\n" +	"	SHAPE_CONVEX_HULL=3,\n" +	"	SHAPE_PLANE=4,\n" +	"	SHAPE_CONCAVE_TRIMESH=5,\n" +	"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +	"	SHAPE_SPHERE=7,\n" +	"	MAX_NUM_SHAPE_TYPES,\n" +	"};\n" +	"typedef struct b3Collidable b3Collidable_t;\n" +	"struct b3Collidable\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"};\n" +	"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" +	"struct b3GpuChildShape\n" +	"{\n" +	"	b3Float4	m_childPosition;\n" +	"	b3Quat		m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"struct b3CompoundOverlappingPair\n" +	"{\n" +	"	int m_bodyIndexA;\n" +	"	int m_bodyIndexB;\n" +	"//	int	m_pairType;\n" +	"	int m_childShapeIndexA;\n" +	"	int m_childShapeIndexB;\n" +	"};\n" +	"#endif //B3_COLLIDABLE_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define B3_MPR_SQRT sqrt\n" +	"#endif\n" +	"#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" +	"#define B3_MPR_FABS fabs\n" +	"#define B3_MPR_TOLERANCE 1E-6f\n" +	"#define B3_MPR_MAX_ITERATIONS 1000\n" +	"struct _b3MprSupport_t \n" +	"{\n" +	"    b3Float4 v;  //!< Support point in minkowski sum\n" +	"    b3Float4 v1; //!< Support point in obj1\n" +	"    b3Float4 v2; //!< Support point in obj2\n" +	"};\n" +	"typedef struct _b3MprSupport_t b3MprSupport_t;\n" +	"struct _b3MprSimplex_t \n" +	"{\n" +	"    b3MprSupport_t ps[4];\n" +	"    int last; //!< index of last added point\n" +	"};\n" +	"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" +	"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" +	"{\n" +	"    return &s->ps[idx];\n" +	"}\n" +	"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" +	"{\n" +	"    s->last = size - 1;\n" +	"}\n" +	"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" +	"{\n" +	"    return s->last + 1;\n" +	"}\n" +	"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" +	"{\n" +	"    // here is no check on boundaries\n" +	"    return &s->ps[idx];\n" +	"}\n" +	"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" +	"{\n" +	"    *d = *s;\n" +	"}\n" +	"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" +	"{\n" +	"    b3MprSupportCopy(s->ps + pos, a);\n" +	"}\n" +	"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" +	"{\n" +	"    b3MprSupport_t supp;\n" +	"    b3MprSupportCopy(&supp, &s->ps[pos1]);\n" +	"    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" +	"    b3MprSupportCopy(&s->ps[pos2], &supp);\n" +	"}\n" +	"inline int b3MprIsZero(float val)\n" +	"{\n" +	"    return B3_MPR_FABS(val) < FLT_EPSILON;\n" +	"}\n" +	"inline int b3MprEq(float _a, float _b)\n" +	"{\n" +	"    float ab;\n" +	"    float a, b;\n" +	"    ab = B3_MPR_FABS(_a - _b);\n" +	"    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" +	"        return 1;\n" +	"    a = B3_MPR_FABS(_a);\n" +	"    b = B3_MPR_FABS(_b);\n" +	"    if (b > a){\n" +	"        return ab < FLT_EPSILON * b;\n" +	"    }else{\n" +	"        return ab < FLT_EPSILON * a;\n" +	"    }\n" +	"}\n" +	"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" +	"{\n" +	"    return b3MprEq((*a).x, (*b).x)\n" +	"            && b3MprEq((*a).y, (*b).y)\n" +	"            && b3MprEq((*a).z, (*b).z);\n" +	"}\n" +	"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, 	b3ConstArray(b3Float4) verticesA)\n" +	"{\n" +	"	b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +	"	float maxDot = -B3_LARGE_FLOAT;\n" +	"    if( 0 < hull->m_numVertices )\n" +	"    {\n" +	"        const b3Float4 scaled = supportVec;\n" +	"		int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +	"        return verticesA[hull->m_vertexOffset+index];\n" +	"    }\n" +	"    return supVec;\n" +	"}\n" +	"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"														const b3Float4* _dir, b3Float4* outp, int logme)\n" +	"{\n" +	"	//dir is in worldspace, move to local space\n" +	"	\n" +	"	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" +	"	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" +	"	\n" +	"	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" +	"	\n" +	"	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" +	"	\n" +	"	//find local support vertex\n" +	"	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" +	"	\n" +	"	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" +	"	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" +	"	\n" +	"	b3Float4 pInA;\n" +	"	if (logme)\n" +	"	{\n" +	"		b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +	"		float maxDot = -B3_LARGE_FLOAT;\n" +	"		if( 0 < hull->m_numVertices )\n" +	"		{\n" +	"			const b3Float4 scaled = localDir;\n" +	"			int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +	"			pInA = cpuVertices[hull->m_vertexOffset+index];\n" +	"			\n" +	"		}\n" +	"	} else\n" +	"	{\n" +	"		pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" +	"	}\n" +	"	//move vertex to world space\n" +	"	*outp = b3TransformPoint(pInA,pos,orn);\n" +	"	\n" +	"}\n" +	"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													const b3Float4* _dir, b3MprSupport_t *supp)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"	dir = *_dir;\n" +	"	b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" +	"    dir = *_dir*-1.f;\n" +	"	b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" +	"    supp->v = supp->v1 - supp->v2;\n" +	"}\n" +	"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" +	"{\n" +	"    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" +	"	center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" +	"    center->v = center->v1 - center->v2;\n" +	"}\n" +	"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" +	"{\n" +	"	(*v).x = x;\n" +	"	(*v).y = y;\n" +	"	(*v).z = z;\n" +	"	(*v).w = 0.f;\n" +	"}\n" +	"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"    (*v).x += (*w).x;\n" +	"    (*v).y += (*w).y;\n" +	"    (*v).z += (*w).z;\n" +	"}\n" +	"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"    *v = *w;\n" +	"}\n" +	"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" +	"{\n" +	"    *d *= k;\n" +	"}\n" +	"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"    float dot;\n" +	"	dot = b3Dot3F4(*a,*b);\n" +	"    return dot;\n" +	"}\n" +	"inline float b3MprVec3Len2(const b3Float4 *v)\n" +	"{\n" +	"    return b3MprVec3Dot(v, v);\n" +	"}\n" +	"inline void b3MprVec3Normalize(b3Float4 *d)\n" +	"{\n" +	"    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" +	"    b3MprVec3Scale(d, k);\n" +	"}\n" +	"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"	*d = b3Cross3(*a,*b);\n" +	"	\n" +	"}\n" +	"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"	*d = *v - *w;\n" +	"}\n" +	"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" +	"{\n" +	"    b3Float4 v2v1, v3v1;\n" +	"    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b3MprVec3Cross(dir, &v2v1, &v3v1);\n" +	"    b3MprVec3Normalize(dir);\n" +	"}\n" +	"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" +	"                                       const b3Float4 *dir)\n" +	"{\n" +	"    float dot;\n" +	"    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +	"    return b3MprIsZero(dot) || dot > 0.f;\n" +	"}\n" +	"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" +	"                                     const b3MprSupport_t *v4,\n" +	"                                     const b3Float4 *dir)\n" +	"{\n" +	"    float dv1, dv2, dv3, dv4;\n" +	"    float dot1, dot2, dot3;\n" +	"    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" +	"    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" +	"    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" +	"    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" +	"    dv4 = b3MprVec3Dot(&v4->v, dir);\n" +	"    dot1 = dv4 - dv1;\n" +	"    dot2 = dv4 - dv2;\n" +	"    dot3 = dv4 - dv3;\n" +	"    dot1 = B3_MPR_FMIN(dot1, dot2);\n" +	"    dot1 = B3_MPR_FMIN(dot1, dot3);\n" +	"    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" +	"}\n" +	"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n" +	"                                         const b3MprSupport_t *v4,\n" +	"                                         const b3Float4 *dir)\n" +	"{\n" +	"    float dot;\n" +	"    dot = b3MprVec3Dot(&v4->v, dir);\n" +	"    return b3MprIsZero(dot) || dot > 0.f;\n" +	"}\n" +	"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" +	"                              const b3MprSupport_t *v4)\n" +	"{\n" +	"    float dot;\n" +	"    b3Float4 v4v0;\n" +	"    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" +	"    if (dot > 0.f){\n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" +	"        if (dot > 0.f){\n" +	"            b3MprSimplexSet(portal, 1, v4);\n" +	"        }else{\n" +	"            b3MprSimplexSet(portal, 3, v4);\n" +	"        }\n" +	"    }else{\n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" +	"        if (dot > 0.f){\n" +	"            b3MprSimplexSet(portal, 2, v4);\n" +	"        }else{\n" +	"            b3MprSimplexSet(portal, 1, v4);\n" +	"        }\n" +	"    }\n" +	"}\n" +	"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													__global int*	hasSepAxis,\n" +	"													b3MprSimplex_t *portal)\n" +	"{\n" +	"    b3Float4 dir, va, vb;\n" +	"    float dot;\n" +	"    int cont;\n" +	"	\n" +	"	\n" +	"    // vertex 0 is center of portal\n" +	"    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" +	"    // vertex 0 is center of portal\n" +	"    b3MprSimplexSetSize(portal, 1);\n" +	"	\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" +	"        // Portal's center lies on origin (0,0,0) => we know that objects\n" +	"        // intersect but we would need to know penetration info.\n" +	"        // So move center little bit...\n" +	"        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" +	"        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" +	"    }\n" +	"    // vertex 1 = support in direction of origin\n" +	"    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Scale(&dir, -1.f);\n" +	"    b3MprVec3Normalize(&dir);\n" +	"    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" +	"    b3MprSimplexSetSize(portal, 2);\n" +	"    // test if origin isn't outside of v1\n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" +	"	\n" +	"    if (b3MprIsZero(dot) || dot < 0.f)\n" +	"        return -1;\n" +	"    // vertex 2\n" +	"    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" +	"        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" +	"            // origin lies on v1\n" +	"            return 1;\n" +	"        }else{\n" +	"            // origin lies on v0-v1 segment\n" +	"            return 2;\n" +	"        }\n" +	"    }\n" +	"    b3MprVec3Normalize(&dir);\n" +	"	 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" +	"    \n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" +	"    if (b3MprIsZero(dot) || dot < 0.f)\n" +	"        return -1;\n" +	"    b3MprSimplexSetSize(portal, 3);\n" +	"    // vertex 3 direction\n" +	"    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                     &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                     &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Cross(&dir, &va, &vb);\n" +	"    b3MprVec3Normalize(&dir);\n" +	"    // it is better to form portal faces to be oriented \"outside\" origin\n" +	"    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    if (dot > 0.f){\n" +	"        b3MprSimplexSwap(portal, 1, 2);\n" +	"        b3MprVec3Scale(&dir, -1.f);\n" +	"    }\n" +	"    while (b3MprSimplexSize(portal) < 4){\n" +	"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" +	"        \n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" +	"        if (b3MprIsZero(dot) || dot < 0.f)\n" +	"            return -1;\n" +	"        cont = 0;\n" +	"        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" +	"        // continue\n" +	"        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                          &b3MprSimplexPoint(portal, 3)->v);\n" +	"        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +	"        if (dot < 0.f && !b3MprIsZero(dot)){\n" +	"            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" +	"            cont = 1;\n" +	"        }\n" +	"        if (!cont){\n" +	"            // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" +	"            // continue\n" +	"            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                              &b3MprSimplexPoint(portal, 2)->v);\n" +	"            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +	"            if (dot < 0.f && !b3MprIsZero(dot)){\n" +	"                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" +	"                cont = 1;\n" +	"            }\n" +	"        }\n" +	"        if (cont){\n" +	"            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                             &b3MprSimplexPoint(portal, 0)->v);\n" +	"            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                             &b3MprSimplexPoint(portal, 0)->v);\n" +	"            b3MprVec3Cross(&dir, &va, &vb);\n" +	"            b3MprVec3Normalize(&dir);\n" +	"        }else{\n" +	"            b3MprSimplexSetSize(portal, 4);\n" +	"        }\n" +	"    }\n" +	"    return 0;\n" +	"}\n" +	"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													b3MprSimplex_t *portal)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"    b3MprSupport_t v4;\n" +	"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +	"    //while (1)\n" +	"	{\n" +	"        // compute direction outside the portal (from v0 throught v1,v2,v3\n" +	"        // face)\n" +	"        b3PortalDir(portal, &dir);\n" +	"        // test if origin is inside the portal\n" +	"        if (portalEncapsulesOrigin(portal, &dir))\n" +	"            return 0;\n" +	"        // get next support point\n" +	"        \n" +	"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +	"        // test if v4 can expand portal to contain origin and if portal\n" +	"        // expanding doesn't reach given tolerance\n" +	"        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" +	"                || portalReachTolerance(portal, &v4, &dir))\n" +	"		{\n" +	"            return -1;\n" +	"        }\n" +	"        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" +	"        // difference (direction from v0).\n" +	"        b3ExpandPortal(portal, &v4);\n" +	"    }\n" +	"    return -1;\n" +	"}\n" +	"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" +	"{\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    b3Float4 dir;\n" +	"    size_t i;\n" +	"    float b[4], sum, inv;\n" +	"    b3Float4 vec, p1, p2;\n" +	"    b3PortalDir(portal, &dir);\n" +	"    // use barycentric coordinates of tetrahedron to find origin\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                       &b3MprSimplexPoint(portal, 2)->v);\n" +	"    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                       &b3MprSimplexPoint(portal, 2)->v);\n" +	"    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +	"	sum = b[0] + b[1] + b[2] + b[3];\n" +	"    if (b3MprIsZero(sum) || sum < 0.f){\n" +	"		b[0] = 0.f;\n" +	"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                           &b3MprSimplexPoint(portal, 3)->v);\n" +	"        b[1] = b3MprVec3Dot(&vec, &dir);\n" +	"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                           &b3MprSimplexPoint(portal, 1)->v);\n" +	"        b[2] = b3MprVec3Dot(&vec, &dir);\n" +	"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                           &b3MprSimplexPoint(portal, 2)->v);\n" +	"        b[3] = b3MprVec3Dot(&vec, &dir);\n" +	"		sum = b[1] + b[2] + b[3];\n" +	"	}\n" +	"	inv = 1.f / sum;\n" +	"    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" +	"    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" +	"    for (i = 0; i < 4; i++){\n" +	"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" +	"        b3MprVec3Scale(&vec, b[i]);\n" +	"        b3MprVec3Add(&p1, &vec);\n" +	"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" +	"        b3MprVec3Scale(&vec, b[i]);\n" +	"        b3MprVec3Add(&p2, &vec);\n" +	"    }\n" +	"    b3MprVec3Scale(&p1, inv);\n" +	"    b3MprVec3Scale(&p2, inv);\n" +	"    b3MprVec3Copy(pos, &p1);\n" +	"    b3MprVec3Add(pos, &p2);\n" +	"    b3MprVec3Scale(pos, 0.5);\n" +	"}\n" +	"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"    b3Float4 ab;\n" +	"    b3MprVec3Sub2(&ab, a, b);\n" +	"    return b3MprVec3Len2(&ab);\n" +	"}\n" +	"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" +	"                                                  const b3Float4 *x0,\n" +	"                                                  const b3Float4 *b,\n" +	"                                                  b3Float4 *witness)\n" +	"{\n" +	"    // The computation comes from solving equation of segment:\n" +	"    //      S(t) = x0 + t.d\n" +	"    //          where - x0 is initial point of segment\n" +	"    //                - d is direction of segment from x0 (|d| > 0)\n" +	"    //                - t belongs to <0, 1> interval\n" +	"    // \n" +	"    // Than, distance from a segment to some point P can be expressed:\n" +	"    //      D(t) = |x0 + t.d - P|^2\n" +	"    //          which is distance from any point on segment. Minimization\n" +	"    //          of this function brings distance from P to segment.\n" +	"    // Minimization of D(t) leads to simple quadratic equation that's\n" +	"    // solving is straightforward.\n" +	"    //\n" +	"    // Bonus of this method is witness point for free.\n" +	"    float dist, t;\n" +	"    b3Float4 d, a;\n" +	"    // direction of segment\n" +	"    b3MprVec3Sub2(&d, b, x0);\n" +	"    // precompute vector from P to x0\n" +	"    b3MprVec3Sub2(&a, x0, P);\n" +	"    t  = -1.f * b3MprVec3Dot(&a, &d);\n" +	"    t /= b3MprVec3Len2(&d);\n" +	"    if (t < 0.f || b3MprIsZero(t)){\n" +	"        dist = b3MprVec3Dist2(x0, P);\n" +	"        if (witness)\n" +	"            b3MprVec3Copy(witness, x0);\n" +	"    }else if (t > 1.f || b3MprEq(t, 1.f)){\n" +	"        dist = b3MprVec3Dist2(b, P);\n" +	"        if (witness)\n" +	"            b3MprVec3Copy(witness, b);\n" +	"    }else{\n" +	"        if (witness){\n" +	"            b3MprVec3Copy(witness, &d);\n" +	"            b3MprVec3Scale(witness, t);\n" +	"            b3MprVec3Add(witness, x0);\n" +	"            dist = b3MprVec3Dist2(witness, P);\n" +	"        }else{\n" +	"            // recycling variables\n" +	"            b3MprVec3Scale(&d, t);\n" +	"            b3MprVec3Add(&d, &a);\n" +	"            dist = b3MprVec3Len2(&d);\n" +	"        }\n" +	"    }\n" +	"    return dist;\n" +	"}\n" +	"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" +	"                                const b3Float4 *x0, const b3Float4 *B,\n" +	"                                const b3Float4 *C,\n" +	"                                b3Float4 *witness)\n" +	"{\n" +	"    // Computation comes from analytic expression for triangle (x0, B, C)\n" +	"    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" +	"    // Then equation for distance is:\n" +	"    //      D(s, t) = | T(s, t) - P |^2\n" +	"    // This leads to minimization of quadratic function of two variables.\n" +	"    // The solution from is taken only if s is between 0 and 1, t is\n" +	"    // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" +	"    // computed.\n" +	"    b3Float4 d1, d2, a;\n" +	"    float u, v, w, p, q, r;\n" +	"    float s, t, dist, dist2;\n" +	"    b3Float4 witness2;\n" +	"    b3MprVec3Sub2(&d1, B, x0);\n" +	"    b3MprVec3Sub2(&d2, C, x0);\n" +	"    b3MprVec3Sub2(&a, x0, P);\n" +	"    u = b3MprVec3Dot(&a, &a);\n" +	"    v = b3MprVec3Dot(&d1, &d1);\n" +	"    w = b3MprVec3Dot(&d2, &d2);\n" +	"    p = b3MprVec3Dot(&a, &d1);\n" +	"    q = b3MprVec3Dot(&a, &d2);\n" +	"    r = b3MprVec3Dot(&d1, &d2);\n" +	"    s = (q * r - w * p) / (w * v - r * r);\n" +	"    t = (-s * r - q) / w;\n" +	"    if ((b3MprIsZero(s) || s > 0.f)\n" +	"            && (b3MprEq(s, 1.f) || s < 1.f)\n" +	"            && (b3MprIsZero(t) || t > 0.f)\n" +	"            && (b3MprEq(t, 1.f) || t < 1.f)\n" +	"            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" +	"        if (witness){\n" +	"            b3MprVec3Scale(&d1, s);\n" +	"            b3MprVec3Scale(&d2, t);\n" +	"            b3MprVec3Copy(witness, x0);\n" +	"            b3MprVec3Add(witness, &d1);\n" +	"            b3MprVec3Add(witness, &d2);\n" +	"            dist = b3MprVec3Dist2(witness, P);\n" +	"        }else{\n" +	"            dist  = s * s * v;\n" +	"            dist += t * t * w;\n" +	"            dist += 2.f * s * t * r;\n" +	"            dist += 2.f * s * p;\n" +	"            dist += 2.f * t * q;\n" +	"            dist += u;\n" +	"        }\n" +	"    }else{\n" +	"        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" +	"        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" +	"        if (dist2 < dist){\n" +	"            dist = dist2;\n" +	"            if (witness)\n" +	"                b3MprVec3Copy(witness, &witness2);\n" +	"        }\n" +	"        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" +	"        if (dist2 < dist){\n" +	"            dist = dist2;\n" +	"            if (witness)\n" +	"                b3MprVec3Copy(witness, &witness2);\n" +	"        }\n" +	"    }\n" +	"    return dist;\n" +	"}\n" +	"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"                       b3MprSimplex_t *portal,\n" +	"                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"    b3MprSupport_t v4;\n" +	"    unsigned long iterations;\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    iterations = 1UL;\n" +	"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +	"    //while (1)\n" +	"	{\n" +	"        // compute portal direction and obtain next support point\n" +	"        b3PortalDir(portal, &dir);\n" +	"        \n" +	"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +	"        // reached tolerance -> find penetration info\n" +	"        if (portalReachTolerance(portal, &v4, &dir)\n" +	"                || iterations ==B3_MPR_MAX_ITERATIONS)\n" +	"		{\n" +	"            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" +	"            *depth = B3_MPR_SQRT(*depth);\n" +	"			\n" +	"			if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" +	"			{\n" +	"				\n" +	"				*pdir = dir;\n" +	"			} \n" +	"			b3MprVec3Normalize(pdir);\n" +	"			\n" +	"            // barycentric coordinates:\n" +	"            b3FindPos(portal, pos);\n" +	"            return;\n" +	"        }\n" +	"        b3ExpandPortal(portal, &v4);\n" +	"        iterations++;\n" +	"    }\n" +	"}\n" +	"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" +	"{\n" +	"    // Touching contact on portal's v1 - so depth is zero and direction\n" +	"    // is unimportant and pos can be guessed\n" +	"    *depth = 0.f;\n" +	"    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"	b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" +	"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +	"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +	"    b3MprVec3Scale(pos, 0.5);\n" +	"}\n" +	"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" +	"                              float *depth, b3Float4 *dir, b3Float4 *pos)\n" +	"{\n" +	"    \n" +	"    // Origin lies on v0-v1 segment.\n" +	"    // Depth is distance to v1, direction also and position must be\n" +	"    // computed\n" +	"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +	"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +	"    b3MprVec3Scale(pos, 0.5f);\n" +	"    \n" +	"    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +	"    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" +	"    b3MprVec3Normalize(dir);\n" +	"}\n" +	"inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" +	"					b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" +	"					b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"					b3ConstArray(b3Collidable_t)	cpuCollidables,\n" +	"					b3ConstArray(b3Float4)	cpuVertices,\n" +	"					__global b3Float4* sepAxis,\n" +	"					__global int*	hasSepAxis,\n" +	"					float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" +	"{\n" +	"	\n" +	"	 b3MprSimplex_t portal;\n" +	"	 \n" +	"//	if (!hasSepAxis[pairIndex])\n" +	"	//	return -1;\n" +	"	\n" +	"	hasSepAxis[pairIndex] = 0;\n" +	"	 int res;\n" +	"    // Phase 1: Portal discovery\n" +	"    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" +	"	\n" +	"	  \n" +	"	//sepAxis[pairIndex] = *pdir;//or -dir?\n" +	"	switch (res)\n" +	"	{\n" +	"	case 0:\n" +	"		{\n" +	"			// Phase 2: Portal refinement\n" +	"		\n" +	"			res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" +	"			if (res < 0)\n" +	"				return -1;\n" +	"			// Phase 3. Penetration info\n" +	"			b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" +	"			hasSepAxis[pairIndex] = 1;\n" +	"			sepAxis[pairIndex] = -*dirOut;\n" +	"			break;\n" +	"		}\n" +	"	case 1:\n" +	"		{\n" +	"			 // Touching contact on portal's v1.\n" +	"			b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" +	"			break;\n" +	"		}\n" +	"	case 2:\n" +	"		{\n" +	"			\n" +	"			b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" +	"			break;\n" +	"		}\n" +	"	default:\n" +	"		{\n" +	"			hasSepAxis[pairIndex]=0;\n" +	"			//if (res < 0)\n" +	"			//{\n" +	"				// Origin isn't inside portal - no collision.\n" +	"				return -1;\n" +	"			//}\n" +	"		}\n" +	"	};\n" +	"	\n" +	"	return 0;\n" +	"};\n" +	"#endif //B3_MPR_PENETRATION_H\n" +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"	#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	"	#define counter32_t volatile __global int*\n" +	"#endif\n" +	"__kernel void   mprPenetrationKernel( __global int4* pairs,\n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global float4* separatingNormals,\n" +	"																					__global int* hasSeparatingAxis,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numPairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		float depthOut;\n" +	"		b3Float4 dirOut;\n" +	"		b3Float4 posOut;\n" +	"		int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" +	"		\n" +	"		\n" +	"		\n" +	"		\n" +	"		if (res==0)\n" +	"		{\n" +	"			//add a contact\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			if (dstIdx<contactCapacity)\n" +	"			{\n" +	"				pairs[pairIndex].z = dstIdx;\n" +	"				__global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" +	"				c->m_worldNormalOnB = -dirOut;//normal;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				int bodyA = pairs[pairIndex].x;\n" +	"				int bodyB = pairs[pairIndex].y;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +	"				c->m_childIndexA = -1;\n" +	"				c->m_childIndexB = -1;\n" +	"				//for (int i=0;i<nContacts;i++)\n" +	"				posOut.w = -depthOut;\n" +	"				c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" +	"				GET_NPOINTS(*c) = 1;//nContacts;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"}\n" +	"typedef float4 Quaternion;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"bool findSeparatingAxisUnitSphere(	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices,\n" +	"	__global const float4* unitSphereDirections,\n" +	"	int numUnitSphereDirections,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test unit sphere directions\n" +	"	for (int i=0;i<numUnitSphereDirections;i++)\n" +	"	{\n" +	"		float4 crossje;\n" +	"		crossje = unitSphereDirections[i];	\n" +	"		if (dot3F4(DeltaC2,crossje)>0)\n" +	"			crossje *= -1.f;\n" +	"		{\n" +	"			float dist;\n" +	"			bool result = true;\n" +	"			float Min0,Max0;\n" +	"			float Min1,Max1;\n" +	"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"		\n" +	"			if(Max0<Min1 || Max1<Min0)\n" +	"				return false;\n" +	"		\n" +	"			float d0 = Max0 - Min1;\n" +	"			float d1 = Max1 - Min0;\n" +	"			dist = d0<d1 ? d0:d1;\n" +	"			result = true;\n" +	"	\n" +	"			if(dist<*dmin)\n" +	"			{\n" +	"				*dmin = dist;\n" +	"				*sep = crossje;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* unitSphereDirections,\n" +	"																					__global  float4* separatingNormals,\n" +	"																					__global  int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					int numUnitSphereDirections,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"	\n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"	\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"			\n" +	"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	\n" +	"			float dmin = dmins[i];\n" +	"	\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			posA.w = 0.f;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			posB.w = 0.f;\n" +	"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"			const float4 DeltaC2 = c0 - c1;\n" +	"			float4 sepNormal = separatingNormals[i];\n" +	"			\n" +	"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +	"			if (numEdgeEdgeDirections>numUnitSphereDirections)\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisUnitSphere(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																										posB,ornB,\n" +	"																										DeltaC2,\n" +	"																										vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 1;\n" +	"					separatingNormals[i] = sepNormal;\n" +	"				}\n" +	"			}\n" +	"		}		//if (hasSeparatingAxis[i])\n" +	"	}//(i<numPairs)\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index b0103fe674..b2e0a2dd47 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -1,1289 +1,1288 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* primitiveContactsKernelsCL= \ -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" -"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_PLANE 4\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	int m_numChildShapes;\n" -"	float m_radius;\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;	\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"//	return numerator/denominator;	\n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtInvert(orientationIn);\n" -"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void	trMul(float4 translationA, Quaternion orientationA,\n" -"						float4 translationB, Quaternion orientationB,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtMul(orientationA,orientationB);\n" -"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" -"{\n" -"	return make_float4(	a.x + (b.x - a.x) * t,\n" -"						a.y + (b.y - a.y) * t,\n" -"						a.z + (b.z - a.z) * t,\n" -"						0.f);\n" -"}\n" -"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" -"{\n" -"	float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" -"	float dist = dot3F4(n, point) + planeEqn.w;\n" -"	*closestPointOnFace = point - dist * n;\n" -"	return dist;\n" -"}\n" -"inline bool IsPointInPolygon(float4 p, \n" -"							const btGpuFace* face,\n" -"							__global const float4* baseVertex,\n" -"							__global const  int* convexIndices,\n" -"							float4* out)\n" -"{\n" -"    float4 a;\n" -"    float4 b;\n" -"    float4 ab;\n" -"    float4 ap;\n" -"    float4 v;\n" -"	float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" -"	\n" -"	if (face->m_numIndices<2)\n" -"		return false;\n" -"	\n" -"	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" -"	\n" -"	b = v0;\n" -"    for(unsigned i=0; i != face->m_numIndices; ++i)\n" -"    {\n" -"		a = b;\n" -"		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" -"		b = vi;\n" -"        ab = b-a;\n" -"        ap = p-a;\n" -"        v = cross3(ab,plane);\n" -"        if (dot(ap, v) > 0.f)\n" -"        {\n" -"            float ab_m2 = dot(ab, ab);\n" -"            float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" -"            if (rt <= 0.f)\n" -"            {\n" -"                *out = a;\n" -"            }\n" -"            else if (rt >= 1.f) \n" -"            {\n" -"                *out = b;\n" -"            }\n" -"            else\n" -"            {\n" -"            	float s = 1.f - rt;\n" -"				out[0].x = s * a.x + rt * b.x;\n" -"				out[0].y = s * a.y + rt * b.y;\n" -"				out[0].z = s * a.z + rt * b.z;\n" -"            }\n" -"            return false;\n" -"        }\n" -"    }\n" -"    return true;\n" -"}\n" -"void	computeContactSphereConvex(int pairIndex,\n" -"																int bodyIndexA, int bodyIndexB, \n" -"																int collidableIndexA, int collidableIndexB, \n" -"																__global const BodyData* rigidBodies, \n" -"																__global const btCollidableGpu* collidables,\n" -"																__global const ConvexPolyhedronCL* convexShapes,\n" -"																__global const float4* convexVertices,\n" -"																__global const int* convexIndices,\n" -"																__global const btGpuFace* faces,\n" -"																__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																counter32_t nGlobalContactsOut,\n" -"																int maxContactCapacity,\n" -"																float4 spherePos2,\n" -"																float radius,\n" -"																float4 pos,\n" -"																float4 quat\n" -"																)\n" -"{\n" -"	float4 invPos;\n" -"	float4 invOrn;\n" -"	trInverse(pos,quat, &invPos,&invOrn);\n" -"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -"	int numFaces = convexShapes[shapeIndex].m_numFaces;\n" -"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" -"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -"	float minDist = -1000000.f;\n" -"	bool bCollide = true;\n" -"	for ( int f = 0; f < numFaces; f++ )\n" -"	{\n" -"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" -"		// set up a plane equation \n" -"		float4 planeEqn;\n" -"		float4 n1 = face.m_plane;\n" -"		n1.w = 0.f;\n" -"		planeEqn = n1;\n" -"		planeEqn.w = face.m_plane.w;\n" -"		\n" -"	\n" -"		// compute a signed distance from the vertex in cloth to the face of rigidbody.\n" -"		float4 pntReturn;\n" -"		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" -"		// If the distance is positive, the plane is a separating plane. \n" -"		if ( dist > radius )\n" -"		{\n" -"			bCollide = false;\n" -"			break;\n" -"		}\n" -"		if (dist>0)\n" -"		{\n" -"			//might hit an edge or vertex\n" -"			float4 out;\n" -"			float4 zeroPos = make_float4(0,0,0,0);\n" -"			bool isInPoly = IsPointInPolygon(spherePos,\n" -"					&face,\n" -"					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" -"					convexIndices,\n" -"           &out);\n" -"			if (isInPoly)\n" -"			{\n" -"				if (dist>minDist)\n" -"				{\n" -"					minDist = dist;\n" -"					closestPnt = pntReturn;\n" -"					hitNormalWorld = planeEqn;\n" -"					\n" -"				}\n" -"			} else\n" -"			{\n" -"				float4 tmp = spherePos-out;\n" -"				float l2 = dot(tmp,tmp);\n" -"				if (l2<radius*radius)\n" -"				{\n" -"					dist  = sqrt(l2);\n" -"					if (dist>minDist)\n" -"					{\n" -"						minDist = dist;\n" -"						closestPnt = out;\n" -"						hitNormalWorld = tmp/dist;\n" -"						\n" -"					}\n" -"					\n" -"				} else\n" -"				{\n" -"					bCollide = false;\n" -"					break;\n" -"				}\n" -"			}\n" -"		} else\n" -"		{\n" -"			if ( dist > minDist )\n" -"			{\n" -"				minDist = dist;\n" -"				closestPnt = pntReturn;\n" -"				hitNormalWorld.xyz = planeEqn.xyz;\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"	\n" -"	if (bCollide && minDist > -10000)\n" -"	{\n" -"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" -"		\n" -"		float actualDepth = minDist-radius;\n" -"		if (actualDepth<=0.f)\n" -"		{\n" -"			\n" -"			pOnB1.w = actualDepth;\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"		\n" -"			\n" -"			if (1)//dstIdx < maxContactCapacity)\n" -"			{\n" -"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"				c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"				c->m_worldPosB[0] = pOnB1;\n" -"				c->m_childIndexA = -1;\n" -"				c->m_childIndexB = -1;\n" -"				GET_NPOINTS(*c) = 1;\n" -"			} \n" -"		}\n" -"	}//if (hasCollision)\n" -"}\n" -"							\n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -"	if( nPoints == 0 )\n" -"        return 0;\n" -"    \n" -"    if (nPoints <=4)\n" -"        return nPoints;\n" -"    \n" -"    \n" -"    if (nPoints >64)\n" -"        nPoints = 64;\n" -"    \n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		\n" -"		for (int i=0;i<nPoints;i++)\n" -"			center += p[i];\n" -"		center /= (float)nPoints;\n" -"	}\n" -"    \n" -"	\n" -"    \n" -"	//	sample 4 directions\n" -"    \n" -"    float4 aVector = p[0] - center;\n" -"    float4 u = cross3( nearNormal, aVector );\n" -"    float4 v = cross3( nearNormal, u );\n" -"    u = normalize3( u );\n" -"    v = normalize3( v );\n" -"    \n" -"    \n" -"    //keep point with deepest penetration\n" -"    float minW= FLT_MAX;\n" -"    \n" -"    int minIndex=-1;\n" -"    \n" -"    float4 maxDots;\n" -"    maxDots.x = FLT_MIN;\n" -"    maxDots.y = FLT_MIN;\n" -"    maxDots.z = FLT_MIN;\n" -"    maxDots.w = FLT_MIN;\n" -"    \n" -"    //	idx, distance\n" -"    for(int ie = 0; ie<nPoints; ie++ )\n" -"    {\n" -"        if (p[ie].w<minW)\n" -"        {\n" -"            minW = p[ie].w;\n" -"            minIndex=ie;\n" -"        }\n" -"        float f;\n" -"        float4 r = p[ie]-center;\n" -"        f = dot3F4( u, r );\n" -"        if (f<maxDots.x)\n" -"        {\n" -"            maxDots.x = f;\n" -"            contactIdx[0].x = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -u, r );\n" -"        if (f<maxDots.y)\n" -"        {\n" -"            maxDots.y = f;\n" -"            contactIdx[0].y = ie;\n" -"        }\n" -"        \n" -"        \n" -"        f = dot3F4( v, r );\n" -"        if (f<maxDots.z)\n" -"        {\n" -"            maxDots.z = f;\n" -"            contactIdx[0].z = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -v, r );\n" -"        if (f<maxDots.w)\n" -"        {\n" -"            maxDots.w = f;\n" -"            contactIdx[0].w = ie;\n" -"        }\n" -"        \n" -"    }\n" -"    \n" -"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -"    {\n" -"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" -"        contactIdx[0].x = minIndex;\n" -"    }\n" -"    \n" -"    return 4;\n" -"    \n" -"}\n" -"#define MAX_PLANE_CONVEX_POINTS 64\n" -"int computeContactPlaneConvex(int pairIndex,\n" -"								int bodyIndexA, int bodyIndexB, \n" -"								int collidableIndexA, int collidableIndexB, \n" -"								__global const BodyData* rigidBodies, \n" -"								__global const btCollidableGpu*collidables,\n" -"								__global const ConvexPolyhedronCL* convexShapes,\n" -"								__global const float4* convexVertices,\n" -"								__global const int* convexIndices,\n" -"								__global const btGpuFace* faces,\n" -"								__global struct b3Contact4Data* restrict globalContactsOut,\n" -"								counter32_t nGlobalContactsOut,\n" -"								int maxContactCapacity,\n" -"								float4 posB,\n" -"								Quaternion ornB\n" -"								)\n" -"{\n" -"	int resultIndex=-1;\n" -"		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -"	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" -"	\n" -"	float4 posA;\n" -"	posA = rigidBodies[bodyIndexA].m_pos;\n" -"	Quaternion ornA;\n" -"	ornA = rigidBodies[bodyIndexA].m_quat;\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	float4 planeEq;\n" -"	 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -"	float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -"	float4 planeNormalWorld;\n" -"	planeNormalWorld = qtRotate(ornA,planeNormal);\n" -"	float planeConstant = planeEq.w;\n" -"	\n" -"	float4 invPosA;Quaternion invOrnA;\n" -"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -"	{\n" -"		\n" -"		trInverse(posA,ornA,&invPosA,&invOrnA);\n" -"		trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	}\n" -"	float4 invPosB;Quaternion invOrnB;\n" -"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" -"	{\n" -"		\n" -"		trInverse(posB,ornB,&invPosB,&invOrnB);\n" -"		trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1);	\n" -"	}\n" -"	\n" -"	float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" -"	float maxDot = -1e30;\n" -"	int hitVertex=-1;\n" -"	float4 hitVtx;\n" -"	float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" -"	int numPoints = 0;\n" -"	int4 contactIdx;\n" -"	contactIdx=make_int4(0,1,2,3);\n" -"    \n" -"	\n" -"	for (int i=0;i<hullB->m_numVertices;i++)\n" -"	{\n" -"		float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" -"		float curDot = dot(vtx,planeNormalInConvex);\n" -"		if (curDot>maxDot)\n" -"		{\n" -"			hitVertex=i;\n" -"			maxDot=curDot;\n" -"			hitVtx = vtx;\n" -"			//make sure the deepest points is always included\n" -"			if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" -"				numPoints--;\n" -"		}\n" -"		if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" -"		{\n" -"			float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" -"			float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" -"			float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" -"			if (dist<0.f)\n" -"			{\n" -"				vtxWorld.w = dist;\n" -"				contactPoints[numPoints] = vtxWorld;\n" -"				numPoints++;\n" -"			}\n" -"		}\n" -"	}\n" -"	int numReducedPoints  = numPoints;\n" -"	if (numPoints>4)\n" -"	{\n" -"		numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" -"	}\n" -"	if (numReducedPoints>0)\n" -"	{\n" -"		int dstIdx;\n" -"	    AppendInc( nGlobalContactsOut, dstIdx );\n" -"		if (dstIdx < maxContactCapacity)\n" -"		{\n" -"			resultIndex = dstIdx;\n" -"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"			c->m_worldNormalOnB = -planeNormalWorld;\n" -"			//c->setFrictionCoeff(0.7);\n" -"			//c->setRestituitionCoeff(0.f);\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = pairIndex;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			switch (numReducedPoints)\n" -"            {\n" -"                case 4:\n" -"                    c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" -"                case 3:\n" -"                    c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" -"                case 2:\n" -"                    c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" -"                case 1:\n" -"                    c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" -"                default:\n" -"                {\n" -"                }\n" -"            };\n" -"			\n" -"			GET_NPOINTS(*c) = numReducedPoints;\n" -"		}//if (dstIdx < numPairs)\n" -"	}	\n" -"	return resultIndex;\n" -"}\n" -"void	computeContactPlaneSphere(int pairIndex,\n" -"																int bodyIndexA, int bodyIndexB, \n" -"																int collidableIndexA, int collidableIndexB, \n" -"																__global const BodyData* rigidBodies, \n" -"																__global const btCollidableGpu* collidables,\n" -"																__global const btGpuFace* faces,\n" -"																__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																counter32_t nGlobalContactsOut,\n" -"																int maxContactCapacity)\n" -"{\n" -"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -"	float radius = collidables[collidableIndexB].m_radius;\n" -"	float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" -"	float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" -"	float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" -"	float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" -"	\n" -"	bool hasCollision = false;\n" -"	float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -"	float planeConstant = planeEq.w;\n" -"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -"	{\n" -"		float4 invPosA;Quaternion invOrnA;\n" -"		trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" -"		trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	}\n" -"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" -"	{\n" -"		float4 invPosB;Quaternion invOrnB;\n" -"		trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" -"		trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1);	\n" -"	}\n" -"	float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" -"	float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" -"	hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" -"	if (hasCollision)\n" -"	{\n" -"		float4 vtxInPlaneProjected1 = vtxInPlane1 -   distance*planeNormal1;\n" -"		float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" -"		float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" -"		float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" -"		pOnB1.w = distance;\n" -"		int dstIdx;\n" -"    AppendInc( nGlobalContactsOut, dstIdx );\n" -"		\n" -"		if (dstIdx < maxContactCapacity)\n" -"		{\n" -"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"			c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = pairIndex;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"			c->m_worldPosB[0] = pOnB1;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			GET_NPOINTS(*c) = 1;\n" -"		}//if (dstIdx < numPairs)\n" -"	}//if (hasCollision)\n" -"}\n" -"__kernel void   primitiveContactsKernel( __global int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int numPairs, int maxContactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -"		{\n" -"			float4 posB;\n" -"			posB = rigidBodies[bodyIndexB].m_pos;\n" -"			Quaternion ornB;\n" -"			ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" -"			if (contactIndex>=0)\n" -"				pairs[pairIndex].z = contactIndex;\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -"		{\n" -"			float4 posA;\n" -"			posA = rigidBodies[bodyIndexA].m_pos;\n" -"			Quaternion ornA;\n" -"			ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -"			if (contactIndex>=0)\n" -"				pairs[pairIndex].z = contactIndex;\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																rigidBodies,collidables,faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -"		{\n" -"			computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -"			return;\n" -"		}\n" -"		\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -"		{\n" -"		\n" -"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" -"			float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" -"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																spherePos,sphereRadius,convexPos,convexOrn);\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"		\n" -"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"			float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																spherePos,sphereRadius,convexPos,convexOrn);\n" -"			return;\n" -"		}\n" -"	\n" -"	\n" -"	\n" -"		\n" -"	\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			//sphere-sphere\n" -"			float radiusA = collidables[collidableIndexA].m_radius;\n" -"			float radiusB = collidables[collidableIndexB].m_radius;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 diff = posA-posB;\n" -"			float len = length(diff);\n" -"			\n" -"			///iff distance positive, don't generate a new contact\n" -"			if ( len <= (radiusA+radiusB))\n" -"			{\n" -"				///distance (negative means penetration)\n" -"				float dist = len - (radiusA+radiusB);\n" -"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -"				if (len > 0.00001)\n" -"				{\n" -"					normalOnSurfaceB = diff / len;\n" -"				}\n" -"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -"				contactPosB.w = dist;\n" -"								\n" -"				int dstIdx;\n" -"				 AppendInc( nGlobalContactsOut, dstIdx );\n" -"				\n" -"				if (dstIdx < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = normalOnSurfaceB;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_worldPosB[0] = contactPosB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				}//if (dstIdx < numPairs)\n" -"			}//if ( len <= (radiusA+radiusB))\n" -"			return;\n" -"		}//SHAPE_SPHERE SHAPE_SPHERE\n" -"	}//	if (i<numPairs)\n" -"}\n" -"// work-in-progress\n" -"__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" -"													__global const BodyData* rigidBodies, \n" -"													__global const btCollidableGpu* collidables,\n" -"													__global const ConvexPolyhedronCL* convexShapes, \n" -"													__global const float4* vertices,\n" -"													__global const float4* uniqueEdges,\n" -"													__global const btGpuFace* faces,\n" -"													__global const int* indices,\n" -"													__global btAabbCL* aabbs,\n" -"													__global const btGpuChildShape* gpuChildShapes,\n" -"													__global struct b3Contact4Data* restrict globalContactsOut,\n" -"													counter32_t nGlobalContactsOut,\n" -"													int numCompoundPairs, int maxContactCapacity\n" -"													)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		int bodyIndexA = gpuCompoundPairs[i].x;\n" -"		int bodyIndexB = gpuCompoundPairs[i].y;\n" -"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"		\n" -"		int collidableIndexA = -1;\n" -"		int collidableIndexB = -1;\n" -"		\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		\n" -"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							\n" -"		if (childShapeIndexA >= 0)\n" -"		{\n" -"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"			float4 newOrnA = qtMul(ornA,childOrnA);\n" -"			posA = newPosA;\n" -"			ornA = newOrnA;\n" -"		} else\n" -"		{\n" -"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		}\n" -"		\n" -"		if (childShapeIndexB>=0)\n" -"		{\n" -"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"		} else\n" -"		{\n" -"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"		}\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		int pairIndex = i;\n" -"		if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" -"		{\n" -"			computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB,  collidableIndexA,collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" -"		{\n" -"			computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" -"		{\n" -"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"			float4 convexPos = posA;\n" -"			float4 convexOrn = ornA;\n" -"			\n" -"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" -"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"										spherePos,sphereRadius,convexPos,convexOrn);\n" -"	\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" -"		{\n" -"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" -"			float4 convexPos = posB;\n" -"			float4 convexOrn = ornB;\n" -"			\n" -"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"										spherePos,sphereRadius,convexPos,convexOrn);\n" -"	\n" -"			return;\n" -"		}\n" -"	}//	if (i<numCompoundPairs)\n" -"}\n" -"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" -"{\n" -"	const float4* p1 = &vertices[0];\n" -"	const float4* p2 = &vertices[1];\n" -"	const float4* p3 = &vertices[2];\n" -"	float4 edge1;	edge1 = (*p2 - *p1);\n" -"	float4 edge2;	edge2 = ( *p3 - *p2 );\n" -"	float4 edge3;	edge3 = ( *p1 - *p3 );\n" -"	\n" -"	float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" -"	float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" -"	float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" -"	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" -"	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" -"	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" -"	\n" -"	\n" -"	float r1, r2, r3;\n" -"	r1 = dot(edge1_normal,p1_to_p );\n" -"	r2 = dot(edge2_normal,p2_to_p );\n" -"	r3 = dot(edge3_normal,p3_to_p );\n" -"	\n" -"	if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" -"		return true;\n" -"    if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" -"		return true;\n" -"	return false;\n" -"}\n" -"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" -"{\n" -"	float4 diff = p - from;\n" -"	float4 v = to - from;\n" -"	float t = dot(v,diff);\n" -"	\n" -"	if (t > 0) \n" -"	{\n" -"		float dotVV = dot(v,v);\n" -"		if (t < dotVV) \n" -"		{\n" -"			t /= dotVV;\n" -"			diff -= t*v;\n" -"		} else \n" -"		{\n" -"			t = 1;\n" -"			diff -= v;\n" -"		}\n" -"	} else\n" -"	{\n" -"		t = 0;\n" -"	}\n" -"	*nearest = from + t*v;\n" -"	return dot(diff,diff);	\n" -"}\n" -"void	computeContactSphereTriangle(int pairIndex,\n" -"									int bodyIndexA, int bodyIndexB,\n" -"									int collidableIndexA, int collidableIndexB, \n" -"									__global const BodyData* rigidBodies, \n" -"									__global const btCollidableGpu* collidables,\n" -"									const float4* triangleVertices,\n" -"									__global struct b3Contact4Data* restrict globalContactsOut,\n" -"									counter32_t nGlobalContactsOut,\n" -"									int maxContactCapacity,\n" -"									float4 spherePos2,\n" -"									float radius,\n" -"									float4 pos,\n" -"									float4 quat,\n" -"									int faceIndex\n" -"									)\n" -"{\n" -"	float4 invPos;\n" -"	float4 invOrn;\n" -"	trInverse(pos,quat, &invPos,&invOrn);\n" -"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -"	int numFaces = 3;\n" -"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" -"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -"	float minDist = -1000000.f;\n" -"	bool bCollide = false;\n" -"	\n" -"	//////////////////////////////////////\n" -"	float4 sphereCenter;\n" -"	sphereCenter = spherePos;\n" -"	const float4* vertices = triangleVertices;\n" -"	float contactBreakingThreshold = 0.f;//todo?\n" -"	float radiusWithThreshold = radius + contactBreakingThreshold;\n" -"	float4 edge10;\n" -"	edge10 = vertices[1]-vertices[0];\n" -"	edge10.w = 0.f;//is this needed?\n" -"	float4 edge20;\n" -"	edge20 = vertices[2]-vertices[0];\n" -"	edge20.w = 0.f;//is this needed?\n" -"	float4 normal = cross3(edge10,edge20);\n" -"	normal = normalize(normal);\n" -"	float4 p1ToCenter;\n" -"	p1ToCenter = sphereCenter - vertices[0];\n" -"	\n" -"	float distanceFromPlane = dot(p1ToCenter,normal);\n" -"	if (distanceFromPlane < 0.f)\n" -"	{\n" -"		//triangle facing the other way\n" -"		distanceFromPlane *= -1.f;\n" -"		normal *= -1.f;\n" -"	}\n" -"	hitNormalWorld = normal;\n" -"	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" -"	\n" -"	// Check for contact / intersection\n" -"	bool hasContact = false;\n" -"	float4 contactPoint;\n" -"	if (isInsideContactPlane) \n" -"	{\n" -"	\n" -"		if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" -"		{\n" -"			// Inside the contact wedge - touches a point on the shell plane\n" -"			hasContact = true;\n" -"			contactPoint = sphereCenter - normal*distanceFromPlane;\n" -"			\n" -"		} else {\n" -"			// Could be inside one of the contact capsules\n" -"			float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" -"			float4 nearestOnEdge;\n" -"			int numEdges = 3;\n" -"			for (int i = 0; i < numEdges; i++) \n" -"			{\n" -"				float4 pa =vertices[i];\n" -"				float4 pb = vertices[(i+1)%3];\n" -"				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" -"				if (distanceSqr < contactCapsuleRadiusSqr) \n" -"				{\n" -"					// Yep, we're inside a capsule\n" -"					hasContact = true;\n" -"					contactPoint = nearestOnEdge;\n" -"					\n" -"				}\n" -"				\n" -"			}\n" -"		}\n" -"	}\n" -"	if (hasContact) \n" -"	{\n" -"		closestPnt = contactPoint;\n" -"		float4 contactToCenter = sphereCenter - contactPoint;\n" -"		minDist = length(contactToCenter);\n" -"		if (minDist>FLT_EPSILON)\n" -"		{\n" -"			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" -"			bCollide  = true;\n" -"		}\n" -"		\n" -"	}\n" -"	/////////////////////////////////////\n" -"	if (bCollide && minDist > -10000)\n" -"	{\n" -"		\n" -"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" -"		float actualDepth = minDist-radius;\n" -"		\n" -"		if (actualDepth<=0.f)\n" -"		{\n" -"			pOnB1.w = actualDepth;\n" -"			int dstIdx;\n" -"			\n" -"			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" -"			if (lenSqr>FLT_EPSILON)\n" -"			{\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"			\n" -"				if (dstIdx < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"					c->m_worldPosB[0] = pOnB1;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = faceIndex;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				} \n" -"			}\n" -"		}\n" -"	}//if (hasCollision)\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n" -"												__global const BodyData* rigidBodies,\n" -"												__global const btCollidableGpu* collidables,\n" -"												__global const ConvexPolyhedronCL* convexShapes, \n" -"												__global const float4* vertices,\n" -"												__global const float4* uniqueEdges,\n" -"												__global const btGpuFace* faces,\n" -"												__global const int* indices,\n" -"												__global btAabbCL* aabbs,\n" -"												__global struct b3Contact4Data* restrict globalContactsOut,\n" -"												counter32_t nGlobalContactsOut,\n" -"													int numConcavePairs, int maxContactCapacity\n" -"												)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"	int pairIdx = i;\n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" -"	{\n" -"		int f = concavePairs[i].z;\n" -"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"		\n" -"		float4 verticesA[3];\n" -"		for (int i=0;i<3;i++)\n" -"		{\n" -"			int index = indices[face.m_indexOffset+i];\n" -"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"			verticesA[i] = vert;\n" -"		}\n" -"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"		float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -"																rigidBodies,collidables,\n" -"																verticesA,\n" -"																globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																spherePos,sphereRadius,convexPos,convexOrn, f);\n" -"		return;\n" -"	}\n" -"}\n" -; +static const char* primitiveContactsKernelsCL = +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_PLANE 4\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define SHAPE_SPHERE 7\n" +	"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	"#define counter32_t volatile __global int*\n" +	"#endif\n" +	"#define GET_GROUP_IDX get_group_id(0)\n" +	"#define GET_LOCAL_IDX get_local_id(0)\n" +	"#define GET_GLOBAL_IDX get_global_id(0)\n" +	"#define GET_GROUP_SIZE get_local_size(0)\n" +	"#define GET_NUM_GROUPS get_num_groups(0)\n" +	"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +	"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +	"#define AtomInc(x) atom_inc(&(x))\n" +	"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define AtomAdd(x, value) atom_add(&(x), value)\n" +	"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +	"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +	"#define max2 max\n" +	"#define min2 min\n" +	"typedef unsigned int u32;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	int m_numChildShapes;\n" +	"	float m_radius;\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;	\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +	"#define make_float4 (float4)\n" +	"#define make_float2 (float2)\n" +	"#define make_uint4 (uint4)\n" +	"#define make_int4 (int4)\n" +	"#define make_uint2 (uint2)\n" +	"#define make_int2 (int2)\n" +	"__inline\n" +	"float fastDiv(float numerator, float denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"//	return numerator/denominator;	\n" +	"}\n" +	"__inline\n" +	"float4 fastDiv4(float4 numerator, float4 denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"}\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"//#define dot3F4 dot\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtInvert(orientationIn);\n" +	"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" +	"}\n" +	"void	trMul(float4 translationA, Quaternion orientationA,\n" +	"						float4 translationB, Quaternion orientationB,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtMul(orientationA,orientationB);\n" +	"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" +	"{\n" +	"	return make_float4(	a.x + (b.x - a.x) * t,\n" +	"						a.y + (b.y - a.y) * t,\n" +	"						a.z + (b.z - a.z) * t,\n" +	"						0.f);\n" +	"}\n" +	"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" +	"{\n" +	"	float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" +	"	float dist = dot3F4(n, point) + planeEqn.w;\n" +	"	*closestPointOnFace = point - dist * n;\n" +	"	return dist;\n" +	"}\n" +	"inline bool IsPointInPolygon(float4 p, \n" +	"							const btGpuFace* face,\n" +	"							__global const float4* baseVertex,\n" +	"							__global const  int* convexIndices,\n" +	"							float4* out)\n" +	"{\n" +	"    float4 a;\n" +	"    float4 b;\n" +	"    float4 ab;\n" +	"    float4 ap;\n" +	"    float4 v;\n" +	"	float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" +	"	\n" +	"	if (face->m_numIndices<2)\n" +	"		return false;\n" +	"	\n" +	"	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" +	"	\n" +	"	b = v0;\n" +	"    for(unsigned i=0; i != face->m_numIndices; ++i)\n" +	"    {\n" +	"		a = b;\n" +	"		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" +	"		b = vi;\n" +	"        ab = b-a;\n" +	"        ap = p-a;\n" +	"        v = cross3(ab,plane);\n" +	"        if (dot(ap, v) > 0.f)\n" +	"        {\n" +	"            float ab_m2 = dot(ab, ab);\n" +	"            float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" +	"            if (rt <= 0.f)\n" +	"            {\n" +	"                *out = a;\n" +	"            }\n" +	"            else if (rt >= 1.f) \n" +	"            {\n" +	"                *out = b;\n" +	"            }\n" +	"            else\n" +	"            {\n" +	"            	float s = 1.f - rt;\n" +	"				out[0].x = s * a.x + rt * b.x;\n" +	"				out[0].y = s * a.y + rt * b.y;\n" +	"				out[0].z = s * a.z + rt * b.z;\n" +	"            }\n" +	"            return false;\n" +	"        }\n" +	"    }\n" +	"    return true;\n" +	"}\n" +	"void	computeContactSphereConvex(int pairIndex,\n" +	"																int bodyIndexA, int bodyIndexB, \n" +	"																int collidableIndexA, int collidableIndexB, \n" +	"																__global const BodyData* rigidBodies, \n" +	"																__global const btCollidableGpu* collidables,\n" +	"																__global const ConvexPolyhedronCL* convexShapes,\n" +	"																__global const float4* convexVertices,\n" +	"																__global const int* convexIndices,\n" +	"																__global const btGpuFace* faces,\n" +	"																__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																counter32_t nGlobalContactsOut,\n" +	"																int maxContactCapacity,\n" +	"																float4 spherePos2,\n" +	"																float radius,\n" +	"																float4 pos,\n" +	"																float4 quat\n" +	"																)\n" +	"{\n" +	"	float4 invPos;\n" +	"	float4 invOrn;\n" +	"	trInverse(pos,quat, &invPos,&invOrn);\n" +	"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +	"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +	"	int numFaces = convexShapes[shapeIndex].m_numFaces;\n" +	"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" +	"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" +	"	float minDist = -1000000.f;\n" +	"	bool bCollide = true;\n" +	"	for ( int f = 0; f < numFaces; f++ )\n" +	"	{\n" +	"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" +	"		// set up a plane equation \n" +	"		float4 planeEqn;\n" +	"		float4 n1 = face.m_plane;\n" +	"		n1.w = 0.f;\n" +	"		planeEqn = n1;\n" +	"		planeEqn.w = face.m_plane.w;\n" +	"		\n" +	"	\n" +	"		// compute a signed distance from the vertex in cloth to the face of rigidbody.\n" +	"		float4 pntReturn;\n" +	"		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" +	"		// If the distance is positive, the plane is a separating plane. \n" +	"		if ( dist > radius )\n" +	"		{\n" +	"			bCollide = false;\n" +	"			break;\n" +	"		}\n" +	"		if (dist>0)\n" +	"		{\n" +	"			//might hit an edge or vertex\n" +	"			float4 out;\n" +	"			float4 zeroPos = make_float4(0,0,0,0);\n" +	"			bool isInPoly = IsPointInPolygon(spherePos,\n" +	"					&face,\n" +	"					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" +	"					convexIndices,\n" +	"           &out);\n" +	"			if (isInPoly)\n" +	"			{\n" +	"				if (dist>minDist)\n" +	"				{\n" +	"					minDist = dist;\n" +	"					closestPnt = pntReturn;\n" +	"					hitNormalWorld = planeEqn;\n" +	"					\n" +	"				}\n" +	"			} else\n" +	"			{\n" +	"				float4 tmp = spherePos-out;\n" +	"				float l2 = dot(tmp,tmp);\n" +	"				if (l2<radius*radius)\n" +	"				{\n" +	"					dist  = sqrt(l2);\n" +	"					if (dist>minDist)\n" +	"					{\n" +	"						minDist = dist;\n" +	"						closestPnt = out;\n" +	"						hitNormalWorld = tmp/dist;\n" +	"						\n" +	"					}\n" +	"					\n" +	"				} else\n" +	"				{\n" +	"					bCollide = false;\n" +	"					break;\n" +	"				}\n" +	"			}\n" +	"		} else\n" +	"		{\n" +	"			if ( dist > minDist )\n" +	"			{\n" +	"				minDist = dist;\n" +	"				closestPnt = pntReturn;\n" +	"				hitNormalWorld.xyz = planeEqn.xyz;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"	\n" +	"	if (bCollide && minDist > -10000)\n" +	"	{\n" +	"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" +	"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +	"		\n" +	"		float actualDepth = minDist-radius;\n" +	"		if (actualDepth<=0.f)\n" +	"		{\n" +	"			\n" +	"			pOnB1.w = actualDepth;\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		\n" +	"			\n" +	"			if (1)//dstIdx < maxContactCapacity)\n" +	"			{\n" +	"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"				c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"				c->m_worldPosB[0] = pOnB1;\n" +	"				c->m_childIndexA = -1;\n" +	"				c->m_childIndexB = -1;\n" +	"				GET_NPOINTS(*c) = 1;\n" +	"			} \n" +	"		}\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"							\n" +	"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 )\n" +	"        return 0;\n" +	"    \n" +	"    if (nPoints <=4)\n" +	"        return nPoints;\n" +	"    \n" +	"    \n" +	"    if (nPoints >64)\n" +	"        nPoints = 64;\n" +	"    \n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			center += p[i];\n" +	"		center /= (float)nPoints;\n" +	"	}\n" +	"    \n" +	"	\n" +	"    \n" +	"	//	sample 4 directions\n" +	"    \n" +	"    float4 aVector = p[0] - center;\n" +	"    float4 u = cross3( nearNormal, aVector );\n" +	"    float4 v = cross3( nearNormal, u );\n" +	"    u = normalize3( u );\n" +	"    v = normalize3( v );\n" +	"    \n" +	"    \n" +	"    //keep point with deepest penetration\n" +	"    float minW= FLT_MAX;\n" +	"    \n" +	"    int minIndex=-1;\n" +	"    \n" +	"    float4 maxDots;\n" +	"    maxDots.x = FLT_MIN;\n" +	"    maxDots.y = FLT_MIN;\n" +	"    maxDots.z = FLT_MIN;\n" +	"    maxDots.w = FLT_MIN;\n" +	"    \n" +	"    //	idx, distance\n" +	"    for(int ie = 0; ie<nPoints; ie++ )\n" +	"    {\n" +	"        if (p[ie].w<minW)\n" +	"        {\n" +	"            minW = p[ie].w;\n" +	"            minIndex=ie;\n" +	"        }\n" +	"        float f;\n" +	"        float4 r = p[ie]-center;\n" +	"        f = dot3F4( u, r );\n" +	"        if (f<maxDots.x)\n" +	"        {\n" +	"            maxDots.x = f;\n" +	"            contactIdx[0].x = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -u, r );\n" +	"        if (f<maxDots.y)\n" +	"        {\n" +	"            maxDots.y = f;\n" +	"            contactIdx[0].y = ie;\n" +	"        }\n" +	"        \n" +	"        \n" +	"        f = dot3F4( v, r );\n" +	"        if (f<maxDots.z)\n" +	"        {\n" +	"            maxDots.z = f;\n" +	"            contactIdx[0].z = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -v, r );\n" +	"        if (f<maxDots.w)\n" +	"        {\n" +	"            maxDots.w = f;\n" +	"            contactIdx[0].w = ie;\n" +	"        }\n" +	"        \n" +	"    }\n" +	"    \n" +	"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" +	"    {\n" +	"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" +	"        contactIdx[0].x = minIndex;\n" +	"    }\n" +	"    \n" +	"    return 4;\n" +	"    \n" +	"}\n" +	"#define MAX_PLANE_CONVEX_POINTS 64\n" +	"int computeContactPlaneConvex(int pairIndex,\n" +	"								int bodyIndexA, int bodyIndexB, \n" +	"								int collidableIndexA, int collidableIndexB, \n" +	"								__global const BodyData* rigidBodies, \n" +	"								__global const btCollidableGpu*collidables,\n" +	"								__global const ConvexPolyhedronCL* convexShapes,\n" +	"								__global const float4* convexVertices,\n" +	"								__global const int* convexIndices,\n" +	"								__global const btGpuFace* faces,\n" +	"								__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"								counter32_t nGlobalContactsOut,\n" +	"								int maxContactCapacity,\n" +	"								float4 posB,\n" +	"								Quaternion ornB\n" +	"								)\n" +	"{\n" +	"	int resultIndex=-1;\n" +	"		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +	"	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" +	"	\n" +	"	float4 posA;\n" +	"	posA = rigidBodies[bodyIndexA].m_pos;\n" +	"	Quaternion ornA;\n" +	"	ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	float4 planeEq;\n" +	"	 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +	"	float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +	"	float4 planeNormalWorld;\n" +	"	planeNormalWorld = qtRotate(ornA,planeNormal);\n" +	"	float planeConstant = planeEq.w;\n" +	"	\n" +	"	float4 invPosA;Quaternion invOrnA;\n" +	"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +	"	{\n" +	"		\n" +	"		trInverse(posA,ornA,&invPosA,&invOrnA);\n" +	"		trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	}\n" +	"	float4 invPosB;Quaternion invOrnB;\n" +	"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" +	"	{\n" +	"		\n" +	"		trInverse(posB,ornB,&invPosB,&invOrnB);\n" +	"		trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1);	\n" +	"	}\n" +	"	\n" +	"	float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" +	"	float maxDot = -1e30;\n" +	"	int hitVertex=-1;\n" +	"	float4 hitVtx;\n" +	"	float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" +	"	int numPoints = 0;\n" +	"	int4 contactIdx;\n" +	"	contactIdx=make_int4(0,1,2,3);\n" +	"    \n" +	"	\n" +	"	for (int i=0;i<hullB->m_numVertices;i++)\n" +	"	{\n" +	"		float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" +	"		float curDot = dot(vtx,planeNormalInConvex);\n" +	"		if (curDot>maxDot)\n" +	"		{\n" +	"			hitVertex=i;\n" +	"			maxDot=curDot;\n" +	"			hitVtx = vtx;\n" +	"			//make sure the deepest points is always included\n" +	"			if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" +	"				numPoints--;\n" +	"		}\n" +	"		if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" +	"		{\n" +	"			float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" +	"			float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" +	"			float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" +	"			if (dist<0.f)\n" +	"			{\n" +	"				vtxWorld.w = dist;\n" +	"				contactPoints[numPoints] = vtxWorld;\n" +	"				numPoints++;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	int numReducedPoints  = numPoints;\n" +	"	if (numPoints>4)\n" +	"	{\n" +	"		numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" +	"	}\n" +	"	if (numReducedPoints>0)\n" +	"	{\n" +	"		int dstIdx;\n" +	"	    AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		if (dstIdx < maxContactCapacity)\n" +	"		{\n" +	"			resultIndex = dstIdx;\n" +	"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"			c->m_worldNormalOnB = -planeNormalWorld;\n" +	"			//c->setFrictionCoeff(0.7);\n" +	"			//c->setRestituitionCoeff(0.f);\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = pairIndex;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			switch (numReducedPoints)\n" +	"            {\n" +	"                case 4:\n" +	"                    c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" +	"                case 3:\n" +	"                    c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" +	"                case 2:\n" +	"                    c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" +	"                case 1:\n" +	"                    c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" +	"                default:\n" +	"                {\n" +	"                }\n" +	"            };\n" +	"			\n" +	"			GET_NPOINTS(*c) = numReducedPoints;\n" +	"		}//if (dstIdx < numPairs)\n" +	"	}	\n" +	"	return resultIndex;\n" +	"}\n" +	"void	computeContactPlaneSphere(int pairIndex,\n" +	"																int bodyIndexA, int bodyIndexB, \n" +	"																int collidableIndexA, int collidableIndexB, \n" +	"																__global const BodyData* rigidBodies, \n" +	"																__global const btCollidableGpu* collidables,\n" +	"																__global const btGpuFace* faces,\n" +	"																__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																counter32_t nGlobalContactsOut,\n" +	"																int maxContactCapacity)\n" +	"{\n" +	"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +	"	float radius = collidables[collidableIndexB].m_radius;\n" +	"	float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" +	"	float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" +	"	float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" +	"	float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" +	"	\n" +	"	bool hasCollision = false;\n" +	"	float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +	"	float planeConstant = planeEq.w;\n" +	"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +	"	{\n" +	"		float4 invPosA;Quaternion invOrnA;\n" +	"		trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" +	"		trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	}\n" +	"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" +	"	{\n" +	"		float4 invPosB;Quaternion invOrnB;\n" +	"		trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" +	"		trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1);	\n" +	"	}\n" +	"	float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" +	"	float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" +	"	hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" +	"	if (hasCollision)\n" +	"	{\n" +	"		float4 vtxInPlaneProjected1 = vtxInPlane1 -   distance*planeNormal1;\n" +	"		float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" +	"		float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" +	"		float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" +	"		pOnB1.w = distance;\n" +	"		int dstIdx;\n" +	"    AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		\n" +	"		if (dstIdx < maxContactCapacity)\n" +	"		{\n" +	"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"			c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = pairIndex;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"			c->m_worldPosB[0] = pOnB1;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			GET_NPOINTS(*c) = 1;\n" +	"		}//if (dstIdx < numPairs)\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"__kernel void   primitiveContactsKernel( __global int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numPairs, int maxContactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +	"		{\n" +	"			float4 posB;\n" +	"			posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			Quaternion ornB;\n" +	"			ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" +	"			if (contactIndex>=0)\n" +	"				pairs[pairIndex].z = contactIndex;\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +	"		{\n" +	"			float4 posA;\n" +	"			posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			Quaternion ornA;\n" +	"			ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +	"			if (contactIndex>=0)\n" +	"				pairs[pairIndex].z = contactIndex;\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +	"		{\n" +	"			computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"																rigidBodies,collidables,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +	"		{\n" +	"		\n" +	"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" +	"			float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"																spherePos,sphereRadius,convexPos,convexOrn);\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"		\n" +	"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"			float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"																spherePos,sphereRadius,convexPos,convexOrn);\n" +	"			return;\n" +	"		}\n" +	"	\n" +	"	\n" +	"	\n" +	"		\n" +	"	\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			//sphere-sphere\n" +	"			float radiusA = collidables[collidableIndexA].m_radius;\n" +	"			float radiusB = collidables[collidableIndexB].m_radius;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 diff = posA-posB;\n" +	"			float len = length(diff);\n" +	"			\n" +	"			///iff distance positive, don't generate a new contact\n" +	"			if ( len <= (radiusA+radiusB))\n" +	"			{\n" +	"				///distance (negative means penetration)\n" +	"				float dist = len - (radiusA+radiusB);\n" +	"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" +	"				if (len > 0.00001)\n" +	"				{\n" +	"					normalOnSurfaceB = diff / len;\n" +	"				}\n" +	"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +	"				contactPosB.w = dist;\n" +	"								\n" +	"				int dstIdx;\n" +	"				 AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				\n" +	"				if (dstIdx < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = normalOnSurfaceB;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_worldPosB[0] = contactPosB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				}//if (dstIdx < numPairs)\n" +	"			}//if ( len <= (radiusA+radiusB))\n" +	"			return;\n" +	"		}//SHAPE_SPHERE SHAPE_SPHERE\n" +	"	}//	if (i<numPairs)\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" +	"													__global const BodyData* rigidBodies, \n" +	"													__global const btCollidableGpu* collidables,\n" +	"													__global const ConvexPolyhedronCL* convexShapes, \n" +	"													__global const float4* vertices,\n" +	"													__global const float4* uniqueEdges,\n" +	"													__global const btGpuFace* faces,\n" +	"													__global const int* indices,\n" +	"													__global btAabbCL* aabbs,\n" +	"													__global const btGpuChildShape* gpuChildShapes,\n" +	"													__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"													counter32_t nGlobalContactsOut,\n" +	"													int numCompoundPairs, int maxContactCapacity\n" +	"													)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		int bodyIndexA = gpuCompoundPairs[i].x;\n" +	"		int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"		\n" +	"		int collidableIndexA = -1;\n" +	"		int collidableIndexB = -1;\n" +	"		\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		\n" +	"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							\n" +	"		if (childShapeIndexA >= 0)\n" +	"		{\n" +	"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"			float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"			posA = newPosA;\n" +	"			ornA = newOrnA;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		}\n" +	"		\n" +	"		if (childShapeIndexB>=0)\n" +	"		{\n" +	"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"		}\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		int pairIndex = i;\n" +	"		if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB,  collidableIndexA,collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" +	"		{\n" +	"			computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" +	"		{\n" +	"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"			float4 convexPos = posA;\n" +	"			float4 convexOrn = ornA;\n" +	"			\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" +	"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"										spherePos,sphereRadius,convexPos,convexOrn);\n" +	"	\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" +	"			float4 convexPos = posB;\n" +	"			float4 convexOrn = ornB;\n" +	"			\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"										spherePos,sphereRadius,convexPos,convexOrn);\n" +	"	\n" +	"			return;\n" +	"		}\n" +	"	}//	if (i<numCompoundPairs)\n" +	"}\n" +	"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" +	"{\n" +	"	const float4* p1 = &vertices[0];\n" +	"	const float4* p2 = &vertices[1];\n" +	"	const float4* p3 = &vertices[2];\n" +	"	float4 edge1;	edge1 = (*p2 - *p1);\n" +	"	float4 edge2;	edge2 = ( *p3 - *p2 );\n" +	"	float4 edge3;	edge3 = ( *p1 - *p3 );\n" +	"	\n" +	"	float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" +	"	float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" +	"	float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" +	"	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" +	"	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" +	"	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" +	"	\n" +	"	\n" +	"	float r1, r2, r3;\n" +	"	r1 = dot(edge1_normal,p1_to_p );\n" +	"	r2 = dot(edge2_normal,p2_to_p );\n" +	"	r3 = dot(edge3_normal,p3_to_p );\n" +	"	\n" +	"	if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" +	"		return true;\n" +	"    if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" +	"		return true;\n" +	"	return false;\n" +	"}\n" +	"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" +	"{\n" +	"	float4 diff = p - from;\n" +	"	float4 v = to - from;\n" +	"	float t = dot(v,diff);\n" +	"	\n" +	"	if (t > 0) \n" +	"	{\n" +	"		float dotVV = dot(v,v);\n" +	"		if (t < dotVV) \n" +	"		{\n" +	"			t /= dotVV;\n" +	"			diff -= t*v;\n" +	"		} else \n" +	"		{\n" +	"			t = 1;\n" +	"			diff -= v;\n" +	"		}\n" +	"	} else\n" +	"	{\n" +	"		t = 0;\n" +	"	}\n" +	"	*nearest = from + t*v;\n" +	"	return dot(diff,diff);	\n" +	"}\n" +	"void	computeContactSphereTriangle(int pairIndex,\n" +	"									int bodyIndexA, int bodyIndexB,\n" +	"									int collidableIndexA, int collidableIndexB, \n" +	"									__global const BodyData* rigidBodies, \n" +	"									__global const btCollidableGpu* collidables,\n" +	"									const float4* triangleVertices,\n" +	"									__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"									counter32_t nGlobalContactsOut,\n" +	"									int maxContactCapacity,\n" +	"									float4 spherePos2,\n" +	"									float radius,\n" +	"									float4 pos,\n" +	"									float4 quat,\n" +	"									int faceIndex\n" +	"									)\n" +	"{\n" +	"	float4 invPos;\n" +	"	float4 invOrn;\n" +	"	trInverse(pos,quat, &invPos,&invOrn);\n" +	"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +	"	int numFaces = 3;\n" +	"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" +	"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" +	"	float minDist = -1000000.f;\n" +	"	bool bCollide = false;\n" +	"	\n" +	"	//////////////////////////////////////\n" +	"	float4 sphereCenter;\n" +	"	sphereCenter = spherePos;\n" +	"	const float4* vertices = triangleVertices;\n" +	"	float contactBreakingThreshold = 0.f;//todo?\n" +	"	float radiusWithThreshold = radius + contactBreakingThreshold;\n" +	"	float4 edge10;\n" +	"	edge10 = vertices[1]-vertices[0];\n" +	"	edge10.w = 0.f;//is this needed?\n" +	"	float4 edge20;\n" +	"	edge20 = vertices[2]-vertices[0];\n" +	"	edge20.w = 0.f;//is this needed?\n" +	"	float4 normal = cross3(edge10,edge20);\n" +	"	normal = normalize(normal);\n" +	"	float4 p1ToCenter;\n" +	"	p1ToCenter = sphereCenter - vertices[0];\n" +	"	\n" +	"	float distanceFromPlane = dot(p1ToCenter,normal);\n" +	"	if (distanceFromPlane < 0.f)\n" +	"	{\n" +	"		//triangle facing the other way\n" +	"		distanceFromPlane *= -1.f;\n" +	"		normal *= -1.f;\n" +	"	}\n" +	"	hitNormalWorld = normal;\n" +	"	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" +	"	\n" +	"	// Check for contact / intersection\n" +	"	bool hasContact = false;\n" +	"	float4 contactPoint;\n" +	"	if (isInsideContactPlane) \n" +	"	{\n" +	"	\n" +	"		if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" +	"		{\n" +	"			// Inside the contact wedge - touches a point on the shell plane\n" +	"			hasContact = true;\n" +	"			contactPoint = sphereCenter - normal*distanceFromPlane;\n" +	"			\n" +	"		} else {\n" +	"			// Could be inside one of the contact capsules\n" +	"			float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" +	"			float4 nearestOnEdge;\n" +	"			int numEdges = 3;\n" +	"			for (int i = 0; i < numEdges; i++) \n" +	"			{\n" +	"				float4 pa =vertices[i];\n" +	"				float4 pb = vertices[(i+1)%3];\n" +	"				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" +	"				if (distanceSqr < contactCapsuleRadiusSqr) \n" +	"				{\n" +	"					// Yep, we're inside a capsule\n" +	"					hasContact = true;\n" +	"					contactPoint = nearestOnEdge;\n" +	"					\n" +	"				}\n" +	"				\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (hasContact) \n" +	"	{\n" +	"		closestPnt = contactPoint;\n" +	"		float4 contactToCenter = sphereCenter - contactPoint;\n" +	"		minDist = length(contactToCenter);\n" +	"		if (minDist>FLT_EPSILON)\n" +	"		{\n" +	"			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" +	"			bCollide  = true;\n" +	"		}\n" +	"		\n" +	"	}\n" +	"	/////////////////////////////////////\n" +	"	if (bCollide && minDist > -10000)\n" +	"	{\n" +	"		\n" +	"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" +	"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +	"		float actualDepth = minDist-radius;\n" +	"		\n" +	"		if (actualDepth<=0.f)\n" +	"		{\n" +	"			pOnB1.w = actualDepth;\n" +	"			int dstIdx;\n" +	"			\n" +	"			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" +	"			if (lenSqr>FLT_EPSILON)\n" +	"			{\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			\n" +	"				if (dstIdx < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"					c->m_worldPosB[0] = pOnB1;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = faceIndex;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				} \n" +	"			}\n" +	"		}\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n" +	"												__global const BodyData* rigidBodies,\n" +	"												__global const btCollidableGpu* collidables,\n" +	"												__global const ConvexPolyhedronCL* convexShapes, \n" +	"												__global const float4* vertices,\n" +	"												__global const float4* uniqueEdges,\n" +	"												__global const btGpuFace* faces,\n" +	"												__global const int* indices,\n" +	"												__global btAabbCL* aabbs,\n" +	"												__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"												counter32_t nGlobalContactsOut,\n" +	"													int numConcavePairs, int maxContactCapacity\n" +	"												)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"	int pairIdx = i;\n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" +	"	{\n" +	"		int f = concavePairs[i].z;\n" +	"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"		\n" +	"		float4 verticesA[3];\n" +	"		for (int i=0;i<3;i++)\n" +	"		{\n" +	"			int index = indices[face.m_indexOffset+i];\n" +	"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"			verticesA[i] = vert;\n" +	"		}\n" +	"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"		float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +	"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +	"		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +	"																rigidBodies,collidables,\n" +	"																verticesA,\n" +	"																globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"																spherePos,sphereRadius,convexPos,convexOrn, f);\n" +	"		return;\n" +	"	}\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index f0ecfc7851..907809d8bd 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -1,2099 +1,2098 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satClipKernelsCL= \ -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" -"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -"	b3Float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"	int m_unusedPadding1;\n" -"	int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -"	b3Float4		m_localCenter;\n" -"	b3Float4		m_extents;\n" -"	b3Float4		mC;\n" -"	b3Float4		mE;\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -"	SHAPE_HEIGHT_FIELD=1,\n" -"	SHAPE_CONVEX_HULL=3,\n" -"	SHAPE_PLANE=4,\n" -"	SHAPE_CONCAVE_TRIMESH=5,\n" -"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -"	SHAPE_SPHERE=7,\n" -"	MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -"	b3Float4	m_childPosition;\n" -"	b3Quat		m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -"	int m_bodyIndexA;\n" -"	int m_bodyIndexB;\n" -"//	int	m_pairType;\n" -"	int m_childShapeIndexA;\n" -"	int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -"	b3Float4				m_pos;\n" -"	b3Quat					m_quat;\n" -"	b3Float4				m_linVel;\n" -"	b3Float4				m_angVel;\n" -"	int 					m_collidableIdx;\n" -"	float 				m_invMass;\n" -"	float 				m_restituitionCoeff;\n" -"	float 				m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -"	b3Mat3x3 m_invInertiaWorld;\n" -"	b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -"	\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"//	return numerator/denominator;	\n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" -"{\n" -"	return make_float4(	a.x + (b.x - a.x) * t,\n" -"						a.y + (b.y - a.y) * t,\n" -"						a.z + (b.z - a.z) * t,\n" -"						0.f);\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" -"{\n" -"	\n" -"	int ve;\n" -"	float ds, de;\n" -"	int numVertsOut = 0;\n" -"    //double-check next test\n" -"    	if (numVertsIn < 2)\n" -"    		return 0;\n" -"    \n" -"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" -"	float4 endVertex = pVtxIn[0];\n" -"	\n" -"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -"    \n" -"	for (ve = 0; ve < numVertsIn; ve++)\n" -"	{\n" -"		endVertex=pVtxIn[ve];\n" -"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -"		if (ds<0)\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start < 0, end < 0, so output endVertex\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"			else\n" -"			{\n" -"				// Start < 0, end >= 0, so output intersection\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"			}\n" -"		}\n" -"		else\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start >= 0, end < 0 so output intersection and end\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"		}\n" -"		firstVertex = endVertex;\n" -"		ds = de;\n" -"	}\n" -"	return numVertsOut;\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" -"{\n" -"	\n" -"	int ve;\n" -"	float ds, de;\n" -"	int numVertsOut = 0;\n" -"//double-check next test\n" -"	if (numVertsIn < 2)\n" -"		return 0;\n" -"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" -"	float4 endVertex = pVtxIn[0];\n" -"	\n" -"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -"	for (ve = 0; ve < numVertsIn; ve++)\n" -"	{\n" -"		endVertex=pVtxIn[ve];\n" -"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -"		if (ds<0)\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start < 0, end < 0, so output endVertex\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"			else\n" -"			{\n" -"				// Start < 0, end >= 0, so output intersection\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"			}\n" -"		}\n" -"		else\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start >= 0, end < 0 so output intersection and end\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"		}\n" -"		firstVertex = endVertex;\n" -"		ds = de;\n" -"	}\n" -"	return numVertsOut;\n" -"}\n" -"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  \n" -"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -"	float4* worldVertsB2, int capacityWorldVertsB2,\n" -"	const float minDist, float maxDist,\n" -"	__global const float4* vertices,\n" -"	__global const b3GpuFace_t* faces,\n" -"	__global const int* indices,\n" -"	float4* contactsOut,\n" -"	int contactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	float4* pVtxIn = worldVertsB1;\n" -"	float4* pVtxOut = worldVertsB2;\n" -"	\n" -"	int numVertsIn = numWorldVertsB1;\n" -"	int numVertsOut = 0;\n" -"	int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"				faces[hullA->m_faceOffset+face].m_plane.x, \n" -"				faces[hullA->m_faceOffset+face].m_plane.y, \n" -"				faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"		\n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"	b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"	int numVerticesA = polyA.m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"		const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" -"		const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -"		const float4 edge0 = a - b;\n" -"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = transform(&a,&posA,&ornA);\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		\n" -"		//clip face\n" -"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -"		//btSwap(pVtxIn,pVtxOut);\n" -"		float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsIn = numVertsOut;\n" -"		numVertsOut = 0;\n" -"	}\n" -"	\n" -"	// only keep points that are behind the witness face\n" -"	{\n" -"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float localPlaneEq = polyA.m_plane.w;\n" -"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -"		for (int i=0;i<numVertsIn;i++)\n" -"		{\n" -"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"			if (depth <=minDist)\n" -"			{\n" -"				depth = minDist;\n" -"			}\n" -"			if (depth <=maxDist)\n" -"			{\n" -"				float4 pointInWorld = pVtxIn[i];\n" -"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" -"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"			}\n" -"		}\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  \n" -"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -"	float4* worldVertsB2, int capacityWorldVertsB2,\n" -"	const float minDist, float maxDist,\n" -"	const float4* verticesA,\n" -"	const b3GpuFace_t* facesA,\n" -"	const int* indicesA,\n" -"	__global const float4* verticesB,\n" -"	__global const b3GpuFace_t* facesB,\n" -"	__global const int* indicesB,\n" -"	float4* contactsOut,\n" -"	int contactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	float4* pVtxIn = worldVertsB1;\n" -"	float4* pVtxOut = worldVertsB2;\n" -"	\n" -"	int numVertsIn = numWorldVertsB1;\n" -"	int numVertsOut = 0;\n" -"	int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"				facesA[hullA->m_faceOffset+face].m_plane.x, \n" -"				facesA[hullA->m_faceOffset+face].m_plane.y, \n" -"				facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"		\n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"	b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"	int numVerticesA = polyA.m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"		const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" -"		const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -"		const float4 edge0 = a - b;\n" -"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = transform(&a,&posA,&ornA);\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		\n" -"		//clip face\n" -"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -"		//btSwap(pVtxIn,pVtxOut);\n" -"		float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsIn = numVertsOut;\n" -"		numVertsOut = 0;\n" -"	}\n" -"	\n" -"	// only keep points that are behind the witness face\n" -"	{\n" -"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float localPlaneEq = polyA.m_plane.w;\n" -"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -"		for (int i=0;i<numVertsIn;i++)\n" -"		{\n" -"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"			if (depth <=minDist)\n" -"			{\n" -"				depth = minDist;\n" -"			}\n" -"			if (depth <=maxDist)\n" -"			{\n" -"				float4 pointInWorld = pVtxIn[i];\n" -"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" -"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"			}\n" -"		}\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int	clipHullAgainstHull(const float4 separatingNormal,\n" -"	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -"	const float minDist, float maxDist,\n" -"	__global const float4* vertices,\n" -"	__global const b3GpuFace_t* faces,\n" -"	__global const int* indices,\n" -"	float4*	localContactsOut,\n" -"	int localContactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" -"				faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	{\n" -"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"	if (closestFaceB>=0)\n" -"	{\n" -"		numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" -"				posA,ornA,\n" -"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" -"				faces,\n" -"				indices,localContactsOut,localContactCapacity);\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int	clipHullAgainstHullLocalA(const float4 separatingNormal,\n" -"	const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -"	const float minDist, float maxDist,\n" -"	const float4* verticesA,\n" -"	const b3GpuFace_t* facesA,\n" -"	const int* indicesA,\n" -"	__global const float4* verticesB,\n" -"	__global const b3GpuFace_t* facesB,\n" -"	__global const int* indicesB,\n" -"	float4*	localContactsOut,\n" -"	int localContactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" -"				facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	{\n" -"		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"	if (closestFaceB>=0)\n" -"	{\n" -"		numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" -"				posA,ornA,\n" -"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" -"				verticesA,facesA,indicesA,\n" -"				verticesB,facesB,indicesB,\n" -"				localContactsOut,localContactCapacity);\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" -"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" -"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" -"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" -"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -"	if( nPoints == 0 )\n" -"        return 0;\n" -"    \n" -"    if (nPoints <=4)\n" -"        return nPoints;\n" -"    \n" -"    \n" -"    if (nPoints >64)\n" -"        nPoints = 64;\n" -"    \n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		\n" -"		for (int i=0;i<nPoints;i++)\n" -"			center += p[i];\n" -"		center /= (float)nPoints;\n" -"	}\n" -"    \n" -"	\n" -"    \n" -"	//	sample 4 directions\n" -"    \n" -"    float4 aVector = p[0] - center;\n" -"    float4 u = cross3( nearNormal, aVector );\n" -"    float4 v = cross3( nearNormal, u );\n" -"    u = normalize3( u );\n" -"    v = normalize3( v );\n" -"    \n" -"    \n" -"    //keep point with deepest penetration\n" -"    float minW= FLT_MAX;\n" -"    \n" -"    int minIndex=-1;\n" -"    \n" -"    float4 maxDots;\n" -"    maxDots.x = FLT_MIN;\n" -"    maxDots.y = FLT_MIN;\n" -"    maxDots.z = FLT_MIN;\n" -"    maxDots.w = FLT_MIN;\n" -"    \n" -"    //	idx, distance\n" -"    for(int ie = 0; ie<nPoints; ie++ )\n" -"    {\n" -"        if (p[ie].w<minW)\n" -"        {\n" -"            minW = p[ie].w;\n" -"            minIndex=ie;\n" -"        }\n" -"        float f;\n" -"        float4 r = p[ie]-center;\n" -"        f = dot3F4( u, r );\n" -"        if (f<maxDots.x)\n" -"        {\n" -"            maxDots.x = f;\n" -"            contactIdx[0].x = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -u, r );\n" -"        if (f<maxDots.y)\n" -"        {\n" -"            maxDots.y = f;\n" -"            contactIdx[0].y = ie;\n" -"        }\n" -"        \n" -"        \n" -"        f = dot3F4( v, r );\n" -"        if (f<maxDots.z)\n" -"        {\n" -"            maxDots.z = f;\n" -"            contactIdx[0].z = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -v, r );\n" -"        if (f<maxDots.w)\n" -"        {\n" -"            maxDots.w = f;\n" -"            contactIdx[0].w = ie;\n" -"        }\n" -"        \n" -"    }\n" -"    \n" -"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -"    {\n" -"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" -"        contactIdx[0].x = minIndex;\n" -"    }\n" -"    \n" -"    return 4;\n" -"    \n" -"}\n" -"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -"    contactIdx[0] = 0;\n" -"    contactIdx[1] = 1;\n" -"    contactIdx[2] = 2;\n" -"    contactIdx[3] = 3;\n" -"    \n" -"	if( nPoints == 0 ) return 0;\n" -"    \n" -"	nPoints = min2( nPoints, 4 );\n" -"    return nPoints;\n" -"    \n" -"}\n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -"	if( nPoints == 0 ) return 0;\n" -"	nPoints = min2( nPoints, 64 );\n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		float4 v[64];\n" -"		for (int i=0;i<nPoints;i++)\n" -"			v[i] = p[i];\n" -"		//memcpy( v, p, nPoints*sizeof(float4) );\n" -"		PARALLEL_SUM( v, nPoints );\n" -"		center = v[0]/(float)nPoints;\n" -"	}\n" -"	\n" -"	{	//	sample 4 directions\n" -"		if( nPoints < 4 )\n" -"		{\n" -"			for(int i=0; i<nPoints; i++) \n" -"				contactIdx[i] = i;\n" -"			return nPoints;\n" -"		}\n" -"		float4 aVector = p[0] - center;\n" -"		float4 u = cross3( nearNormal, aVector );\n" -"		float4 v = cross3( nearNormal, u );\n" -"		u = normalize3( u );\n" -"		v = normalize3( v );\n" -"		int idx[4];\n" -"		float2 max00 = make_float2(0,FLT_MAX);\n" -"		{\n" -"			//	idx, distance\n" -"			{\n" -"				{\n" -"					int4 a[64];\n" -"					for(int ie = 0; ie<nPoints; ie++ )\n" -"					{\n" -"						\n" -"						\n" -"						float f;\n" -"						float4 r = p[ie]-center;\n" -"						f = dot3F4( u, r );\n" -"						a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( -u, r );\n" -"						a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( v, r );\n" -"						a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( -v, r );\n" -"						a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"					}\n" -"					for(int ie=0; ie<nPoints; ie++)\n" -"					{\n" -"						a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" -"						a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" -"						a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" -"						a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" -"					}\n" -"					idx[0] = (int)a[0].x & 0xff;\n" -"					idx[1] = (int)a[0].y & 0xff;\n" -"					idx[2] = (int)a[0].z & 0xff;\n" -"					idx[3] = (int)a[0].w & 0xff;\n" -"				}\n" -"			}\n" -"			{\n" -"				float2 h[64];\n" -"				PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" -"				REDUCE_MIN( h, nPoints );\n" -"				max00 = h[0];\n" -"			}\n" -"		}\n" -"		contactIdx[0] = idx[0];\n" -"		contactIdx[1] = idx[1];\n" -"		contactIdx[2] = idx[2];\n" -"		contactIdx[3] = idx[3];\n" -"		return 4;\n" -"	}\n" -"}\n" -"__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, \n" -"																	__global const b3RigidBodyData_t* rigidBodies, \n" -"																	__global const float4* closestPointsWorld,\n" -"																	__global const float4* separatingNormalsWorld,\n" -"																	__global const int* contactCounts,\n" -"																	__global const int* contactOffsets,\n" -"																	__global struct b3Contact4Data* restrict contactsOut,\n" -"																	counter32_t nContactsOut,\n" -"																	int contactCapacity,\n" -"																	int numPairs,\n" -"																	int pairIndex\n" -"																	)\n" -"{\n" -"	int idx = get_global_id(0);\n" -"	\n" -"	if (idx<numPairs)\n" -"	{\n" -"		float4 normal = separatingNormalsWorld[idx];\n" -"		int nPoints = contactCounts[idx];\n" -"		__global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" -"		float4 localPoints[64];\n" -"		for (int i=0;i<nPoints;i++)\n" -"		{\n" -"			localPoints[i] = pointsIn[i];\n" -"		}\n" -"		int contactIdx[4];// = {-1,-1,-1,-1};\n" -"		contactIdx[0] = -1;\n" -"		contactIdx[1] = -1;\n" -"		contactIdx[2] = -1;\n" -"		contactIdx[3] = -1;\n" -"		int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" -"		int dstIdx;\n" -"		AppendInc( nContactsOut, dstIdx );\n" -"		if (dstIdx<contactCapacity)\n" -"		{\n" -"			__global struct b3Contact4Data* c = contactsOut + dstIdx;\n" -"			c->m_worldNormalOnB = -normal;\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = idx;\n" -"			int bodyA = pairs[pairIndex].x;\n" -"			int bodyB = pairs[pairIndex].y;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			for (int i=0;i<nContacts;i++)\n" -"			{\n" -"				c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" -"			}\n" -"			GET_NPOINTS(*c) = nContacts;\n" -"		}\n" -"	}\n" -"}\n" -"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtInvert(orientationIn);\n" -"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void	trMul(float4 translationA, Quaternion orientationA,\n" -"						float4 translationB, Quaternion orientationB,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtMul(orientationA,orientationB);\n" -"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__kernel void   clipHullHullKernel( __global int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global const int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int numPairs,\n" -"																					int contactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"		\n" -"			int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" -"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"														rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -"													  rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"				float4 normal = -separatingNormals[i];\n" -"				int nPoints = numLocalContactsOut;\n" -"				float4* pointsIn = localContactsOut;\n" -"				int contactIdx[4];// = {-1,-1,-1,-1};\n" -"				contactIdx[0] = -1;\n" -"				contactIdx[1] = -1;\n" -"				contactIdx[2] = -1;\n" -"				contactIdx[3] = -1;\n" -"		\n" -"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"		\n" -"				\n" -"				int mprContactIndex = pairs[pairIndex].z;\n" -"				int dstIdx = mprContactIndex;\n" -"				if (dstIdx<0)\n" -"				{\n" -"					AppendInc( nGlobalContactsOut, dstIdx );\n" -"				}\n" -"				if (dstIdx<contactCapacity)\n" -"				{\n" -"					pairs[pairIndex].z = dstIdx;\n" -"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					for (int i=0;i<nReducedContacts;i++)\n" -"					{\n" -"					//this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" -"						if (i>0||(mprContactIndex<0))\n" -"						{\n" -"							c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"						}\n" -"					}\n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"				}\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"}\n" -"__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" -"																					__global const float4* gpuCompoundSepNormalsOut,\n" -"																					__global const int* gpuHasCompoundSepNormalsOut,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int numCompoundPairs, int maxContactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		if (gpuHasCompoundSepNormalsOut[i])\n" -"		{\n" -"			int bodyIndexA = gpuCompoundPairs[i].x;\n" -"			int bodyIndexB = gpuCompoundPairs[i].y;\n" -"			\n" -"			int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"			int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"			\n" -"			int collidableIndexA = -1;\n" -"			int collidableIndexB = -1;\n" -"			\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			\n" -"			float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"								\n" -"			if (childShapeIndexA >= 0)\n" -"			{\n" -"				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"				float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"				float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"				float4 newOrnA = qtMul(ornA,childOrnA);\n" -"				posA = newPosA;\n" -"				ornA = newOrnA;\n" -"			} else\n" -"			{\n" -"				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			}\n" -"			\n" -"			if (childShapeIndexB>=0)\n" -"			{\n" -"				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"				float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"				float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"				float4 newOrnB = qtMul(ornB,childOrnB);\n" -"				posB = newPosB;\n" -"				ornB = newOrnB;\n" -"			} else\n" -"			{\n" -"				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"			}\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" -"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"														posA,ornA,\n" -"													  posB,ornB,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"				float4 normal = -gpuCompoundSepNormalsOut[i];\n" -"				int nPoints = numLocalContactsOut;\n" -"				float4* pointsIn = localContactsOut;\n" -"				int contactIdx[4];// = {-1,-1,-1,-1};\n" -"				contactIdx[0] = -1;\n" -"				contactIdx[1] = -1;\n" -"				contactIdx[2] = -1;\n" -"				contactIdx[3] = -1;\n" -"		\n" -"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"		\n" -"				int dstIdx;\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"				if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = gpuCompoundPairs[pairIndex].x;\n" -"					int bodyB = gpuCompoundPairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_childIndexA = childShapeIndexA;\n" -"					c->m_childIndexB = childShapeIndexB;\n" -"					for (int i=0;i<nReducedContacts;i++)\n" -"					{\n" -"						c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"					}\n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"				}\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (gpuHasCompoundSepNormalsOut[i])\n" -"	}//	if (i<numCompoundPairs)\n" -"}\n" -"__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global const int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numPairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			//sphere-sphere\n" -"			float radiusA = collidables[collidableIndexA].m_radius;\n" -"			float radiusB = collidables[collidableIndexB].m_radius;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 diff = posA-posB;\n" -"			float len = length(diff);\n" -"			\n" -"			///iff distance positive, don't generate a new contact\n" -"			if ( len <= (radiusA+radiusB))\n" -"			{\n" -"				///distance (negative means penetration)\n" -"				float dist = len - (radiusA+radiusB);\n" -"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -"				if (len > 0.00001)\n" -"				{\n" -"					normalOnSurfaceB = diff / len;\n" -"				}\n" -"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -"				contactPosB.w = dist;\n" -"								\n" -"				int dstIdx;\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"				if (dstIdx < contactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normalOnSurfaceB;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_worldPosB[0] = contactPosB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				}//if (dstIdx < numPairs)\n" -"			}//if ( len <= (radiusA+radiusB))\n" -"		}//SHAPE_SPHERE SHAPE_SPHERE\n" -"	}//if (i<numPairs)\n" -"}				\n" -"__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numConcavePairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numConcavePairs)\n" -"	{\n" -"		//negative value means that the pair is invalid\n" -"		if (concavePairsIn[i].w<0)\n" -"			return;\n" -"		int bodyIndexA = concavePairsIn[i].x;\n" -"		int bodyIndexB = concavePairsIn[i].y;\n" -"		int f = concavePairsIn[i].z;\n" -"		int childShapeIndexA = f;\n" -"		\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		///////////////////////////////////////////////////////////////\n" -"		\n" -"	\n" -"		bool overlap = false;\n" -"		\n" -"		b3ConvexPolyhedronData_t convexPolyhedronA;\n" -"	//add 3 vertices of the triangle\n" -"		convexPolyhedronA.m_numVertices = 3;\n" -"		convexPolyhedronA.m_vertexOffset = 0;\n" -"		float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"		b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"		\n" -"		float4 verticesA[3];\n" -"		for (int i=0;i<3;i++)\n" -"		{\n" -"			int index = indices[face.m_indexOffset+i];\n" -"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"			verticesA[i] = vert;\n" -"			localCenter += vert;\n" -"		}\n" -"		float dmin = FLT_MAX;\n" -"		int localCC=0;\n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"                                  \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"                             \n" -"		b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot3F4(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                                            \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot3F4(edgeNormal,v0);\n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                                            \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 sepAxis = separatingNormals[i];\n" -"		\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		int childShapeIndexB =-1;\n" -"		if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			///////////////////\n" -"			///compound shape support\n" -"			\n" -"			childShapeIndexB = concavePairsIn[pairIndex].w;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			\n" -"		}\n" -"		\n" -"		////////////////////////////////////////\n" -"		\n" -"		\n" -"		\n" -"		int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" -"														&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"														posA,ornA,\n" -"													  posB,ornB,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														&verticesA,&facesA,&indicesA,\n" -"														vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"			float4 normal = -separatingNormals[i];\n" -"			int nPoints = numLocalContactsOut;\n" -"			float4* pointsIn = localContactsOut;\n" -"			int contactIdx[4];// = {-1,-1,-1,-1};\n" -"			contactIdx[0] = -1;\n" -"			contactIdx[1] = -1;\n" -"			contactIdx[2] = -1;\n" -"			contactIdx[3] = -1;\n" -"	\n" -"			int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"	\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"			if (dstIdx<contactCapacity)\n" -"			{\n" -"				__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"				c->m_worldNormalOnB = -normal;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				int bodyA = concavePairsIn[pairIndex].x;\n" -"				int bodyB = concavePairsIn[pairIndex].y;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"				c->m_childIndexA = childShapeIndexA;\n" -"				c->m_childIndexB = childShapeIndexB;\n" -"				for (int i=0;i<nReducedContacts;i++)\n" -"				{\n" -"					c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"				}\n" -"				GET_NPOINTS(*c) = nReducedContacts;\n" -"			}\n" -"				\n" -"		}//		if (numContactsOut>0)\n" -"	}//	if (i<numPairs)\n" -"}\n" -"int	findClippingFaces(const float4 separatingNormal,\n" -"                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                      int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"                      __global const float4* vertices,\n" -"                      __global const b3GpuFace_t* faces,\n" -"                      __global const int* indices,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"            \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"        worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"int clipFaces(__global float4* worldVertsA1,\n" -"              __global float4* worldNormalsA1,\n" -"              __global float4* worldVertsB1,\n" -"              __global float4* worldVertsB2, \n" -"              int capacityWorldVertsB2,\n" -"              const float minDist, float maxDist,\n" -"              __global int4* clippingFaces,\n" -"              int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"    \n" -"    int closestFaceA = clippingFaces[pairIndex].x;\n" -"    int closestFaceB = clippingFaces[pairIndex].y;\n" -"	int numVertsInA = clippingFaces[pairIndex].z;\n" -"	int numVertsInB = clippingFaces[pairIndex].w;\n" -"    \n" -"	int numVertsOut = 0;\n" -"    \n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"    \n" -"    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -"    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -"    \n" -"    \n" -"	\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"    \n" -"	for(int e0=0;e0<numVertsInA;e0++)\n" -"	{\n" -"		const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -"		const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -"		const float4 WorldEdge0 = aw - bw;\n" -"		float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = aw;\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -"		__global float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsInB = numVertsOut;\n" -"		numVertsOut = 0;\n" -"	}\n" -"    \n" -"    //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"    //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"    \n" -"    /*for (int i=0;i<numVertsInB;i++)\n" -"    {\n" -"        pVtxOut[i] = pVtxIn[i];\n" -"    }*/\n" -"    \n" -"    \n" -"    \n" -"    \n" -"    //numVertsInB=0;\n" -"	\n" -"    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"    for (int i=0;i<numVertsInB;i++)\n" -"    {\n" -"        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"        if (depth <=minDist)\n" -"        {\n" -"            depth = minDist;\n" -"        }\n" -"        \n" -"        if (depth <=maxDist)\n" -"        {\n" -"            float4 pointInWorld = pVtxIn[i];\n" -"            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"        }\n" -"    }\n" -"   \n" -"    clippingFaces[pairIndex].w =numContactsOut;\n" -"   \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"__kernel void   findClippingFacesKernel(  __global const int4* pairs,\n" -"                                        __global const b3RigidBodyData_t* rigidBodies,\n" -"                                        __global const b3Collidable_t* collidables,\n" -"                                        __global const b3ConvexPolyhedronData_t* convexShapes,\n" -"                                        __global const float4* vertices,\n" -"                                        __global const float4* uniqueEdges,\n" -"                                        __global const b3GpuFace_t* faces,\n" -"                                        __global const int* indices,\n" -"                                        __global const float4* separatingNormals,\n" -"                                        __global const int* hasSeparatingAxis,\n" -"                                        __global int4* clippingFacesOut,\n" -"                                        __global float4* worldVertsA1,\n" -"                                        __global float4* worldNormalsA1,\n" -"                                        __global float4* worldVertsB1,\n" -"                                        int capacityWorldVerts,\n" -"                                        int numPairs\n" -"                                        )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"    \n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"    \n" -"	if (i<numPairs)\n" -"	{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"			\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"            \n" -"            \n" -"			int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" -"                                                        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"                                                        rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -"                                                        rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -"                                                        worldVertsA1,\n" -"                                                        worldNormalsA1,\n" -"                                                        worldVertsB1,capacityWorldVerts,\n" -"                                                        minDist, maxDist,\n" -"                                                        vertices,faces,indices,\n" -"                                                        clippingFacesOut,i);\n" -"            \n" -"            \n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"}\n" -"__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,\n" -"                                                   __global const int* hasSeparatingAxis,\n" -"                                                   __global int4* clippingFacesOut,\n" -"                                                   __global float4* worldVertsA1,\n" -"                                                   __global float4* worldNormalsA1,\n" -"                                                   __global float4* worldVertsB1,\n" -"                                                   __global float4* worldVertsB2,\n" -"                                                    int vertexFaceCapacity,\n" -"                                                   int numPairs,\n" -"					                                        int debugMode\n" -"                                                   )\n" -"{\n" -"    int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"    \n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"    \n" -"	if (i<numPairs)\n" -"	{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"//			int bodyIndexA = pairs[i].x;\n" -"	//		int bodyIndexB = pairs[i].y;\n" -"		    \n" -"            int numLocalContactsOut = 0;\n" -"            int capacityWorldVertsB2 = vertexFaceCapacity;\n" -"            \n" -"            __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -"            __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -"            \n" -"            {\n" -"                __global int4* clippingFaces = clippingFacesOut;\n" -"            \n" -"                \n" -"                int closestFaceA = clippingFaces[pairIndex].x;\n" -"                int closestFaceB = clippingFaces[pairIndex].y;\n" -"                int numVertsInA = clippingFaces[pairIndex].z;\n" -"                int numVertsInB = clippingFaces[pairIndex].w;\n" -"                \n" -"                int numVertsOut = 0;\n" -"                \n" -"                if (closestFaceA>=0)\n" -"                {\n" -"                    \n" -"                    \n" -"                    \n" -"                    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"                    \n" -"                    for(int e0=0;e0<numVertsInA;e0++)\n" -"                    {\n" -"                        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -"                        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -"                        const float4 WorldEdge0 = aw - bw;\n" -"                        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -"                        float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"                        float4 worldA1 = aw;\n" -"                        float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"                        float4 planeNormalWS = planeNormalWS1;\n" -"                        float planeEqWS=planeEqWS1;\n" -"                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -"                        __global float4* tmp = pVtxOut;\n" -"                        pVtxOut = pVtxIn;\n" -"                        pVtxIn = tmp;\n" -"                        numVertsInB = numVertsOut;\n" -"                        numVertsOut = 0;\n" -"                    }\n" -"                    \n" -"                    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"                    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"                    \n" -"                    for (int i=0;i<numVertsInB;i++)\n" -"                    {\n" -"                        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"                        if (depth <=minDist)\n" -"                        {\n" -"                            depth = minDist;\n" -"                        }\n" -"                        \n" -"                        if (depth <=maxDist)\n" -"                        {\n" -"                            float4 pointInWorld = pVtxIn[i];\n" -"                            pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"                        }\n" -"                    }\n" -"                    \n" -"                }\n" -"                clippingFaces[pairIndex].w =numLocalContactsOut;\n" -"                \n" -"            }\n" -"            \n" -"            for (int i=0;i<numLocalContactsOut;i++)\n" -"                pVtxIn[i] = pVtxOut[i];\n" -"                \n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"}\n" -"__kernel void   newContactReductionKernel( __global int4* pairs,\n" -"                                                   __global const b3RigidBodyData_t* rigidBodies,\n" -"                                                   __global const float4* separatingNormals,\n" -"                                                   __global const int* hasSeparatingAxis,\n" -"                                                   __global struct b3Contact4Data* globalContactsOut,\n" -"                                                   __global int4* clippingFaces,\n" -"                                                   __global float4* worldVertsB2,\n" -"                                                   volatile __global int* nGlobalContactsOut,\n" -"                                                   int vertexFaceCapacity,\n" -"												   int contactCapacity,\n" -"                                                   int numPairs\n" -"                                                   )\n" -"{\n" -"    int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"    int4 contactIdx;\n" -"    contactIdx=make_int4(0,1,2,3);\n" -"    \n" -"	if (i<numPairs)\n" -"	{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"			\n" -"            \n" -"            \n" -"			int nPoints = clippingFaces[pairIndex].w;\n" -"           \n" -"            if (nPoints>0)\n" -"            {\n" -"                 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" -"                float4 normal = -separatingNormals[i];\n" -"                \n" -"                int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" -"            \n" -"				int mprContactIndex = pairs[pairIndex].z;\n" -"                int dstIdx = mprContactIndex;\n" -"				if (dstIdx<0)\n" -"				{\n" -"	                AppendInc( nGlobalContactsOut, dstIdx );\n" -"				}\n" -"//#if 0\n" -"                \n" -"				if (dstIdx < contactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					pairs[pairIndex].w = dstIdx;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"                    c->m_childIndexA =-1;\n" -"					c->m_childIndexB =-1;\n" -"                    switch (nReducedContacts)\n" -"                    {\n" -"                        case 4:\n" -"                            c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" -"                        case 3:\n" -"                            c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" -"                        case 2:\n" -"                            c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" -"                        case 1:\n" -"							if (mprContactIndex<0)//test\n" -"	                            c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" -"                        default:\n" -"                        {\n" -"                        }\n" -"                    };\n" -"                    \n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"                    \n" -"                 }\n" -"                 \n" -"                \n" -"//#endif\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"    \n" -"}\n" -; +static const char* satClipKernelsCL = +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	"#define counter32_t volatile __global int*\n" +	"#endif\n" +	"#define GET_GROUP_IDX get_group_id(0)\n" +	"#define GET_LOCAL_IDX get_local_id(0)\n" +	"#define GET_GLOBAL_IDX get_global_id(0)\n" +	"#define GET_GROUP_SIZE get_local_size(0)\n" +	"#define GET_NUM_GROUPS get_num_groups(0)\n" +	"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +	"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +	"#define AtomInc(x) atom_inc(&(x))\n" +	"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define AtomAdd(x, value) atom_add(&(x), value)\n" +	"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +	"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +	"#define max2 max\n" +	"#define min2 min\n" +	"typedef unsigned int u32;\n" +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"typedef struct b3GpuFace b3GpuFace_t;\n" +	"struct b3GpuFace\n" +	"{\n" +	"	b3Float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"	int m_unusedPadding1;\n" +	"	int m_unusedPadding2;\n" +	"};\n" +	"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +	"struct b3ConvexPolyhedronData\n" +	"{\n" +	"	b3Float4		m_localCenter;\n" +	"	b3Float4		m_extents;\n" +	"	b3Float4		mC;\n" +	"	b3Float4		mE;\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"};\n" +	"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_COLLIDABLE_H\n" +	"#define B3_COLLIDABLE_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"enum b3ShapeTypes\n" +	"{\n" +	"	SHAPE_HEIGHT_FIELD=1,\n" +	"	SHAPE_CONVEX_HULL=3,\n" +	"	SHAPE_PLANE=4,\n" +	"	SHAPE_CONCAVE_TRIMESH=5,\n" +	"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +	"	SHAPE_SPHERE=7,\n" +	"	MAX_NUM_SHAPE_TYPES,\n" +	"};\n" +	"typedef struct b3Collidable b3Collidable_t;\n" +	"struct b3Collidable\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"};\n" +	"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" +	"struct b3GpuChildShape\n" +	"{\n" +	"	b3Float4	m_childPosition;\n" +	"	b3Quat		m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"struct b3CompoundOverlappingPair\n" +	"{\n" +	"	int m_bodyIndexA;\n" +	"	int m_bodyIndexB;\n" +	"//	int	m_pairType;\n" +	"	int m_childShapeIndexA;\n" +	"	int m_childShapeIndexB;\n" +	"};\n" +	"#endif //B3_COLLIDABLE_H\n" +	"#ifndef B3_RIGIDBODY_DATA_H\n" +	"#define B3_RIGIDBODY_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +	"struct b3RigidBodyData\n" +	"{\n" +	"	b3Float4				m_pos;\n" +	"	b3Quat					m_quat;\n" +	"	b3Float4				m_linVel;\n" +	"	b3Float4				m_angVel;\n" +	"	int 					m_collidableIdx;\n" +	"	float 				m_invMass;\n" +	"	float 				m_restituitionCoeff;\n" +	"	float 				m_frictionCoeff;\n" +	"};\n" +	"typedef struct b3InertiaData b3InertiaData_t;\n" +	"struct b3InertiaData\n" +	"{\n" +	"	b3Mat3x3 m_invInertiaWorld;\n" +	"	b3Mat3x3 m_initInvInertia;\n" +	"};\n" +	"#endif //B3_RIGIDBODY_DATA_H\n" +	"	\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +	"#define make_float4 (float4)\n" +	"#define make_float2 (float2)\n" +	"#define make_uint4 (uint4)\n" +	"#define make_int4 (int4)\n" +	"#define make_uint2 (uint2)\n" +	"#define make_int2 (int2)\n" +	"__inline\n" +	"float fastDiv(float numerator, float denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"//	return numerator/denominator;	\n" +	"}\n" +	"__inline\n" +	"float4 fastDiv4(float4 numerator, float4 denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"}\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"//#define dot3F4 dot\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" +	"{\n" +	"	return make_float4(	a.x + (b.x - a.x) * t,\n" +	"						a.y + (b.y - a.y) * t,\n" +	"						a.z + (b.z - a.z) * t,\n" +	"						0.f);\n" +	"}\n" +	"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +	"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" +	"{\n" +	"	\n" +	"	int ve;\n" +	"	float ds, de;\n" +	"	int numVertsOut = 0;\n" +	"    //double-check next test\n" +	"    	if (numVertsIn < 2)\n" +	"    		return 0;\n" +	"    \n" +	"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" +	"	float4 endVertex = pVtxIn[0];\n" +	"	\n" +	"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +	"    \n" +	"	for (ve = 0; ve < numVertsIn; ve++)\n" +	"	{\n" +	"		endVertex=pVtxIn[ve];\n" +	"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +	"		if (ds<0)\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start < 0, end < 0, so output endVertex\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"			else\n" +	"			{\n" +	"				// Start < 0, end >= 0, so output intersection\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"			}\n" +	"		}\n" +	"		else\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start >= 0, end < 0 so output intersection and end\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"		}\n" +	"		firstVertex = endVertex;\n" +	"		ds = de;\n" +	"	}\n" +	"	return numVertsOut;\n" +	"}\n" +	"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +	"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" +	"{\n" +	"	\n" +	"	int ve;\n" +	"	float ds, de;\n" +	"	int numVertsOut = 0;\n" +	"//double-check next test\n" +	"	if (numVertsIn < 2)\n" +	"		return 0;\n" +	"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" +	"	float4 endVertex = pVtxIn[0];\n" +	"	\n" +	"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +	"	for (ve = 0; ve < numVertsIn; ve++)\n" +	"	{\n" +	"		endVertex=pVtxIn[ve];\n" +	"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +	"		if (ds<0)\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start < 0, end < 0, so output endVertex\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"			else\n" +	"			{\n" +	"				// Start < 0, end >= 0, so output intersection\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"			}\n" +	"		}\n" +	"		else\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start >= 0, end < 0 so output intersection and end\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"		}\n" +	"		firstVertex = endVertex;\n" +	"		ds = de;\n" +	"	}\n" +	"	return numVertsOut;\n" +	"}\n" +	"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  \n" +	"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +	"	float4* worldVertsB2, int capacityWorldVertsB2,\n" +	"	const float minDist, float maxDist,\n" +	"	__global const float4* vertices,\n" +	"	__global const b3GpuFace_t* faces,\n" +	"	__global const int* indices,\n" +	"	float4* contactsOut,\n" +	"	int contactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	float4* pVtxIn = worldVertsB1;\n" +	"	float4* pVtxOut = worldVertsB2;\n" +	"	\n" +	"	int numVertsIn = numWorldVertsB1;\n" +	"	int numVertsOut = 0;\n" +	"	int closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"				faces[hullA->m_faceOffset+face].m_plane.x, \n" +	"				faces[hullA->m_faceOffset+face].m_plane.y, \n" +	"				faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"		\n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"	b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" +	"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"	int numVerticesA = polyA.m_numIndices;\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"		const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" +	"		const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +	"		const float4 edge0 = a - b;\n" +	"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +	"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = transform(&a,&posA,&ornA);\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		\n" +	"		//clip face\n" +	"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +	"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		//btSwap(pVtxIn,pVtxOut);\n" +	"		float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsIn = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"	\n" +	"	// only keep points that are behind the witness face\n" +	"	{\n" +	"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float localPlaneEq = polyA.m_plane.w;\n" +	"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +	"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +	"		for (int i=0;i<numVertsIn;i++)\n" +	"		{\n" +	"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"			if (depth <=minDist)\n" +	"			{\n" +	"				depth = minDist;\n" +	"			}\n" +	"			if (depth <=maxDist)\n" +	"			{\n" +	"				float4 pointInWorld = pVtxIn[i];\n" +	"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" +	"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  \n" +	"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +	"	float4* worldVertsB2, int capacityWorldVertsB2,\n" +	"	const float minDist, float maxDist,\n" +	"	const float4* verticesA,\n" +	"	const b3GpuFace_t* facesA,\n" +	"	const int* indicesA,\n" +	"	__global const float4* verticesB,\n" +	"	__global const b3GpuFace_t* facesB,\n" +	"	__global const int* indicesB,\n" +	"	float4* contactsOut,\n" +	"	int contactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	float4* pVtxIn = worldVertsB1;\n" +	"	float4* pVtxOut = worldVertsB2;\n" +	"	\n" +	"	int numVertsIn = numWorldVertsB1;\n" +	"	int numVertsOut = 0;\n" +	"	int closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"				facesA[hullA->m_faceOffset+face].m_plane.x, \n" +	"				facesA[hullA->m_faceOffset+face].m_plane.y, \n" +	"				facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"		\n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"	b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" +	"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"	int numVerticesA = polyA.m_numIndices;\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"		const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" +	"		const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +	"		const float4 edge0 = a - b;\n" +	"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +	"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = transform(&a,&posA,&ornA);\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		\n" +	"		//clip face\n" +	"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +	"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		//btSwap(pVtxIn,pVtxOut);\n" +	"		float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsIn = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"	\n" +	"	// only keep points that are behind the witness face\n" +	"	{\n" +	"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float localPlaneEq = polyA.m_plane.w;\n" +	"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +	"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +	"		for (int i=0;i<numVertsIn;i++)\n" +	"		{\n" +	"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"			if (depth <=minDist)\n" +	"			{\n" +	"				depth = minDist;\n" +	"			}\n" +	"			if (depth <=maxDist)\n" +	"			{\n" +	"				float4 pointInWorld = pVtxIn[i];\n" +	"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" +	"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"int	clipHullAgainstHull(const float4 separatingNormal,\n" +	"	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +	"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +	"	const float minDist, float maxDist,\n" +	"	__global const float4* vertices,\n" +	"	__global const b3GpuFace_t* faces,\n" +	"	__global const int* indices,\n" +	"	float4*	localContactsOut,\n" +	"	int localContactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" +	"				faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	{\n" +	"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceB>=0)\n" +	"	{\n" +	"		numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" +	"				posA,ornA,\n" +	"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" +	"				faces,\n" +	"				indices,localContactsOut,localContactCapacity);\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"int	clipHullAgainstHullLocalA(const float4 separatingNormal,\n" +	"	const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +	"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +	"	const float minDist, float maxDist,\n" +	"	const float4* verticesA,\n" +	"	const b3GpuFace_t* facesA,\n" +	"	const int* indicesA,\n" +	"	__global const float4* verticesB,\n" +	"	__global const b3GpuFace_t* facesB,\n" +	"	__global const int* indicesB,\n" +	"	float4*	localContactsOut,\n" +	"	int localContactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" +	"				facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	{\n" +	"		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"			const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceB>=0)\n" +	"	{\n" +	"		numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" +	"				posA,ornA,\n" +	"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" +	"				verticesA,facesA,indicesA,\n" +	"				verticesB,facesB,indicesB,\n" +	"				localContactsOut,localContactCapacity);\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" +	"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" +	"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" +	"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" +	"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 )\n" +	"        return 0;\n" +	"    \n" +	"    if (nPoints <=4)\n" +	"        return nPoints;\n" +	"    \n" +	"    \n" +	"    if (nPoints >64)\n" +	"        nPoints = 64;\n" +	"    \n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			center += p[i];\n" +	"		center /= (float)nPoints;\n" +	"	}\n" +	"    \n" +	"	\n" +	"    \n" +	"	//	sample 4 directions\n" +	"    \n" +	"    float4 aVector = p[0] - center;\n" +	"    float4 u = cross3( nearNormal, aVector );\n" +	"    float4 v = cross3( nearNormal, u );\n" +	"    u = normalize3( u );\n" +	"    v = normalize3( v );\n" +	"    \n" +	"    \n" +	"    //keep point with deepest penetration\n" +	"    float minW= FLT_MAX;\n" +	"    \n" +	"    int minIndex=-1;\n" +	"    \n" +	"    float4 maxDots;\n" +	"    maxDots.x = FLT_MIN;\n" +	"    maxDots.y = FLT_MIN;\n" +	"    maxDots.z = FLT_MIN;\n" +	"    maxDots.w = FLT_MIN;\n" +	"    \n" +	"    //	idx, distance\n" +	"    for(int ie = 0; ie<nPoints; ie++ )\n" +	"    {\n" +	"        if (p[ie].w<minW)\n" +	"        {\n" +	"            minW = p[ie].w;\n" +	"            minIndex=ie;\n" +	"        }\n" +	"        float f;\n" +	"        float4 r = p[ie]-center;\n" +	"        f = dot3F4( u, r );\n" +	"        if (f<maxDots.x)\n" +	"        {\n" +	"            maxDots.x = f;\n" +	"            contactIdx[0].x = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -u, r );\n" +	"        if (f<maxDots.y)\n" +	"        {\n" +	"            maxDots.y = f;\n" +	"            contactIdx[0].y = ie;\n" +	"        }\n" +	"        \n" +	"        \n" +	"        f = dot3F4( v, r );\n" +	"        if (f<maxDots.z)\n" +	"        {\n" +	"            maxDots.z = f;\n" +	"            contactIdx[0].z = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -v, r );\n" +	"        if (f<maxDots.w)\n" +	"        {\n" +	"            maxDots.w = f;\n" +	"            contactIdx[0].w = ie;\n" +	"        }\n" +	"        \n" +	"    }\n" +	"    \n" +	"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" +	"    {\n" +	"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" +	"        contactIdx[0].x = minIndex;\n" +	"    }\n" +	"    \n" +	"    return 4;\n" +	"    \n" +	"}\n" +	"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" +	"{\n" +	"    contactIdx[0] = 0;\n" +	"    contactIdx[1] = 1;\n" +	"    contactIdx[2] = 2;\n" +	"    contactIdx[3] = 3;\n" +	"    \n" +	"	if( nPoints == 0 ) return 0;\n" +	"    \n" +	"	nPoints = min2( nPoints, 4 );\n" +	"    return nPoints;\n" +	"    \n" +	"}\n" +	"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 ) return 0;\n" +	"	nPoints = min2( nPoints, 64 );\n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		float4 v[64];\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			v[i] = p[i];\n" +	"		//memcpy( v, p, nPoints*sizeof(float4) );\n" +	"		PARALLEL_SUM( v, nPoints );\n" +	"		center = v[0]/(float)nPoints;\n" +	"	}\n" +	"	\n" +	"	{	//	sample 4 directions\n" +	"		if( nPoints < 4 )\n" +	"		{\n" +	"			for(int i=0; i<nPoints; i++) \n" +	"				contactIdx[i] = i;\n" +	"			return nPoints;\n" +	"		}\n" +	"		float4 aVector = p[0] - center;\n" +	"		float4 u = cross3( nearNormal, aVector );\n" +	"		float4 v = cross3( nearNormal, u );\n" +	"		u = normalize3( u );\n" +	"		v = normalize3( v );\n" +	"		int idx[4];\n" +	"		float2 max00 = make_float2(0,FLT_MAX);\n" +	"		{\n" +	"			//	idx, distance\n" +	"			{\n" +	"				{\n" +	"					int4 a[64];\n" +	"					for(int ie = 0; ie<nPoints; ie++ )\n" +	"					{\n" +	"						\n" +	"						\n" +	"						float f;\n" +	"						float4 r = p[ie]-center;\n" +	"						f = dot3F4( u, r );\n" +	"						a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( -u, r );\n" +	"						a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( v, r );\n" +	"						a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( -v, r );\n" +	"						a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"					}\n" +	"					for(int ie=0; ie<nPoints; ie++)\n" +	"					{\n" +	"						a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" +	"						a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" +	"						a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" +	"						a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" +	"					}\n" +	"					idx[0] = (int)a[0].x & 0xff;\n" +	"					idx[1] = (int)a[0].y & 0xff;\n" +	"					idx[2] = (int)a[0].z & 0xff;\n" +	"					idx[3] = (int)a[0].w & 0xff;\n" +	"				}\n" +	"			}\n" +	"			{\n" +	"				float2 h[64];\n" +	"				PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" +	"				REDUCE_MIN( h, nPoints );\n" +	"				max00 = h[0];\n" +	"			}\n" +	"		}\n" +	"		contactIdx[0] = idx[0];\n" +	"		contactIdx[1] = idx[1];\n" +	"		contactIdx[2] = idx[2];\n" +	"		contactIdx[3] = idx[3];\n" +	"		return 4;\n" +	"	}\n" +	"}\n" +	"__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, \n" +	"																	__global const b3RigidBodyData_t* rigidBodies, \n" +	"																	__global const float4* closestPointsWorld,\n" +	"																	__global const float4* separatingNormalsWorld,\n" +	"																	__global const int* contactCounts,\n" +	"																	__global const int* contactOffsets,\n" +	"																	__global struct b3Contact4Data* restrict contactsOut,\n" +	"																	counter32_t nContactsOut,\n" +	"																	int contactCapacity,\n" +	"																	int numPairs,\n" +	"																	int pairIndex\n" +	"																	)\n" +	"{\n" +	"	int idx = get_global_id(0);\n" +	"	\n" +	"	if (idx<numPairs)\n" +	"	{\n" +	"		float4 normal = separatingNormalsWorld[idx];\n" +	"		int nPoints = contactCounts[idx];\n" +	"		__global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" +	"		float4 localPoints[64];\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"		{\n" +	"			localPoints[i] = pointsIn[i];\n" +	"		}\n" +	"		int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"		contactIdx[0] = -1;\n" +	"		contactIdx[1] = -1;\n" +	"		contactIdx[2] = -1;\n" +	"		contactIdx[3] = -1;\n" +	"		int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" +	"		int dstIdx;\n" +	"		AppendInc( nContactsOut, dstIdx );\n" +	"		if (dstIdx<contactCapacity)\n" +	"		{\n" +	"			__global struct b3Contact4Data* c = contactsOut + dstIdx;\n" +	"			c->m_worldNormalOnB = -normal;\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = idx;\n" +	"			int bodyA = pairs[pairIndex].x;\n" +	"			int bodyB = pairs[pairIndex].y;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			for (int i=0;i<nContacts;i++)\n" +	"			{\n" +	"				c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" +	"			}\n" +	"			GET_NPOINTS(*c) = nContacts;\n" +	"		}\n" +	"	}\n" +	"}\n" +	"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtInvert(orientationIn);\n" +	"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" +	"}\n" +	"void	trMul(float4 translationA, Quaternion orientationA,\n" +	"						float4 translationB, Quaternion orientationB,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtMul(orientationA,orientationB);\n" +	"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" +	"}\n" +	"__kernel void   clipHullHullKernel( __global int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global const int* hasSeparatingAxis,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numPairs,\n" +	"																					int contactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"		\n" +	"			int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" +	"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"														rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +	"													  rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"				float4 normal = -separatingNormals[i];\n" +	"				int nPoints = numLocalContactsOut;\n" +	"				float4* pointsIn = localContactsOut;\n" +	"				int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"				contactIdx[0] = -1;\n" +	"				contactIdx[1] = -1;\n" +	"				contactIdx[2] = -1;\n" +	"				contactIdx[3] = -1;\n" +	"		\n" +	"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +	"		\n" +	"				\n" +	"				int mprContactIndex = pairs[pairIndex].z;\n" +	"				int dstIdx = mprContactIndex;\n" +	"				if (dstIdx<0)\n" +	"				{\n" +	"					AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				}\n" +	"				if (dstIdx<contactCapacity)\n" +	"				{\n" +	"					pairs[pairIndex].z = dstIdx;\n" +	"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					for (int i=0;i<nReducedContacts;i++)\n" +	"					{\n" +	"					//this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" +	"						if (i>0||(mprContactIndex<0))\n" +	"						{\n" +	"							c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"						}\n" +	"					}\n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"				}\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"}\n" +	"__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" +	"																					__global const float4* gpuCompoundSepNormalsOut,\n" +	"																					__global const int* gpuHasCompoundSepNormalsOut,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numCompoundPairs, int maxContactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		if (gpuHasCompoundSepNormalsOut[i])\n" +	"		{\n" +	"			int bodyIndexA = gpuCompoundPairs[i].x;\n" +	"			int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"			\n" +	"			int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"			int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"			\n" +	"			int collidableIndexA = -1;\n" +	"			int collidableIndexB = -1;\n" +	"			\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			\n" +	"			float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"								\n" +	"			if (childShapeIndexA >= 0)\n" +	"			{\n" +	"				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"				float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"				float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"				float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"				posA = newPosA;\n" +	"				ornA = newOrnA;\n" +	"			} else\n" +	"			{\n" +	"				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			}\n" +	"			\n" +	"			if (childShapeIndexB>=0)\n" +	"			{\n" +	"				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"				float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"				float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"				float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"				posB = newPosB;\n" +	"				ornB = newOrnB;\n" +	"			} else\n" +	"			{\n" +	"				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"			}\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" +	"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"														posA,ornA,\n" +	"													  posB,ornB,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"				float4 normal = -gpuCompoundSepNormalsOut[i];\n" +	"				int nPoints = numLocalContactsOut;\n" +	"				float4* pointsIn = localContactsOut;\n" +	"				int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"				contactIdx[0] = -1;\n" +	"				contactIdx[1] = -1;\n" +	"				contactIdx[2] = -1;\n" +	"				contactIdx[3] = -1;\n" +	"		\n" +	"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +	"		\n" +	"				int dstIdx;\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = gpuCompoundPairs[pairIndex].x;\n" +	"					int bodyB = gpuCompoundPairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_childIndexA = childShapeIndexA;\n" +	"					c->m_childIndexB = childShapeIndexB;\n" +	"					for (int i=0;i<nReducedContacts;i++)\n" +	"					{\n" +	"						c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"					}\n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"				}\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (gpuHasCompoundSepNormalsOut[i])\n" +	"	}//	if (i<numCompoundPairs)\n" +	"}\n" +	"__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global const int* hasSeparatingAxis,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numPairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			//sphere-sphere\n" +	"			float radiusA = collidables[collidableIndexA].m_radius;\n" +	"			float radiusB = collidables[collidableIndexB].m_radius;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 diff = posA-posB;\n" +	"			float len = length(diff);\n" +	"			\n" +	"			///iff distance positive, don't generate a new contact\n" +	"			if ( len <= (radiusA+radiusB))\n" +	"			{\n" +	"				///distance (negative means penetration)\n" +	"				float dist = len - (radiusA+radiusB);\n" +	"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" +	"				if (len > 0.00001)\n" +	"				{\n" +	"					normalOnSurfaceB = diff / len;\n" +	"				}\n" +	"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +	"				contactPosB.w = dist;\n" +	"								\n" +	"				int dstIdx;\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				if (dstIdx < contactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normalOnSurfaceB;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_worldPosB[0] = contactPosB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				}//if (dstIdx < numPairs)\n" +	"			}//if ( len <= (radiusA+radiusB))\n" +	"		}//SHAPE_SPHERE SHAPE_SPHERE\n" +	"	}//if (i<numPairs)\n" +	"}				\n" +	"__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numConcavePairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numConcavePairs)\n" +	"	{\n" +	"		//negative value means that the pair is invalid\n" +	"		if (concavePairsIn[i].w<0)\n" +	"			return;\n" +	"		int bodyIndexA = concavePairsIn[i].x;\n" +	"		int bodyIndexB = concavePairsIn[i].y;\n" +	"		int f = concavePairsIn[i].z;\n" +	"		int childShapeIndexA = f;\n" +	"		\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		///////////////////////////////////////////////////////////////\n" +	"		\n" +	"	\n" +	"		bool overlap = false;\n" +	"		\n" +	"		b3ConvexPolyhedronData_t convexPolyhedronA;\n" +	"	//add 3 vertices of the triangle\n" +	"		convexPolyhedronA.m_numVertices = 3;\n" +	"		convexPolyhedronA.m_vertexOffset = 0;\n" +	"		float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"		b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"		\n" +	"		float4 verticesA[3];\n" +	"		for (int i=0;i<3;i++)\n" +	"		{\n" +	"			int index = indices[face.m_indexOffset+i];\n" +	"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"			verticesA[i] = vert;\n" +	"			localCenter += vert;\n" +	"		}\n" +	"		float dmin = FLT_MAX;\n" +	"		int localCC=0;\n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"                                  \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"                             \n" +	"		b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot3F4(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                                            \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot3F4(edgeNormal,v0);\n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                                            \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 sepAxis = separatingNormals[i];\n" +	"		\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		int childShapeIndexB =-1;\n" +	"		if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			///////////////////\n" +	"			///compound shape support\n" +	"			\n" +	"			childShapeIndexB = concavePairsIn[pairIndex].w;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			\n" +	"		}\n" +	"		\n" +	"		////////////////////////////////////////\n" +	"		\n" +	"		\n" +	"		\n" +	"		int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" +	"														&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"														posA,ornA,\n" +	"													  posB,ornB,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														&verticesA,&facesA,&indicesA,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"			float4 normal = -separatingNormals[i];\n" +	"			int nPoints = numLocalContactsOut;\n" +	"			float4* pointsIn = localContactsOut;\n" +	"			int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"			contactIdx[0] = -1;\n" +	"			contactIdx[1] = -1;\n" +	"			contactIdx[2] = -1;\n" +	"			contactIdx[3] = -1;\n" +	"	\n" +	"			int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +	"	\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			if (dstIdx<contactCapacity)\n" +	"			{\n" +	"				__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"				c->m_worldNormalOnB = -normal;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				int bodyA = concavePairsIn[pairIndex].x;\n" +	"				int bodyB = concavePairsIn[pairIndex].y;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"				c->m_childIndexA = childShapeIndexA;\n" +	"				c->m_childIndexB = childShapeIndexB;\n" +	"				for (int i=0;i<nReducedContacts;i++)\n" +	"				{\n" +	"					c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"				}\n" +	"				GET_NPOINTS(*c) = nReducedContacts;\n" +	"			}\n" +	"				\n" +	"		}//		if (numContactsOut>0)\n" +	"	}//	if (i<numPairs)\n" +	"}\n" +	"int	findClippingFaces(const float4 separatingNormal,\n" +	"                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"                       __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"                      __global const float4* vertices,\n" +	"                      __global const b3GpuFace_t* faces,\n" +	"                      __global const int* indices,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	{\n" +	"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"        worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"int clipFaces(__global float4* worldVertsA1,\n" +	"              __global float4* worldNormalsA1,\n" +	"              __global float4* worldVertsB1,\n" +	"              __global float4* worldVertsB2, \n" +	"              int capacityWorldVertsB2,\n" +	"              const float minDist, float maxDist,\n" +	"              __global int4* clippingFaces,\n" +	"              int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"    \n" +	"    int closestFaceA = clippingFaces[pairIndex].x;\n" +	"    int closestFaceB = clippingFaces[pairIndex].y;\n" +	"	int numVertsInA = clippingFaces[pairIndex].z;\n" +	"	int numVertsInB = clippingFaces[pairIndex].w;\n" +	"    \n" +	"	int numVertsOut = 0;\n" +	"    \n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"    \n" +	"    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +	"    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +	"    \n" +	"    \n" +	"	\n" +	"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"    \n" +	"	for(int e0=0;e0<numVertsInA;e0++)\n" +	"	{\n" +	"		const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +	"		const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +	"		const float4 WorldEdge0 = aw - bw;\n" +	"		float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = aw;\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		__global float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsInB = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"    \n" +	"    //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"    //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"    \n" +	"    /*for (int i=0;i<numVertsInB;i++)\n" +	"    {\n" +	"        pVtxOut[i] = pVtxIn[i];\n" +	"    }*/\n" +	"    \n" +	"    \n" +	"    \n" +	"    \n" +	"    //numVertsInB=0;\n" +	"	\n" +	"    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"    for (int i=0;i<numVertsInB;i++)\n" +	"    {\n" +	"        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"        if (depth <=minDist)\n" +	"        {\n" +	"            depth = minDist;\n" +	"        }\n" +	"        \n" +	"        if (depth <=maxDist)\n" +	"        {\n" +	"            float4 pointInWorld = pVtxIn[i];\n" +	"            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"        }\n" +	"    }\n" +	"   \n" +	"    clippingFaces[pairIndex].w =numContactsOut;\n" +	"   \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"__kernel void   findClippingFacesKernel(  __global const int4* pairs,\n" +	"                                        __global const b3RigidBodyData_t* rigidBodies,\n" +	"                                        __global const b3Collidable_t* collidables,\n" +	"                                        __global const b3ConvexPolyhedronData_t* convexShapes,\n" +	"                                        __global const float4* vertices,\n" +	"                                        __global const float4* uniqueEdges,\n" +	"                                        __global const b3GpuFace_t* faces,\n" +	"                                        __global const int* indices,\n" +	"                                        __global const float4* separatingNormals,\n" +	"                                        __global const int* hasSeparatingAxis,\n" +	"                                        __global int4* clippingFacesOut,\n" +	"                                        __global float4* worldVertsA1,\n" +	"                                        __global float4* worldNormalsA1,\n" +	"                                        __global float4* worldVertsB1,\n" +	"                                        int capacityWorldVerts,\n" +	"                                        int numPairs\n" +	"                                        )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"    \n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"            \n" +	"            \n" +	"			int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" +	"                                                        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"                                                        rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +	"                                                        rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +	"                                                        worldVertsA1,\n" +	"                                                        worldNormalsA1,\n" +	"                                                        worldVertsB1,capacityWorldVerts,\n" +	"                                                        minDist, maxDist,\n" +	"                                                        vertices,faces,indices,\n" +	"                                                        clippingFacesOut,i);\n" +	"            \n" +	"            \n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"}\n" +	"__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,\n" +	"                                                   __global const int* hasSeparatingAxis,\n" +	"                                                   __global int4* clippingFacesOut,\n" +	"                                                   __global float4* worldVertsA1,\n" +	"                                                   __global float4* worldNormalsA1,\n" +	"                                                   __global float4* worldVertsB1,\n" +	"                                                   __global float4* worldVertsB2,\n" +	"                                                    int vertexFaceCapacity,\n" +	"                                                   int numPairs,\n" +	"					                                        int debugMode\n" +	"                                                   )\n" +	"{\n" +	"    int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"    \n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"//			int bodyIndexA = pairs[i].x;\n" +	"	//		int bodyIndexB = pairs[i].y;\n" +	"		    \n" +	"            int numLocalContactsOut = 0;\n" +	"            int capacityWorldVertsB2 = vertexFaceCapacity;\n" +	"            \n" +	"            __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +	"            __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +	"            \n" +	"            {\n" +	"                __global int4* clippingFaces = clippingFacesOut;\n" +	"            \n" +	"                \n" +	"                int closestFaceA = clippingFaces[pairIndex].x;\n" +	"                int closestFaceB = clippingFaces[pairIndex].y;\n" +	"                int numVertsInA = clippingFaces[pairIndex].z;\n" +	"                int numVertsInB = clippingFaces[pairIndex].w;\n" +	"                \n" +	"                int numVertsOut = 0;\n" +	"                \n" +	"                if (closestFaceA>=0)\n" +	"                {\n" +	"                    \n" +	"                    \n" +	"                    \n" +	"                    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"                    \n" +	"                    for(int e0=0;e0<numVertsInA;e0++)\n" +	"                    {\n" +	"                        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +	"                        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +	"                        const float4 WorldEdge0 = aw - bw;\n" +	"                        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +	"                        float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"                        float4 worldA1 = aw;\n" +	"                        float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"                        float4 planeNormalWS = planeNormalWS1;\n" +	"                        float planeEqWS=planeEqWS1;\n" +	"                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +	"                        __global float4* tmp = pVtxOut;\n" +	"                        pVtxOut = pVtxIn;\n" +	"                        pVtxIn = tmp;\n" +	"                        numVertsInB = numVertsOut;\n" +	"                        numVertsOut = 0;\n" +	"                    }\n" +	"                    \n" +	"                    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"                    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"                    \n" +	"                    for (int i=0;i<numVertsInB;i++)\n" +	"                    {\n" +	"                        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"                        if (depth <=minDist)\n" +	"                        {\n" +	"                            depth = minDist;\n" +	"                        }\n" +	"                        \n" +	"                        if (depth <=maxDist)\n" +	"                        {\n" +	"                            float4 pointInWorld = pVtxIn[i];\n" +	"                            pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"                        }\n" +	"                    }\n" +	"                    \n" +	"                }\n" +	"                clippingFaces[pairIndex].w =numLocalContactsOut;\n" +	"                \n" +	"            }\n" +	"            \n" +	"            for (int i=0;i<numLocalContactsOut;i++)\n" +	"                pVtxIn[i] = pVtxOut[i];\n" +	"                \n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"}\n" +	"__kernel void   newContactReductionKernel( __global int4* pairs,\n" +	"                                                   __global const b3RigidBodyData_t* rigidBodies,\n" +	"                                                   __global const float4* separatingNormals,\n" +	"                                                   __global const int* hasSeparatingAxis,\n" +	"                                                   __global struct b3Contact4Data* globalContactsOut,\n" +	"                                                   __global int4* clippingFaces,\n" +	"                                                   __global float4* worldVertsB2,\n" +	"                                                   volatile __global int* nGlobalContactsOut,\n" +	"                                                   int vertexFaceCapacity,\n" +	"												   int contactCapacity,\n" +	"                                                   int numPairs\n" +	"                                                   )\n" +	"{\n" +	"    int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"    int4 contactIdx;\n" +	"    contactIdx=make_int4(0,1,2,3);\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"			\n" +	"            \n" +	"            \n" +	"			int nPoints = clippingFaces[pairIndex].w;\n" +	"           \n" +	"            if (nPoints>0)\n" +	"            {\n" +	"                 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" +	"                float4 normal = -separatingNormals[i];\n" +	"                \n" +	"                int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" +	"            \n" +	"				int mprContactIndex = pairs[pairIndex].z;\n" +	"                int dstIdx = mprContactIndex;\n" +	"				if (dstIdx<0)\n" +	"				{\n" +	"	                AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				}\n" +	"//#if 0\n" +	"                \n" +	"				if (dstIdx < contactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					pairs[pairIndex].w = dstIdx;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"                    c->m_childIndexA =-1;\n" +	"					c->m_childIndexB =-1;\n" +	"                    switch (nReducedContacts)\n" +	"                    {\n" +	"                        case 4:\n" +	"                            c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" +	"                        case 3:\n" +	"                            c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" +	"                        case 2:\n" +	"                            c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" +	"                        case 1:\n" +	"							if (mprContactIndex<0)//test\n" +	"	                            c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" +	"                        default:\n" +	"                        {\n" +	"                        }\n" +	"                    };\n" +	"                    \n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"                    \n" +	"                 }\n" +	"                 \n" +	"                \n" +	"//#endif\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"    \n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h index 611569cacf..a60702ca62 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h @@ -1,1457 +1,1456 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satConcaveKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -"	union\n" -"	{\n" -"		float m_min[4];\n" -"		b3Float4 m_minVec;\n" -"		int m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float	m_max[4];\n" -"		b3Float4 m_maxVec;\n" -"		int m_signedMaxIndices[4];\n" -"	};\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -"						b3Float4ConstArg pos,\n" -"						b3QuatConstArg orn,\n" -"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -"		b3Mat3x3 m;\n" -"		m = b3QuatGetRotationMatrix(orn);\n" -"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -"		\n" -"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -"										 0.f);\n" -"		*aabbMinOut = center-extent;\n" -"		*aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -"	bool overlap = true;\n" -"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -"	return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"	\n" -"//	float4 a1 = make_float4(a.xyz,0.f);\n" -"//	float4 b1 = make_float4(b.xyz,0.f);\n" -"//	return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	v = make_float4(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? d0:d1;\n" -"	return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -"		return false;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* verticesA, \n" -"	__global const float4* uniqueEdgesA, \n" -"	__global const btGpuFace* facesA,\n" -"	__global const int*  indicesA,\n" -"	const float4* verticesB,\n" -"	const float4* uniqueEdgesB, \n" -"	const btGpuFace* facesB,\n" -"	const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS *= -1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"		float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje *= -1.f;\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						result = false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"inline int	findClippingFaces(const float4 separatingNormal,\n" -"                      const ConvexPolyhedronCL* hullA, \n" -"					  __global const ConvexPolyhedronCL* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                      int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"					  const float4* verticesA,\n" -"                      const btGpuFace* facesA,\n" -"                      const int* indicesA,\n" -"					  __global const float4* verticesB,\n" -"                      __global const btGpuFace* facesB,\n" -"                      __global const int* indicesB,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=0;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		int numVertices = polyB.m_numIndices;\n" -"        if (numVertices>capacityWorldVerts)\n" -"            numVertices = capacityWorldVerts;\n" -"        if (numVertices<0)\n" -"            numVertices = 0;\n" -"        \n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"            if (e0<capacityWorldVerts)\n" -"            {\n" -"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"            }\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=0;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"            \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"    if (numVerticesA>capacityWorldVerts)\n" -"       numVerticesA = capacityWorldVerts;\n" -"    if (numVerticesA<0)\n" -"        numVerticesA=0;\n" -"    \n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        if (e0<capacityWorldVerts)\n" -"        {\n" -"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"        }\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" -"                                                __global const BodyData* rigidBodies,\n" -"                                                __global const btCollidableGpu* collidables,\n" -"                                                __global const ConvexPolyhedronCL* convexShapes,\n" -"                                                __global const float4* vertices,\n" -"                                                __global const float4* uniqueEdges,\n" -"                                                __global const btGpuFace* faces,\n" -"                                                __global const int* indices,\n" -"                                                __global const btGpuChildShape* gpuChildShapes,\n" -"                                                __global btAabbCL* aabbs,\n" -"                                                __global float4* concaveSeparatingNormalsOut,\n" -"                                                __global int* concaveHasSeparatingNormals,\n" -"                                                __global int4* clippingFacesOut,\n" -"                                                __global float4* worldVertsA1GPU,\n" -"                                                __global float4*  worldNormalsAGPU,\n" -"                                                __global float4* worldVertsB1GPU,\n" -"                                                __global float* dmins,\n" -"                                                int vertexFaceCapacity,\n" -"                                                int numConcavePairs\n" -"                                                )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"    \n" -"	concaveHasSeparatingNormals[i] = 0;\n" -"    \n" -"	int pairIdx = i;\n" -"    \n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"    \n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"    \n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"    \n" -"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"	{\n" -"		concavePairs[pairIdx].w = -1;\n" -"		return;\n" -"	}\n" -"    \n" -"    \n" -"    \n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"    \n" -"	//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"    \n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"        \n" -"		triAabb.m_min = min(triAabb.m_min,vert);\n" -"		triAabb.m_max = max(triAabb.m_max,vert);\n" -"        \n" -"	}\n" -"    \n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -"    \n" -"	if (overlap)\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"        \n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"        \n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"        \n" -"        \n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"        \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"        \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"        \n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"        \n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"                \n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"        \n" -"        \n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"        \n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"        \n" -"		\n" -"        \n" -"        \n" -"		///////////////////\n" -"		///compound shape support\n" -"        \n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		//////////////////\n" -"        \n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"        \n" -"        \n" -"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"                                             posA,ornA,\n" -"                                             posB,ornB,\n" -"                                             DeltaC2,\n" -"                                             verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                             vertices,uniqueEdges,faces,indices,\n" -"                                             &sepAxis,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" -"                                                 posB,ornB,\n" -"                                                 posA,ornA,\n" -"                                                 DeltaC2,\n" -"                                                 vertices,uniqueEdges,faces,indices,\n" -"                                                 verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                                 &sepAxis,&dmin);\n" -"            \n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				hasSeparatingAxis = 1;\n" -"			}\n" -"		}	\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"            dmins[i] = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"            \n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" -"                                                          __global const BodyData* rigidBodies,\n" -"                                                          __global const btCollidableGpu* collidables,\n" -"                                                          __global const ConvexPolyhedronCL* convexShapes,\n" -"                                                          __global const float4* vertices,\n" -"                                                          __global const float4* uniqueEdges,\n" -"                                                          __global const btGpuFace* faces,\n" -"                                                          __global const int* indices,\n" -"                                                          __global const btGpuChildShape* gpuChildShapes,\n" -"                                                          __global btAabbCL* aabbs,\n" -"                                                          __global float4* concaveSeparatingNormalsOut,\n" -"                                                          __global int* concaveHasSeparatingNormals,\n" -"                                                          __global int4* clippingFacesOut,\n" -"                                                          __global float4* worldVertsA1GPU,\n" -"                                                          __global float4*  worldNormalsAGPU,\n" -"                                                          __global float4* worldVertsB1GPU,\n" -"                                                          __global float* dmins,\n" -"                                                          int vertexFaceCapacity,\n" -"                                                          int numConcavePairs\n" -"                                                          )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"    \n" -"	if (!concaveHasSeparatingNormals[i])\n" -"        return;\n" -"    \n" -"	int pairIdx = i;\n" -"    \n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"    \n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"    \n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"    \n" -"    \n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"    \n" -"	//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"    \n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"        \n" -"		triAabb.m_min = min(triAabb.m_min,vert);\n" -"		triAabb.m_max = max(triAabb.m_max,vert);\n" -"        \n" -"	}\n" -"    \n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -"    \n" -"	if (overlap)\n" -"	{\n" -"		float dmin = dmins[i];\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"        sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" -"        \n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"        \n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"        \n" -"        \n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"        \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"        \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"        \n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"        \n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"                \n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"        \n" -"        \n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"        \n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"        \n" -"		\n" -"        \n" -"        \n" -"		///////////////////\n" -"		///compound shape support\n" -"        \n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		//////////////////\n" -"        \n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"        \n" -"        \n" -"		{\n" -"			bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"                                                              posA,ornA,\n" -"                                                              posB,ornB,\n" -"                                                              DeltaC2,\n" -"                                                              verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                                              vertices,uniqueEdges,faces,indices,\n" -"                                                              &sepAxis,&dmin);\n" -"                \n" -"			if (!sepEE)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				hasSeparatingAxis = 1;\n" -"			}\n" -"		}\n" -"		\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"			sepAxis.w = dmin;\n" -"            dmins[i] = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"           \n" -" 	float minDist = -1e30f;\n" -"			float maxDist = 0.02f;\n" -"            \n" -"            findClippingFaces(sepAxis,\n" -"                              &convexPolyhedronA,\n" -"                              &convexShapes[shapeIndexB],\n" -"                              posA,ornA,\n" -"                              posB,ornB,\n" -"                              worldVertsA1GPU,\n" -"                              worldNormalsAGPU,\n" -"                              worldVertsB1GPU,\n" -"                              vertexFaceCapacity,\n" -"                              minDist, maxDist,\n" -"                              verticesA,\n" -"                              facesA,\n" -"                              indicesA,\n" -"                              vertices,\n" -"                              faces,\n" -"                              indices,\n" -"                              clippingFacesOut, pairIdx);\n" -"	           \n" -"            \n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"	\n" -"	concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" -"}\n" -; +static const char* satConcaveKernelsCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define B3_MAX_STACK_DEPTH 256\n" +	"typedef unsigned int u32;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} b3QuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} b3BvhSubtreeInfo;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"#ifndef B3_AABB_H\n" +	"#define B3_AABB_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3Aabb b3Aabb_t;\n" +	"struct b3Aabb\n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float m_min[4];\n" +	"		b3Float4 m_minVec;\n" +	"		int m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float	m_max[4];\n" +	"		b3Float4 m_maxVec;\n" +	"		int m_signedMaxIndices[4];\n" +	"	};\n" +	"};\n" +	"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +	"						b3Float4ConstArg pos,\n" +	"						b3QuatConstArg orn,\n" +	"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +	"{\n" +	"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +	"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +	"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +	"		b3Mat3x3 m;\n" +	"		m = b3QuatGetRotationMatrix(orn);\n" +	"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +	"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +	"		\n" +	"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +	"										 0.f);\n" +	"		*aabbMinOut = center-extent;\n" +	"		*aabbMaxOut = center+extent;\n" +	"}\n" +	"/// conservative test for overlap between two aabbs\n" +	"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +	"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +	"{\n" +	"	bool overlap = true;\n" +	"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +	"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +	"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" +	"	return overlap;\n" +	"}\n" +	"#endif //B3_AABB_H\n" +	"/*\n" +	"Bullet Continuous Collision Detection and Physics Library\n" +	"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" +	"This software is provided 'as-is', without any express or implied warranty.\n" +	"In no event will the authors be held liable for any damages arising from the use of this software.\n" +	"Permission is granted to anyone to use this software for any purpose,\n" +	"including commercial applications, and to alter it and redistribute it freely,\n" +	"subject to the following restrictions:\n" +	"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +	"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +	"3. This notice may not be removed or altered from any source distribution.\n" +	"*/\n" +	"#ifndef B3_INT2_H\n" +	"#define B3_INT2_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define b3UnsignedInt2 uint2\n" +	"#define b3Int2 int2\n" +	"#define b3MakeInt2 (int2)\n" +	"#endif //__cplusplus\n" +	"#endif\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"	\n" +	"//	float4 a1 = make_float4(a.xyz,0.f);\n" +	"//	float4 b1 = make_float4(b.xyz,0.f);\n" +	"//	return cross(a1,b1);\n" +	"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//return c;\n" +	"}\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	v = make_float4(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" +	"	*depth = d0<d1 ? d0:d1;\n" +	"	return true;\n" +	"}\n" +	"inline bool IsAlmostZero(const float4 v)\n" +	"{\n" +	"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* verticesA, \n" +	"	__global const float4* uniqueEdgesA, \n" +	"	__global const btGpuFace* facesA,\n" +	"	__global const int*  indicesA,\n" +	"	const float4* verticesB,\n" +	"	const float4* uniqueEdgesB, \n" +	"	const btGpuFace* facesB,\n" +	"	const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS *= -1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"		float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	{\n" +	"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje *= -1.f;\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						result = false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"inline int	findClippingFaces(const float4 separatingNormal,\n" +	"                      const ConvexPolyhedronCL* hullA, \n" +	"					  __global const ConvexPolyhedronCL* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"                       __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"					  const float4* verticesA,\n" +	"                      const btGpuFace* facesA,\n" +	"                      const int* indicesA,\n" +	"					  __global const float4* verticesB,\n" +	"                      __global const btGpuFace* facesB,\n" +	"                      __global const int* indicesB,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=0;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	{\n" +	"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		int numVertices = polyB.m_numIndices;\n" +	"        if (numVertices>capacityWorldVerts)\n" +	"            numVertices = capacityWorldVerts;\n" +	"        if (numVertices<0)\n" +	"            numVertices = 0;\n" +	"        \n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"            if (e0<capacityWorldVerts)\n" +	"            {\n" +	"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"            }\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int closestFaceA=0;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"    if (numVerticesA>capacityWorldVerts)\n" +	"       numVerticesA = capacityWorldVerts;\n" +	"    if (numVerticesA<0)\n" +	"        numVerticesA=0;\n" +	"    \n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        if (e0<capacityWorldVerts)\n" +	"        {\n" +	"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"        }\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" +	"                                                __global const BodyData* rigidBodies,\n" +	"                                                __global const btCollidableGpu* collidables,\n" +	"                                                __global const ConvexPolyhedronCL* convexShapes,\n" +	"                                                __global const float4* vertices,\n" +	"                                                __global const float4* uniqueEdges,\n" +	"                                                __global const btGpuFace* faces,\n" +	"                                                __global const int* indices,\n" +	"                                                __global const btGpuChildShape* gpuChildShapes,\n" +	"                                                __global btAabbCL* aabbs,\n" +	"                                                __global float4* concaveSeparatingNormalsOut,\n" +	"                                                __global int* concaveHasSeparatingNormals,\n" +	"                                                __global int4* clippingFacesOut,\n" +	"                                                __global float4* worldVertsA1GPU,\n" +	"                                                __global float4*  worldNormalsAGPU,\n" +	"                                                __global float4* worldVertsB1GPU,\n" +	"                                                __global float* dmins,\n" +	"                                                int vertexFaceCapacity,\n" +	"                                                int numConcavePairs\n" +	"                                                )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"    \n" +	"	concaveHasSeparatingNormals[i] = 0;\n" +	"    \n" +	"	int pairIdx = i;\n" +	"    \n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"    \n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"    \n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"    \n" +	"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +	"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"	{\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"		return;\n" +	"	}\n" +	"    \n" +	"    \n" +	"    \n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"    \n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"    \n" +	"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"        \n" +	"		triAabb.m_min = min(triAabb.m_min,vert);\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);\n" +	"        \n" +	"	}\n" +	"    \n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" +	"    \n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"        \n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"        \n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"        \n" +	"        \n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"        \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"        \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"        \n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"        \n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"                \n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"        \n" +	"        \n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"        \n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"        \n" +	"		\n" +	"        \n" +	"        \n" +	"		///////////////////\n" +	"		///compound shape support\n" +	"        \n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	"		//////////////////\n" +	"        \n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"        \n" +	"        \n" +	"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"                                             posA,ornA,\n" +	"                                             posB,ornB,\n" +	"                                             DeltaC2,\n" +	"                                             verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                             vertices,uniqueEdges,faces,indices,\n" +	"                                             &sepAxis,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" +	"                                                 posB,ornB,\n" +	"                                                 posA,ornA,\n" +	"                                                 DeltaC2,\n" +	"                                                 vertices,uniqueEdges,faces,indices,\n" +	"                                                 verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                                 &sepAxis,&dmin);\n" +	"            \n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				hasSeparatingAxis = 1;\n" +	"			}\n" +	"		}	\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"            dmins[i] = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"            \n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" +	"                                                          __global const BodyData* rigidBodies,\n" +	"                                                          __global const btCollidableGpu* collidables,\n" +	"                                                          __global const ConvexPolyhedronCL* convexShapes,\n" +	"                                                          __global const float4* vertices,\n" +	"                                                          __global const float4* uniqueEdges,\n" +	"                                                          __global const btGpuFace* faces,\n" +	"                                                          __global const int* indices,\n" +	"                                                          __global const btGpuChildShape* gpuChildShapes,\n" +	"                                                          __global btAabbCL* aabbs,\n" +	"                                                          __global float4* concaveSeparatingNormalsOut,\n" +	"                                                          __global int* concaveHasSeparatingNormals,\n" +	"                                                          __global int4* clippingFacesOut,\n" +	"                                                          __global float4* worldVertsA1GPU,\n" +	"                                                          __global float4*  worldNormalsAGPU,\n" +	"                                                          __global float4* worldVertsB1GPU,\n" +	"                                                          __global float* dmins,\n" +	"                                                          int vertexFaceCapacity,\n" +	"                                                          int numConcavePairs\n" +	"                                                          )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"    \n" +	"	if (!concaveHasSeparatingNormals[i])\n" +	"        return;\n" +	"    \n" +	"	int pairIdx = i;\n" +	"    \n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"    \n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"    \n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"    \n" +	"    \n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"    \n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"    \n" +	"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"        \n" +	"		triAabb.m_min = min(triAabb.m_min,vert);\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);\n" +	"        \n" +	"	}\n" +	"    \n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" +	"    \n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = dmins[i];\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"        sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" +	"        \n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"        \n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"        \n" +	"        \n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"        \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"        \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"        \n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"        \n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"                \n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"        \n" +	"        \n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"        \n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"        \n" +	"		\n" +	"        \n" +	"        \n" +	"		///////////////////\n" +	"		///compound shape support\n" +	"        \n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	"		//////////////////\n" +	"        \n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"        \n" +	"        \n" +	"		{\n" +	"			bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"                                                              posA,ornA,\n" +	"                                                              posB,ornB,\n" +	"                                                              DeltaC2,\n" +	"                                                              verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                                              vertices,uniqueEdges,faces,indices,\n" +	"                                                              &sepAxis,&dmin);\n" +	"                \n" +	"			if (!sepEE)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				hasSeparatingAxis = 1;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"			sepAxis.w = dmin;\n" +	"            dmins[i] = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"           \n" +	" 	float minDist = -1e30f;\n" +	"			float maxDist = 0.02f;\n" +	"            \n" +	"            findClippingFaces(sepAxis,\n" +	"                              &convexPolyhedronA,\n" +	"                              &convexShapes[shapeIndexB],\n" +	"                              posA,ornA,\n" +	"                              posB,ornB,\n" +	"                              worldVertsA1GPU,\n" +	"                              worldNormalsAGPU,\n" +	"                              worldVertsB1GPU,\n" +	"                              vertexFaceCapacity,\n" +	"                              minDist, maxDist,\n" +	"                              verticesA,\n" +	"                              facesA,\n" +	"                              indicesA,\n" +	"                              vertices,\n" +	"                              faces,\n" +	"                              indices,\n" +	"                              clippingFacesOut, pairIdx);\n" +	"	           \n" +	"            \n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"	\n" +	"	concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index 6f8b0a90db..e627af2799 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -1,2104 +1,2103 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -"	union\n" -"	{\n" -"		float m_min[4];\n" -"		b3Float4 m_minVec;\n" -"		int m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float	m_max[4];\n" -"		b3Float4 m_maxVec;\n" -"		int m_signedMaxIndices[4];\n" -"	};\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -"						b3Float4ConstArg pos,\n" -"						b3QuatConstArg orn,\n" -"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -"		b3Mat3x3 m;\n" -"		m = b3QuatGetRotationMatrix(orn);\n" -"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -"		\n" -"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -"										 0.f);\n" -"		*aabbMinOut = center-extent;\n" -"		*aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -"	bool overlap = true;\n" -"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -"	return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"	\n" -"//	float4 a1 = make_float4(a.xyz,0.f);\n" -"//	float4 b1 = make_float4(b.xyz,0.f);\n" -"//	return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	v = make_float4(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? d0:d1;\n" -"	return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -"		return false;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* verticesA, \n" -"	__global const float4* uniqueEdgesA, \n" -"	__global const btGpuFace* facesA,\n" -"	__global const int*  indicesA,\n" -"	const float4* verticesB,\n" -"	const float4* uniqueEdgesB, \n" -"	const btGpuFace* facesB,\n" -"	const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS *= -1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"		float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje *= -1.f;\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						result = false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, __global const float4* vertices,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? d0:d1;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxis(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices, \n" -"	__global const float4* uniqueEdges, \n" -"	__global const btGpuFace* faces,\n" -"	__global const int*  indices,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"	\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"				\n" -"			curPlaneTests++;\n" -"	\n" -"			float d;\n" -"			if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" -"				return false;\n" -"	\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"		if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"		{\n" -"			*sep = -(*sep);\n" -"		}\n" -"	\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisUnitSphere(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices,\n" -"	__global const float4* unitSphereDirections,\n" -"	int numUnitSphereDirections,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test unit sphere directions\n" -"	for (int i=0;i<numUnitSphereDirections;i++)\n" -"	{\n" -"		float4 crossje;\n" -"		crossje = unitSphereDirections[i];	\n" -"		if (dot3F4(DeltaC2,crossje)>0)\n" -"			crossje *= -1.f;\n" -"		{\n" -"			float dist;\n" -"			bool result = true;\n" -"			float Min0,Max0;\n" -"			float Min1,Max1;\n" -"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"		\n" -"			if(Max0<Min1 || Max1<Min0)\n" -"				return false;\n" -"		\n" -"			float d0 = Max0 - Min1;\n" -"			float d1 = Max1 - Min0;\n" -"			dist = d0<d1 ? d0:d1;\n" -"			result = true;\n" -"	\n" -"			if(dist<*dmin)\n" -"			{\n" -"				*dmin = dist;\n" -"				*sep = crossje;\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdge(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices, \n" -"	__global const float4* uniqueEdges, \n" -"	__global const btGpuFace* faces,\n" -"	__global const int*  indices,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje*=-1.f;\n" -"					\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						return false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global const btGpuChildShape* gpuChildShapes,\n" -"																					__global volatile float4* gpuCompoundSepNormalsOut,\n" -"																					__global volatile int* gpuHasCompoundSepNormalsOut,\n" -"																					int numCompoundPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		int bodyIndexA = gpuCompoundPairs[i].x;\n" -"		int bodyIndexB = gpuCompoundPairs[i].y;\n" -"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"		\n" -"		int collidableIndexA = -1;\n" -"		int collidableIndexB = -1;\n" -"		\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		\n" -"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							\n" -"		if (childShapeIndexA >= 0)\n" -"		{\n" -"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"			float4 newOrnA = qtMul(ornA,childOrnA);\n" -"			posA = newPosA;\n" -"			ornA = newOrnA;\n" -"		} else\n" -"		{\n" -"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		}\n" -"		\n" -"		if (childShapeIndexB>=0)\n" -"		{\n" -"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"		} else\n" -"		{\n" -"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"		}\n" -"	\n" -"		gpuHasCompoundSepNormalsOut[i] = 0;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"	\n" -"		if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"		int hasSeparatingAxis = 5;\n" -"							\n" -"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		posA.w = 0.f;\n" -"		posB.w = 0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal = make_float4(1,0,0,0);\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else//(!sepB)\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"				if (sepEE)\n" -"				{\n" -"						gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" -"						gpuHasCompoundSepNormalsOut[i] = 1;\n" -"				}//sepEE\n" -"			}//(!sepB)\n" -"		}//(!sepA)\n" -"		\n" -"		\n" -"	}\n" -"		\n" -"}\n" -"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -"		b3Float4 vecOut;\n" -"		vecOut = b3MakeFloat4(\n" -"			(float)(vecIn[0]) / (quantization.x),\n" -"			(float)(vecIn[1]) / (quantization.y),\n" -"			(float)(vecIn[2]) / (quantization.z),\n" -"			0.f);\n" -"		vecOut += bvhAabbMin;\n" -"		return vecOut;\n" -"}\n" -"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -"		b3Float4 vecOut;\n" -"		vecOut = b3MakeFloat4(\n" -"			(float)(vecIn[0]) / (quantization.x),\n" -"			(float)(vecIn[1]) / (quantization.y),\n" -"			(float)(vecIn[2]) / (quantization.z),\n" -"			0.f);\n" -"		vecOut += bvhAabbMin;\n" -"		return vecOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findCompoundPairsKernel( __global const int4* pairs, \n" -"	__global const BodyData* rigidBodies, \n" -"	__global const btCollidableGpu* collidables,\n" -"	__global const ConvexPolyhedronCL* convexShapes, \n" -"	__global const float4* vertices,\n" -"	__global const float4* uniqueEdges,\n" -"	__global const btGpuFace* faces,\n" -"	__global const int* indices,\n" -"	__global b3Aabb_t* aabbLocalSpace,\n" -"	__global const btGpuChildShape* gpuChildShapes,\n" -"	__global volatile int4* gpuCompoundPairsOut,\n" -"	__global volatile int* numCompoundPairsOut,\n" -"	__global const b3BvhSubtreeInfo* subtrees,\n" -"	__global const b3QuantizedBvhNode* quantizedNodes,\n" -"	__global const b3BvhInfo* bvhInfos,\n" -"	int numPairs,\n" -"	int maxNumCompoundPairsCapacity\n" -"	)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"		{\n" -"			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" -"			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" -"			int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" -"			int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" -"			int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" -"			int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" -"			\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			\n" -"			for (int p=0;p<numSubTreesA;p++)\n" -"			{\n" -"				b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" -"				//bvhInfos[bvhA].m_quantization\n" -"				b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"				b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"				b3Float4 aabbAMinOut,aabbAMaxOut;\n" -"				float margin=0.f;\n" -"				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -"				\n" -"				for (int q=0;q<numSubTreesB;q++)\n" -"				{\n" -"					b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" -"					b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"					b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"					b3Float4 aabbBMinOut,aabbBMaxOut;\n" -"					float margin=0.f;\n" -"					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -"					\n" -"					\n" -"					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -"					if (aabbOverlap)\n" -"					{\n" -"						\n" -"						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" -"						int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" -"						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" -"						int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" -"						b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" -"						b3Int2 node0;\n" -"						node0.x = startNodeIndexA;\n" -"						node0.y = startNodeIndexB;\n" -"						int maxStackDepth = B3_MAX_STACK_DEPTH;\n" -"						int depth=0;\n" -"						nodeStack[depth++]=node0;\n" -"						do\n" -"						{\n" -"							b3Int2 node = nodeStack[--depth];\n" -"							b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"							b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"							b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"							b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"							float margin=0.f;\n" -"							b3Float4 aabbAMinOut,aabbAMaxOut;\n" -"							b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -"							b3Float4 aabbBMinOut,aabbBMaxOut;\n" -"							b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -"							\n" -"							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -"							if (nodeOverlap)\n" -"							{\n" -"								bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" -"								bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" -"								bool isInternalA = !isLeafA;\n" -"								bool isInternalB = !isLeafB;\n" -"								//fail, even though it might hit two leaf nodes\n" -"								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" -"								{\n" -"									//printf(\"Error: traversal exceeded maxStackDepth\");\n" -"									continue;\n" -"								}\n" -"								if(isInternalA)\n" -"								{\n" -"									int nodeAleftChild = node.x+1;\n" -"									bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" -"									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" -"									if(isInternalB)\n" -"									{					\n" -"										int nodeBleftChild = node.y+1;\n" -"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" -"									}\n" -"									else\n" -"									{\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" -"									}\n" -"								}\n" -"								else\n" -"								{\n" -"									if(isInternalB)\n" -"									{\n" -"										int nodeBleftChild = node.y+1;\n" -"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" -"									}\n" -"									else\n" -"									{\n" -"										int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"										if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"										{\n" -"											int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" -"											int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" -"											gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -"										}\n" -"									}\n" -"								}\n" -"							}\n" -"						} while (depth);\n" -"					}\n" -"				}\n" -"			}\n" -"			\n" -"			return;\n" -"		}\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"		{\n" -"			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" -"			{\n" -"				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" -"				for (int c=0;c<numChildrenA;c++)\n" -"				{\n" -"					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" -"					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"					float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"					float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"					float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"					float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"					float4 newOrnA = qtMul(ornA,childOrnA);\n" -"					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" -"					b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" -"					float margin = 0.f;\n" -"					\n" -"					b3Float4 aabbAMinWS;\n" -"					b3Float4 aabbAMaxWS;\n" -"					\n" -"					b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" -"						newPosA,\n" -"						newOrnA,\n" -"						&aabbAMinWS,&aabbAMaxWS);\n" -"						\n" -"					\n" -"					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"					{\n" -"						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"						for (int b=0;b<numChildrenB;b++)\n" -"						{\n" -"							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"							float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"							float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"							float4 newOrnB = qtMul(ornB,childOrnB);\n" -"							int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"							b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" -"							\n" -"							b3Float4 aabbBMinWS;\n" -"							b3Float4 aabbBMaxWS;\n" -"							\n" -"							b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" -"								newPosB,\n" -"								newOrnB,\n" -"								&aabbBMinWS,&aabbBMaxWS);\n" -"								\n" -"								\n" -"							\n" -"							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" -"							if (aabbOverlap)\n" -"							{\n" -"								int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"								float dmin = FLT_MAX;\n" -"								float4 posA = newPosA;\n" -"								posA.w = 0.f;\n" -"								float4 posB = newPosB;\n" -"								posB.w = 0.f;\n" -"								float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"								float4 ornA = newOrnA;\n" -"								float4 c0 = transform(&c0local, &posA, &ornA);\n" -"								float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"								float4 ornB =newOrnB;\n" -"								float4 c1 = transform(&c1local,&posB,&ornB);\n" -"								const float4 DeltaC2 = c0 - c1;\n" -"								{//\n" -"									int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"									if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"									{\n" -"										gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -"									}\n" -"								}//\n" -"							}//fi(1)\n" -"						} //for (int b=0\n" -"					}//if (collidables[collidableIndexB].\n" -"					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"					{\n" -"						if (1)\n" -"						{\n" -"							int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"							float dmin = FLT_MAX;\n" -"							float4 posA = newPosA;\n" -"							posA.w = 0.f;\n" -"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							posB.w = 0.f;\n" -"							float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"							float4 ornA = newOrnA;\n" -"							float4 c0 = transform(&c0local, &posA, &ornA);\n" -"							float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"							float4 c1 = transform(&c1local,&posB,&ornB);\n" -"							const float4 DeltaC2 = c0 - c1;\n" -"							{\n" -"								int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"								if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"								{\n" -"									gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" -"								}//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"							}//\n" -"						}//fi (1)\n" -"					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"				}//for (int b=0;b<numChildrenB;b++)	\n" -"				return;\n" -"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" -"				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"			{\n" -"				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"				for (int b=0;b<numChildrenB;b++)\n" -"				{\n" -"					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"					float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"					float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"					float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"					float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" -"					float4 newOrnB = qtMul(ornB,childOrnB);\n" -"					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"					//////////////////////////////////////\n" -"					if (1)\n" -"					{\n" -"						int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"						float dmin = FLT_MAX;\n" -"						float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"						posA.w = 0.f;\n" -"						float4 posB = newPosB;\n" -"						posB.w = 0.f;\n" -"						float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"						float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"						float4 c0 = transform(&c0local, &posA, &ornA);\n" -"						float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"						float4 ornB =newOrnB;\n" -"						float4 c1 = transform(&c1local,&posB,&ornB);\n" -"						const float4 DeltaC2 = c0 - c1;\n" -"						{//\n" -"							int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"							if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"							{\n" -"								gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" -"							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"						}//\n" -"					}//fi (1)	\n" -"				}//for (int b=0;b<numChildrenB;b++)\n" -"				return;\n" -"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"			return;\n" -"		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"	}//i<numPairs\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findSeparatingAxisKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global volatile float4* separatingNormals,\n" -"																					__global volatile int* hasSeparatingAxis,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"	\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"			\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal;\n" -"		\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																								posB,ornB,\n" -"																								DeltaC2,\n" -"																								vertices,uniqueEdges,faces,\n" -"																								indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis[i] = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -"																									posA,ornA,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis[i] = 0;\n" -"			} else\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"				if (!sepEE)\n" -"				{\n" -"					hasSeparatingAxis[i] = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis[i] = 1;\n" -"					separatingNormals[i] = sepNormal;\n" -"				}\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"}\n" -"__kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global volatile float4* separatingNormals,\n" -"																					__global volatile int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"	\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		hasSeparatingAxis[i] = 0;	\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"			\n" -"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		dmins[i] = dmin;\n" -"		\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal;\n" -"		\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																								posB,ornB,\n" -"																								DeltaC2,\n" -"																								vertices,uniqueEdges,faces,\n" -"																								indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis[i] = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -"																									posA,ornA,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"			if (sepB)\n" -"			{\n" -"				dmins[i] = dmin;\n" -"				hasSeparatingAxis[i] = 1;\n" -"				separatingNormals[i] = sepNormal;\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"}\n" -"__kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global  float4* separatingNormals,\n" -"																					__global  int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					__global const float4* unitSphereDirections,\n" -"																					int numUnitSphereDirections,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"	\n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"	\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"			\n" -"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	\n" -"			float dmin = dmins[i];\n" -"	\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			posA.w = 0.f;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			posB.w = 0.f;\n" -"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 c0 = transform(&c0local, &posA, &ornA);\n" -"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"			float4 c1 = transform(&c1local,&posB,&ornB);\n" -"			const float4 DeltaC2 = c0 - c1;\n" -"			float4 sepNormal = separatingNormals[i];\n" -"			\n" -"			\n" -"			\n" -"			bool sepEE = false;\n" -"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -"			if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" -"			{\n" -"				sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"																									\n" -"					if (!sepEE)\n" -"					{\n" -"						hasSeparatingAxis[i] = 0;\n" -"					} else\n" -"					{\n" -"						hasSeparatingAxis[i] = 1;\n" -"						separatingNormals[i] = sepNormal;\n" -"					}\n" -"			}\n" -"			/*\n" -"			///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" -"			else\n" -"			{\n" -"				sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,unitSphereDirections,numUnitSphereDirections,\n" -"																									&sepNormal,&dmin);\n" -"					if (!sepEE)\n" -"					{\n" -"						hasSeparatingAxis[i] = 0;\n" -"					} else\n" -"					{\n" -"						hasSeparatingAxis[i] = 1;\n" -"						separatingNormals[i] = sepNormal;\n" -"					}\n" -"			}\n" -"			*/\n" -"		}		//if (hasSeparatingAxis[i])\n" -"	}//(i<numPairs)\n" -"}\n" -"inline int	findClippingFaces(const float4 separatingNormal,\n" -"                      const ConvexPolyhedronCL* hullA, \n" -"					  __global const ConvexPolyhedronCL* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                      int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"					  const float4* verticesA,\n" -"                      const btGpuFace* facesA,\n" -"                      const int* indicesA,\n" -"					  __global const float4* verticesB,\n" -"                      __global const btGpuFace* facesB,\n" -"                      __global const int* indicesB,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=0;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		int numVertices = polyB.m_numIndices;\n" -"        if (numVertices>capacityWorldVerts)\n" -"            numVertices = capacityWorldVerts;\n" -"        \n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"            if (e0<capacityWorldVerts)\n" -"            {\n" -"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"            }\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=0;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"            \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"    if (numVerticesA>capacityWorldVerts)\n" -"       numVerticesA = capacityWorldVerts;\n" -"    \n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        if (e0<capacityWorldVerts)\n" -"        {\n" -"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"        }\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" -"																					__global const BodyData* rigidBodies,\n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global const btGpuChildShape* gpuChildShapes,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global float4* concaveSeparatingNormalsOut,\n" -"																					__global int* concaveHasSeparatingNormals,\n" -"																					__global int4* clippingFacesOut,\n" -"																					__global float4* worldVertsA1GPU,\n" -"																					__global float4*  worldNormalsAGPU,\n" -"																					__global float4* worldVertsB1GPU,\n" -"																					int vertexFaceCapacity,\n" -"																					int numConcavePairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"	concaveHasSeparatingNormals[i] = 0;\n" -"	int pairIdx = i;\n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"	{\n" -"		concavePairs[pairIdx].w = -1;\n" -"		return;\n" -"	}\n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"	//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"			\n" -"		triAabb.m_min = min(triAabb.m_min,vert);		\n" -"		triAabb.m_max = max(triAabb.m_max,vert);		\n" -"	}\n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -"		\n" -"	if (overlap)\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"                                  \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"                             \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                                            \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                                            \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		\n" -"		///////////////////\n" -"		///compound shape support\n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		//////////////////\n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"												posA,ornA,\n" -"												posB,ornB,\n" -"												DeltaC2,\n" -"												verticesA,uniqueEdgesA,facesA,indicesA,\n" -"												vertices,uniqueEdges,faces,indices,\n" -"												&sepAxis,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" -"												posB,ornB,\n" -"												posA,ornA,\n" -"												DeltaC2,\n" -"												vertices,uniqueEdges,faces,indices,\n" -"												verticesA,uniqueEdgesA,facesA,indicesA,\n" -"												&sepAxis,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"															posA,ornA,\n" -"															posB,ornB,\n" -"															DeltaC2,\n" -"															verticesA,uniqueEdgesA,facesA,indicesA,\n" -"															vertices,uniqueEdges,faces,indices,\n" -"															&sepAxis,&dmin);\n" -"	\n" -"				if (!sepEE)\n" -"				{\n" -"					hasSeparatingAxis = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis = 1;\n" -"				}\n" -"			}\n" -"		}	\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"			sepAxis.w = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"			float minDist = -1e30f;\n" -"			float maxDist = 0.02f;\n" -"		\n" -"			findClippingFaces(sepAxis,\n" -"                     &convexPolyhedronA,\n" -"					 &convexShapes[shapeIndexB],\n" -"					 posA,ornA,\n" -"					 posB,ornB,\n" -"                      worldVertsA1GPU,\n" -"                      worldNormalsAGPU,\n" -"                      worldVertsB1GPU,\n" -"					  vertexFaceCapacity,\n" -"                      minDist, maxDist,\n" -"                      verticesA,\n" -"                      facesA,\n" -"                      indicesA,\n" -" 					  vertices,\n" -"                      faces,\n" -"                      indices,\n" -"                      clippingFacesOut, pairIdx);\n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"	\n" -"	concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" -"}\n" -; +static const char* satKernelsCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define B3_MAX_STACK_DEPTH 256\n" +	"typedef unsigned int u32;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} b3QuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} b3BvhSubtreeInfo;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"#ifndef B3_AABB_H\n" +	"#define B3_AABB_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3Aabb b3Aabb_t;\n" +	"struct b3Aabb\n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float m_min[4];\n" +	"		b3Float4 m_minVec;\n" +	"		int m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float	m_max[4];\n" +	"		b3Float4 m_maxVec;\n" +	"		int m_signedMaxIndices[4];\n" +	"	};\n" +	"};\n" +	"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +	"						b3Float4ConstArg pos,\n" +	"						b3QuatConstArg orn,\n" +	"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +	"{\n" +	"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +	"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +	"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +	"		b3Mat3x3 m;\n" +	"		m = b3QuatGetRotationMatrix(orn);\n" +	"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +	"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +	"		\n" +	"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +	"										 0.f);\n" +	"		*aabbMinOut = center-extent;\n" +	"		*aabbMaxOut = center+extent;\n" +	"}\n" +	"/// conservative test for overlap between two aabbs\n" +	"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +	"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +	"{\n" +	"	bool overlap = true;\n" +	"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +	"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +	"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" +	"	return overlap;\n" +	"}\n" +	"#endif //B3_AABB_H\n" +	"/*\n" +	"Bullet Continuous Collision Detection and Physics Library\n" +	"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" +	"This software is provided 'as-is', without any express or implied warranty.\n" +	"In no event will the authors be held liable for any damages arising from the use of this software.\n" +	"Permission is granted to anyone to use this software for any purpose,\n" +	"including commercial applications, and to alter it and redistribute it freely,\n" +	"subject to the following restrictions:\n" +	"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +	"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +	"3. This notice may not be removed or altered from any source distribution.\n" +	"*/\n" +	"#ifndef B3_INT2_H\n" +	"#define B3_INT2_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define b3UnsignedInt2 uint2\n" +	"#define b3Int2 int2\n" +	"#define b3MakeInt2 (int2)\n" +	"#endif //__cplusplus\n" +	"#endif\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"	\n" +	"//	float4 a1 = make_float4(a.xyz,0.f);\n" +	"//	float4 b1 = make_float4(b.xyz,0.f);\n" +	"//	return cross(a1,b1);\n" +	"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//return c;\n" +	"}\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	v = make_float4(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" +	"	*depth = d0<d1 ? d0:d1;\n" +	"	return true;\n" +	"}\n" +	"inline bool IsAlmostZero(const float4 v)\n" +	"{\n" +	"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* verticesA, \n" +	"	__global const float4* uniqueEdgesA, \n" +	"	__global const btGpuFace* facesA,\n" +	"	__global const int*  indicesA,\n" +	"	const float4* verticesB,\n" +	"	const float4* uniqueEdgesB, \n" +	"	const btGpuFace* facesB,\n" +	"	const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS *= -1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"		float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	{\n" +	"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje *= -1.f;\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						result = false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, __global const float4* vertices,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" +	"	*depth = d0<d1 ? d0:d1;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxis(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices, \n" +	"	__global const float4* uniqueEdges, \n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int*  indices,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"	\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"				\n" +	"			curPlaneTests++;\n" +	"	\n" +	"			float d;\n" +	"			if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" +	"				return false;\n" +	"	\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"		if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"		{\n" +	"			*sep = -(*sep);\n" +	"		}\n" +	"	\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisUnitSphere(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices,\n" +	"	__global const float4* unitSphereDirections,\n" +	"	int numUnitSphereDirections,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test unit sphere directions\n" +	"	for (int i=0;i<numUnitSphereDirections;i++)\n" +	"	{\n" +	"		float4 crossje;\n" +	"		crossje = unitSphereDirections[i];	\n" +	"		if (dot3F4(DeltaC2,crossje)>0)\n" +	"			crossje *= -1.f;\n" +	"		{\n" +	"			float dist;\n" +	"			bool result = true;\n" +	"			float Min0,Max0;\n" +	"			float Min1,Max1;\n" +	"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"		\n" +	"			if(Max0<Min1 || Max1<Min0)\n" +	"				return false;\n" +	"		\n" +	"			float d0 = Max0 - Min1;\n" +	"			float d1 = Max1 - Min0;\n" +	"			dist = d0<d1 ? d0:d1;\n" +	"			result = true;\n" +	"	\n" +	"			if(dist<*dmin)\n" +	"			{\n" +	"				*dmin = dist;\n" +	"				*sep = crossje;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdge(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices, \n" +	"	__global const float4* uniqueEdges, \n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int*  indices,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	{\n" +	"		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje*=-1.f;\n" +	"					\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						return false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global const btGpuChildShape* gpuChildShapes,\n" +	"																					__global volatile float4* gpuCompoundSepNormalsOut,\n" +	"																					__global volatile int* gpuHasCompoundSepNormalsOut,\n" +	"																					int numCompoundPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		int bodyIndexA = gpuCompoundPairs[i].x;\n" +	"		int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"		\n" +	"		int collidableIndexA = -1;\n" +	"		int collidableIndexB = -1;\n" +	"		\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		\n" +	"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							\n" +	"		if (childShapeIndexA >= 0)\n" +	"		{\n" +	"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"			float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"			posA = newPosA;\n" +	"			ornA = newOrnA;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		}\n" +	"		\n" +	"		if (childShapeIndexB>=0)\n" +	"		{\n" +	"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"		}\n" +	"	\n" +	"		gpuHasCompoundSepNormalsOut[i] = 0;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"	\n" +	"		if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		int hasSeparatingAxis = 5;\n" +	"							\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		posA.w = 0.f;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal = make_float4(1,0,0,0);\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else//(!sepB)\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"				if (sepEE)\n" +	"				{\n" +	"						gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" +	"						gpuHasCompoundSepNormalsOut[i] = 1;\n" +	"				}//sepEE\n" +	"			}//(!sepB)\n" +	"		}//(!sepA)\n" +	"		\n" +	"		\n" +	"	}\n" +	"		\n" +	"}\n" +	"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +	"{\n" +	"		b3Float4 vecOut;\n" +	"		vecOut = b3MakeFloat4(\n" +	"			(float)(vecIn[0]) / (quantization.x),\n" +	"			(float)(vecIn[1]) / (quantization.y),\n" +	"			(float)(vecIn[2]) / (quantization.z),\n" +	"			0.f);\n" +	"		vecOut += bvhAabbMin;\n" +	"		return vecOut;\n" +	"}\n" +	"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +	"{\n" +	"		b3Float4 vecOut;\n" +	"		vecOut = b3MakeFloat4(\n" +	"			(float)(vecIn[0]) / (quantization.x),\n" +	"			(float)(vecIn[1]) / (quantization.y),\n" +	"			(float)(vecIn[2]) / (quantization.z),\n" +	"			0.f);\n" +	"		vecOut += bvhAabbMin;\n" +	"		return vecOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findCompoundPairsKernel( __global const int4* pairs, \n" +	"	__global const BodyData* rigidBodies, \n" +	"	__global const btCollidableGpu* collidables,\n" +	"	__global const ConvexPolyhedronCL* convexShapes, \n" +	"	__global const float4* vertices,\n" +	"	__global const float4* uniqueEdges,\n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int* indices,\n" +	"	__global b3Aabb_t* aabbLocalSpace,\n" +	"	__global const btGpuChildShape* gpuChildShapes,\n" +	"	__global volatile int4* gpuCompoundPairsOut,\n" +	"	__global volatile int* numCompoundPairsOut,\n" +	"	__global const b3BvhSubtreeInfo* subtrees,\n" +	"	__global const b3QuantizedBvhNode* quantizedNodes,\n" +	"	__global const b3BvhInfo* bvhInfos,\n" +	"	int numPairs,\n" +	"	int maxNumCompoundPairsCapacity\n" +	"	)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"		{\n" +	"			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" +	"			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" +	"			int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" +	"			int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" +	"			int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" +	"			int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" +	"			\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			\n" +	"			for (int p=0;p<numSubTreesA;p++)\n" +	"			{\n" +	"				b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" +	"				//bvhInfos[bvhA].m_quantization\n" +	"				b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"				b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"				b3Float4 aabbAMinOut,aabbAMaxOut;\n" +	"				float margin=0.f;\n" +	"				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +	"				\n" +	"				for (int q=0;q<numSubTreesB;q++)\n" +	"				{\n" +	"					b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" +	"					b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"					b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"					b3Float4 aabbBMinOut,aabbBMaxOut;\n" +	"					float margin=0.f;\n" +	"					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +	"					\n" +	"					\n" +	"					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +	"					if (aabbOverlap)\n" +	"					{\n" +	"						\n" +	"						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" +	"						int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" +	"						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" +	"						int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" +	"						b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" +	"						b3Int2 node0;\n" +	"						node0.x = startNodeIndexA;\n" +	"						node0.y = startNodeIndexB;\n" +	"						int maxStackDepth = B3_MAX_STACK_DEPTH;\n" +	"						int depth=0;\n" +	"						nodeStack[depth++]=node0;\n" +	"						do\n" +	"						{\n" +	"							b3Int2 node = nodeStack[--depth];\n" +	"							b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"							b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"							b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"							b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"							float margin=0.f;\n" +	"							b3Float4 aabbAMinOut,aabbAMaxOut;\n" +	"							b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +	"							b3Float4 aabbBMinOut,aabbBMaxOut;\n" +	"							b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +	"							\n" +	"							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +	"							if (nodeOverlap)\n" +	"							{\n" +	"								bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" +	"								bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" +	"								bool isInternalA = !isLeafA;\n" +	"								bool isInternalB = !isLeafB;\n" +	"								//fail, even though it might hit two leaf nodes\n" +	"								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" +	"								{\n" +	"									//printf(\"Error: traversal exceeded maxStackDepth\");\n" +	"									continue;\n" +	"								}\n" +	"								if(isInternalA)\n" +	"								{\n" +	"									int nodeAleftChild = node.x+1;\n" +	"									bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" +	"									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" +	"									if(isInternalB)\n" +	"									{					\n" +	"										int nodeBleftChild = node.y+1;\n" +	"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +	"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" +	"									}\n" +	"									else\n" +	"									{\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" +	"									}\n" +	"								}\n" +	"								else\n" +	"								{\n" +	"									if(isInternalB)\n" +	"									{\n" +	"										int nodeBleftChild = node.y+1;\n" +	"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +	"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +	"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" +	"									}\n" +	"									else\n" +	"									{\n" +	"										int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"										if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"										{\n" +	"											int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" +	"											int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" +	"											gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +	"										}\n" +	"									}\n" +	"								}\n" +	"							}\n" +	"						} while (depth);\n" +	"					}\n" +	"				}\n" +	"			}\n" +	"			\n" +	"			return;\n" +	"		}\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"		{\n" +	"			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" +	"			{\n" +	"				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" +	"				for (int c=0;c<numChildrenA;c++)\n" +	"				{\n" +	"					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" +	"					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"					float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"					float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"					float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"					float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"					float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" +	"					b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" +	"					float margin = 0.f;\n" +	"					\n" +	"					b3Float4 aabbAMinWS;\n" +	"					b3Float4 aabbAMaxWS;\n" +	"					\n" +	"					b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" +	"						newPosA,\n" +	"						newOrnA,\n" +	"						&aabbAMinWS,&aabbAMaxWS);\n" +	"						\n" +	"					\n" +	"					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"					{\n" +	"						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"						for (int b=0;b<numChildrenB;b++)\n" +	"						{\n" +	"							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"							float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"							float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"							float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"							int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"							b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" +	"							\n" +	"							b3Float4 aabbBMinWS;\n" +	"							b3Float4 aabbBMaxWS;\n" +	"							\n" +	"							b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" +	"								newPosB,\n" +	"								newOrnB,\n" +	"								&aabbBMinWS,&aabbBMaxWS);\n" +	"								\n" +	"								\n" +	"							\n" +	"							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" +	"							if (aabbOverlap)\n" +	"							{\n" +	"								int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"								float dmin = FLT_MAX;\n" +	"								float4 posA = newPosA;\n" +	"								posA.w = 0.f;\n" +	"								float4 posB = newPosB;\n" +	"								posB.w = 0.f;\n" +	"								float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"								float4 ornA = newOrnA;\n" +	"								float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"								float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"								float4 ornB =newOrnB;\n" +	"								float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"								const float4 DeltaC2 = c0 - c1;\n" +	"								{//\n" +	"									int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"									if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"									{\n" +	"										gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +	"									}\n" +	"								}//\n" +	"							}//fi(1)\n" +	"						} //for (int b=0\n" +	"					}//if (collidables[collidableIndexB].\n" +	"					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"					{\n" +	"						if (1)\n" +	"						{\n" +	"							int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"							float dmin = FLT_MAX;\n" +	"							float4 posA = newPosA;\n" +	"							posA.w = 0.f;\n" +	"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							posB.w = 0.f;\n" +	"							float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"							float4 ornA = newOrnA;\n" +	"							float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"							float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"							float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"							const float4 DeltaC2 = c0 - c1;\n" +	"							{\n" +	"								int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"								if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"								{\n" +	"									gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" +	"								}//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"							}//\n" +	"						}//fi (1)\n" +	"					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"				}//for (int b=0;b<numChildrenB;b++)	\n" +	"				return;\n" +	"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" +	"				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"			{\n" +	"				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"				for (int b=0;b<numChildrenB;b++)\n" +	"				{\n" +	"					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"					float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"					float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"					float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"					float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" +	"					float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"					//////////////////////////////////////\n" +	"					if (1)\n" +	"					{\n" +	"						int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"						float dmin = FLT_MAX;\n" +	"						float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"						posA.w = 0.f;\n" +	"						float4 posB = newPosB;\n" +	"						posB.w = 0.f;\n" +	"						float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"						float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"						float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"						float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"						float4 ornB =newOrnB;\n" +	"						float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"						const float4 DeltaC2 = c0 - c1;\n" +	"						{//\n" +	"							int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"							if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"							{\n" +	"								gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" +	"							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"						}//\n" +	"					}//fi (1)	\n" +	"				}//for (int b=0;b<numChildrenB;b++)\n" +	"				return;\n" +	"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"			return;\n" +	"		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"	}//i<numPairs\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findSeparatingAxisKernel( __global const int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global volatile float4* separatingNormals,\n" +	"																					__global volatile int* hasSeparatingAxis,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"	\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"			\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal;\n" +	"		\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																								posB,ornB,\n" +	"																								DeltaC2,\n" +	"																								vertices,uniqueEdges,faces,\n" +	"																								indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis[i] = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +	"																									posA,ornA,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis[i] = 0;\n" +	"			} else\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 1;\n" +	"					separatingNormals[i] = sepNormal;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"}\n" +	"__kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global volatile float4* separatingNormals,\n" +	"																					__global volatile int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"	\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		hasSeparatingAxis[i] = 0;	\n" +	"		\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"			\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		dmins[i] = dmin;\n" +	"		\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal;\n" +	"		\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																								posB,ornB,\n" +	"																								DeltaC2,\n" +	"																								vertices,uniqueEdges,faces,\n" +	"																								indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis[i] = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +	"																									posA,ornA,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"			if (sepB)\n" +	"			{\n" +	"				dmins[i] = dmin;\n" +	"				hasSeparatingAxis[i] = 1;\n" +	"				separatingNormals[i] = sepNormal;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"}\n" +	"__kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global  float4* separatingNormals,\n" +	"																					__global  int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					__global const float4* unitSphereDirections,\n" +	"																					int numUnitSphereDirections,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"	\n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"	\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"			\n" +	"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	\n" +	"			float dmin = dmins[i];\n" +	"	\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			posA.w = 0.f;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			posB.w = 0.f;\n" +	"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"			const float4 DeltaC2 = c0 - c1;\n" +	"			float4 sepNormal = separatingNormals[i];\n" +	"			\n" +	"			\n" +	"			\n" +	"			bool sepEE = false;\n" +	"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +	"			if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" +	"			{\n" +	"				sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"																									\n" +	"					if (!sepEE)\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 0;\n" +	"					} else\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 1;\n" +	"						separatingNormals[i] = sepNormal;\n" +	"					}\n" +	"			}\n" +	"			/*\n" +	"			///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" +	"			else\n" +	"			{\n" +	"				sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,unitSphereDirections,numUnitSphereDirections,\n" +	"																									&sepNormal,&dmin);\n" +	"					if (!sepEE)\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 0;\n" +	"					} else\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 1;\n" +	"						separatingNormals[i] = sepNormal;\n" +	"					}\n" +	"			}\n" +	"			*/\n" +	"		}		//if (hasSeparatingAxis[i])\n" +	"	}//(i<numPairs)\n" +	"}\n" +	"inline int	findClippingFaces(const float4 separatingNormal,\n" +	"                      const ConvexPolyhedronCL* hullA, \n" +	"					  __global const ConvexPolyhedronCL* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"                       __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"					  const float4* verticesA,\n" +	"                      const btGpuFace* facesA,\n" +	"                      const int* indicesA,\n" +	"					  __global const float4* verticesB,\n" +	"                      __global const btGpuFace* facesB,\n" +	"                      __global const int* indicesB,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=0;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	{\n" +	"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		int numVertices = polyB.m_numIndices;\n" +	"        if (numVertices>capacityWorldVerts)\n" +	"            numVertices = capacityWorldVerts;\n" +	"        \n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"            if (e0<capacityWorldVerts)\n" +	"            {\n" +	"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"            }\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int closestFaceA=0;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"    if (numVerticesA>capacityWorldVerts)\n" +	"       numVerticesA = capacityWorldVerts;\n" +	"    \n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        if (e0<capacityWorldVerts)\n" +	"        {\n" +	"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"        }\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" +	"																					__global const BodyData* rigidBodies,\n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const btGpuChildShape* gpuChildShapes,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global float4* concaveSeparatingNormalsOut,\n" +	"																					__global int* concaveHasSeparatingNormals,\n" +	"																					__global int4* clippingFacesOut,\n" +	"																					__global float4* worldVertsA1GPU,\n" +	"																					__global float4*  worldNormalsAGPU,\n" +	"																					__global float4* worldVertsB1GPU,\n" +	"																					int vertexFaceCapacity,\n" +	"																					int numConcavePairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"	concaveHasSeparatingNormals[i] = 0;\n" +	"	int pairIdx = i;\n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +	"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"	{\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"		return;\n" +	"	}\n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"			\n" +	"		triAabb.m_min = min(triAabb.m_min,vert);		\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);		\n" +	"	}\n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" +	"		\n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"                                  \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"                             \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                                            \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                                            \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		\n" +	"		///////////////////\n" +	"		///compound shape support\n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	"		//////////////////\n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"												posA,ornA,\n" +	"												posB,ornB,\n" +	"												DeltaC2,\n" +	"												verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"												vertices,uniqueEdges,faces,indices,\n" +	"												&sepAxis,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" +	"												posB,ornB,\n" +	"												posA,ornA,\n" +	"												DeltaC2,\n" +	"												vertices,uniqueEdges,faces,indices,\n" +	"												verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"												&sepAxis,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"															posA,ornA,\n" +	"															posB,ornB,\n" +	"															DeltaC2,\n" +	"															verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"															vertices,uniqueEdges,faces,indices,\n" +	"															&sepAxis,&dmin);\n" +	"	\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis = 1;\n" +	"				}\n" +	"			}\n" +	"		}	\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"			sepAxis.w = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"			float minDist = -1e30f;\n" +	"			float maxDist = 0.02f;\n" +	"		\n" +	"			findClippingFaces(sepAxis,\n" +	"                     &convexPolyhedronA,\n" +	"					 &convexShapes[shapeIndexB],\n" +	"					 posA,ornA,\n" +	"					 posB,ornB,\n" +	"                      worldVertsA1GPU,\n" +	"                      worldNormalsAGPU,\n" +	"                      worldVertsB1GPU,\n" +	"					  vertexFaceCapacity,\n" +	"                      minDist, maxDist,\n" +	"                      verticesA,\n" +	"                      facesA,\n" +	"                      indicesA,\n" +	" 					  vertices,\n" +	"                      faces,\n" +	"                      indices,\n" +	"                      clippingFacesOut, pairIdx);\n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"	\n" +	"	concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" +	"}\n";  |