diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels')
6 files changed, 8641 insertions, 8647 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h index 4b3b49eae8..f1df8a6970 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -1,258 +1,257 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* bvhTraversalKernelCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"typedef unsigned int u32;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. 
Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} btQuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeaf(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} btBvhSubtreeInfo;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	int m_numChildShapes;\n" -"	int blaat2;\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"int testQuantizedAabbAgainstQuantizedAabb(\n" -"								const unsigned short int* aabbMin1,\n" -"								const unsigned short int* aabbMax1,\n" -"								const unsigned short int* aabbMin2,\n" -"								const unsigned short int* aabbMax2)\n" -"{\n" -"	//int overlap = 1;\n" -"	if (aabbMin1[0] > aabbMax2[0])\n" -"		return 0;\n" -"	if (aabbMax1[0] < aabbMin2[0])\n" -"		return 0;\n" -"	if (aabbMin1[1] > aabbMax2[1])\n" -"		return 0;\n" -"	if (aabbMax1[1] < aabbMin2[1])\n" -"		return 0;\n" -"	if (aabbMin1[2] > aabbMax2[2])\n" -"		return 0;\n" -"	if (aabbMax1[2] < aabbMin2[2])\n" -"		return 0;\n" -"	return 1;\n" -"	
//overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" -"	//overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" -"	//overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" -"	//return overlap;\n" -"}\n" -"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" -"{\n" -"	float4 clampedPoint = max(point2,bvhAabbMin);\n" -"	clampedPoint = min (clampedPoint, bvhAabbMax);\n" -"	float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" -"	if (isMax)\n" -"	{\n" -"		out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" -"		out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" -"		out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" -"	} else\n" -"	{\n" -"		out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" -"		out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" -"		out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" -"	}\n" -"}\n" -"// work-in-progress\n" -"__kernel void   bvhTraversalKernel( __global const int4* pairs, \n" -"									__global const BodyData* rigidBodies, \n" -"									__global const btCollidableGpu* collidables,\n" -"									__global btAabbCL* aabbs,\n" -"									__global int4* concavePairsOut,\n" -"									__global volatile int* numConcavePairsOut,\n" -"									__global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" -"									__global const btQuantizedBvhNode* quantizedNodesRoot,\n" -"									__global const b3BvhInfo* bvhInfos,\n" -"									int numPairs,\n" -"									int maxNumConcavePairsCapacity)\n" -"{\n" -"	int id = get_global_id(0);\n" -"	if (id>=numPairs)\n" -"		return;\n" -"	\n" -"	int bodyIndexA = pairs[id].x;\n" -"	int bodyIndexB = pairs[id].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = 
rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"	//once the broadphase avoids static-static pairs, we can remove this test\n" -"	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"	{\n" -"		return;\n" -"	}\n" -"		\n" -"	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" -"		return;\n" -"	int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		\n" -"	if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" -"		shapeTypeB!=SHAPE_SPHERE	&&\n" -"		shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" -"		)\n" -"		return;\n" -"	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" -"	float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" -"	float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" -"	float4 bvhQuantization = bvhInfo.m_quantization;\n" -"	int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" -"	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" -"	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" -"	\n" -"	unsigned short int quantizedQueryAabbMin[3];\n" -"	unsigned short int quantizedQueryAabbMax[3];\n" -"	quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -"	quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -"	\n" -"	for (int i=0;i<numSubtreeHeaders;i++)\n" -"	{\n" -"		btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" -"				\n" -"		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" -"		if (overlap != 0)\n" -"		{\n" -"			int startNodeIndex = subtree.m_rootNodeIndex;\n" -"			int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" -"			int curIndex = startNodeIndex;\n" -"			int escapeIndex;\n" -"			int isLeafNode;\n" -"			int aabbOverlap;\n" -"			while (curIndex < 
endNodeIndex)\n" -"			{\n" -"				btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" -"				aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" -"				isLeafNode = isLeaf(&rootNode);\n" -"				if (aabbOverlap)\n" -"				{\n" -"					if (isLeafNode)\n" -"					{\n" -"						int triangleIndex = getTriangleIndex(&rootNode);\n" -"						if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"						{\n" -"								int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"								int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" -"								for (int b=0;b<numChildrenB;b++)\n" -"								{\n" -"									if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" -"									{\n" -"										int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"										int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" -"										concavePairsOut[pairIdx+b] = newPair;\n" -"									}\n" -"								}\n" -"						} else\n" -"						{\n" -"							int pairIdx = atomic_inc(numConcavePairsOut);\n" -"							if (pairIdx<maxNumConcavePairsCapacity)\n" -"							{\n" -"								int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" -"								concavePairsOut[pairIdx] = newPair;\n" -"							}\n" -"						}\n" -"					} \n" -"					curIndex++;\n" -"				} else\n" -"				{\n" -"					if (isLeafNode)\n" -"					{\n" -"						curIndex++;\n" -"					} else\n" -"					{\n" -"						escapeIndex = getEscapeIndex(&rootNode);\n" -"						curIndex += escapeIndex;\n" -"					}\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"}\n" -; +static const char* bvhTraversalKernelCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define SHAPE_SPHERE 7\n" +	
"typedef unsigned int u32;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} btQuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeaf(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} btBvhSubtreeInfo;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	int m_numChildShapes;\n" +	"	int blaat2;\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"int testQuantizedAabbAgainstQuantizedAabb(\n" +	"								const unsigned short int* aabbMin1,\n" +	"								const unsigned short int* aabbMax1,\n" +	"								const unsigned short int* aabbMin2,\n" +	"								const unsigned short int* aabbMax2)\n" +	"{\n" +	"	//int overlap = 1;\n" +	"	if (aabbMin1[0] > aabbMax2[0])\n" +	"		return 0;\n" +	"	if (aabbMax1[0] < aabbMin2[0])\n" +	"		return 0;\n" +	"	if (aabbMin1[1] > aabbMax2[1])\n" +	"		return 0;\n" +	"	if (aabbMax1[1] < aabbMin2[1])\n" +	"		return 0;\n" +	"	if (aabbMin1[2] > aabbMax2[2])\n" +	"		return 0;\n" +	"	
if (aabbMax1[2] < aabbMin2[2])\n" +	"		return 0;\n" +	"	return 1;\n" +	"	//overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" +	"	//overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" +	"	//overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" +	"	//return overlap;\n" +	"}\n" +	"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" +	"{\n" +	"	float4 clampedPoint = max(point2,bvhAabbMin);\n" +	"	clampedPoint = min (clampedPoint, bvhAabbMax);\n" +	"	float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" +	"	if (isMax)\n" +	"	{\n" +	"		out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" +	"		out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" +	"		out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" +	"	} else\n" +	"	{\n" +	"		out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" +	"		out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" +	"		out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" +	"	}\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   bvhTraversalKernel( __global const int4* pairs, \n" +	"									__global const BodyData* rigidBodies, \n" +	"									__global const btCollidableGpu* collidables,\n" +	"									__global btAabbCL* aabbs,\n" +	"									__global int4* concavePairsOut,\n" +	"									__global volatile int* numConcavePairsOut,\n" +	"									__global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" +	"									__global const btQuantizedBvhNode* quantizedNodesRoot,\n" +	"									__global const b3BvhInfo* bvhInfos,\n" +	"									int numPairs,\n" +	"									int maxNumConcavePairsCapacity)\n" +	"{\n" +	"	int id = get_global_id(0);\n" +	"	if (id>=numPairs)\n" +	"		return;\n" +	"	\n" +	"	int bodyIndexA = pairs[id].x;\n" +	"	int bodyIndexB = pairs[id].y;\n" +	"	int collidableIndexA 
= rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"	//once the broadphase avoids static-static pairs, we can remove this test\n" +	"	if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"	{\n" +	"		return;\n" +	"	}\n" +	"		\n" +	"	if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" +	"		return;\n" +	"	int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		\n" +	"	if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" +	"		shapeTypeB!=SHAPE_SPHERE	&&\n" +	"		shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" +	"		)\n" +	"		return;\n" +	"	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" +	"	float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" +	"	float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" +	"	float4 bvhQuantization = bvhInfo.m_quantization;\n" +	"	int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" +	"	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" +	"	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" +	"	\n" +	"	unsigned short int quantizedQueryAabbMin[3];\n" +	"	unsigned short int quantizedQueryAabbMax[3];\n" +	"	quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +	"	quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +	"	\n" +	"	for (int i=0;i<numSubtreeHeaders;i++)\n" +	"	{\n" +	"		btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" +	"				\n" +	"		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" +	"		if (overlap != 0)\n" +	"		{\n" +	"			int startNodeIndex = subtree.m_rootNodeIndex;\n" +	"			int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" +	"			int curIndex = startNodeIndex;\n" 
+	"			int escapeIndex;\n" +	"			int isLeafNode;\n" +	"			int aabbOverlap;\n" +	"			while (curIndex < endNodeIndex)\n" +	"			{\n" +	"				btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" +	"				aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" +	"				isLeafNode = isLeaf(&rootNode);\n" +	"				if (aabbOverlap)\n" +	"				{\n" +	"					if (isLeafNode)\n" +	"					{\n" +	"						int triangleIndex = getTriangleIndex(&rootNode);\n" +	"						if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"						{\n" +	"								int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"								int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" +	"								for (int b=0;b<numChildrenB;b++)\n" +	"								{\n" +	"									if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" +	"									{\n" +	"										int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"										int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" +	"										concavePairsOut[pairIdx+b] = newPair;\n" +	"									}\n" +	"								}\n" +	"						} else\n" +	"						{\n" +	"							int pairIdx = atomic_inc(numConcavePairsOut);\n" +	"							if (pairIdx<maxNumConcavePairsCapacity)\n" +	"							{\n" +	"								int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" +	"								concavePairsOut[pairIdx] = newPair;\n" +	"							}\n" +	"						}\n" +	"					} \n" +	"					curIndex++;\n" +	"				} else\n" +	"				{\n" +	"					if (isLeafNode)\n" +	"					{\n" +	"						curIndex++;\n" +	"					} else\n" +	"					{\n" +	"						escapeIndex = getEscapeIndex(&rootNode);\n" +	"						curIndex += escapeIndex;\n" +	"					}\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h index 7ed4b382c3..74959a931c 
100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h @@ -1,1446 +1,1445 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* mprKernelsCL= \ -"/***\n" -" * ---------------------------------\n" -" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" -" *\n" -" *  This file was ported from mpr.c file, part of libccd.\n" -" *  The Minkoski Portal Refinement implementation was ported \n" -" *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" -" *  at http://github.com/erwincoumans/bullet3\n" -" *\n" -" *  Distributed under the OSI-approved BSD License (the \"License\");\n" -" *  see <http://www.opensource.org/licenses/bsd-license.php>.\n" -" *  This software is distributed WITHOUT ANY WARRANTY; without even the\n" -" *  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" -" *  See the License for more information.\n" -" */\n" -"#ifndef B3_MPR_PENETRATION_H\n" -"#define B3_MPR_PENETRATION_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define 
b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" 
-"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const 
b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	
out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -"	b3Float4				m_pos;\n" -"	b3Quat					m_quat;\n" -"	b3Float4				m_linVel;\n" -"	b3Float4				m_angVel;\n" -"	int 					m_collidableIdx;\n" -"	float 				m_invMass;\n" -"	float 				m_restituitionCoeff;\n" -"	float 				m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -"	b3Mat3x3 m_invInertiaWorld;\n" -"	b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -"	\n" -"#ifndef 
B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -"	b3Float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"	int m_unusedPadding1;\n" -"	int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -"	b3Float4		m_localCenter;\n" -"	b3Float4		m_extents;\n" -"	b3Float4		mC;\n" -"	b3Float4		mE;\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -"	SHAPE_HEIGHT_FIELD=1,\n" -"	SHAPE_CONVEX_HULL=3,\n" -"	SHAPE_PLANE=4,\n" -"	SHAPE_CONCAVE_TRIMESH=5,\n" -"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -"	SHAPE_SPHERE=7,\n" -"	MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -"	b3Float4	m_childPosition;\n" -"	b3Quat		m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"struct 
b3CompoundOverlappingPair\n" -"{\n" -"	int m_bodyIndexA;\n" -"	int m_bodyIndexB;\n" -"//	int	m_pairType;\n" -"	int m_childShapeIndexA;\n" -"	int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define B3_MPR_SQRT sqrt\n" -"#endif\n" -"#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" -"#define B3_MPR_FABS fabs\n" -"#define B3_MPR_TOLERANCE 1E-6f\n" -"#define B3_MPR_MAX_ITERATIONS 1000\n" -"struct _b3MprSupport_t \n" -"{\n" -"    b3Float4 v;  //!< Support point in minkowski sum\n" -"    b3Float4 v1; //!< Support point in obj1\n" -"    b3Float4 v2; //!< Support point in obj2\n" -"};\n" -"typedef struct _b3MprSupport_t b3MprSupport_t;\n" -"struct _b3MprSimplex_t \n" -"{\n" -"    b3MprSupport_t ps[4];\n" -"    int last; //!< index of last added point\n" -"};\n" -"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" -"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" -"{\n" -"    return &s->ps[idx];\n" -"}\n" -"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" -"{\n" -"    s->last = size - 1;\n" -"}\n" -"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" -"{\n" -"    return s->last + 1;\n" -"}\n" -"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" -"{\n" -"    // here is no check on boundaries\n" -"    return &s->ps[idx];\n" -"}\n" -"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" -"{\n" -"    *d = *s;\n" -"}\n" -"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" -"{\n" -"    b3MprSupportCopy(s->ps + pos, a);\n" -"}\n" -"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" -"{\n" -"    b3MprSupport_t supp;\n" -"    b3MprSupportCopy(&supp, &s->ps[pos1]);\n" -"    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" -"    b3MprSupportCopy(&s->ps[pos2], &supp);\n" -"}\n" -"inline int b3MprIsZero(float val)\n" -"{\n" -"    return B3_MPR_FABS(val) < FLT_EPSILON;\n" 
-"}\n" -"inline int b3MprEq(float _a, float _b)\n" -"{\n" -"    float ab;\n" -"    float a, b;\n" -"    ab = B3_MPR_FABS(_a - _b);\n" -"    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" -"        return 1;\n" -"    a = B3_MPR_FABS(_a);\n" -"    b = B3_MPR_FABS(_b);\n" -"    if (b > a){\n" -"        return ab < FLT_EPSILON * b;\n" -"    }else{\n" -"        return ab < FLT_EPSILON * a;\n" -"    }\n" -"}\n" -"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" -"{\n" -"    return b3MprEq((*a).x, (*b).x)\n" -"            && b3MprEq((*a).y, (*b).y)\n" -"            && b3MprEq((*a).z, (*b).z);\n" -"}\n" -"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, 	b3ConstArray(b3Float4) verticesA)\n" -"{\n" -"	b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -"	float maxDot = -B3_LARGE_FLOAT;\n" -"    if( 0 < hull->m_numVertices )\n" -"    {\n" -"        const b3Float4 scaled = supportVec;\n" -"		int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -"        return verticesA[hull->m_vertexOffset+index];\n" -"    }\n" -"    return supVec;\n" -"}\n" -"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"														const b3Float4* _dir, b3Float4* outp, int logme)\n" -"{\n" -"	//dir is in worldspace, move to local space\n" -"	\n" -"	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" -"	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" -"	\n" -"	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" -"	\n" -"	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" -"	\n" -"	//find local support vertex\n" -"	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" 
-"	\n" -"	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" -"	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" -"	\n" -"	b3Float4 pInA;\n" -"	if (logme)\n" -"	{\n" -"		b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -"		float maxDot = -B3_LARGE_FLOAT;\n" -"		if( 0 < hull->m_numVertices )\n" -"		{\n" -"			const b3Float4 scaled = localDir;\n" -"			int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -"			pInA = cpuVertices[hull->m_vertexOffset+index];\n" -"			\n" -"		}\n" -"	} else\n" -"	{\n" -"		pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" -"	}\n" -"	//move vertex to world space\n" -"	*outp = b3TransformPoint(pInA,pos,orn);\n" -"	\n" -"}\n" -"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													const b3Float4* _dir, b3MprSupport_t *supp)\n" -"{\n" -"    b3Float4 dir;\n" -"	dir = *_dir;\n" -"	b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" -"    dir = *_dir*-1.f;\n" -"	b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" -"    supp->v = supp->v1 - supp->v2;\n" -"}\n" -"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" -"{\n" -"    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" -"	center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" -"    center->v = center->v1 - center->v2;\n" -"}\n" -"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" -"{\n" -"	(*v).x = x;\n" -"	(*v).y = y;\n" -"	
(*v).z = z;\n" -"	(*v).w = 0.f;\n" -"}\n" -"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"    (*v).x += (*w).x;\n" -"    (*v).y += (*w).y;\n" -"    (*v).z += (*w).z;\n" -"}\n" -"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"    *v = *w;\n" -"}\n" -"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" -"{\n" -"    *d *= k;\n" -"}\n" -"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"    float dot;\n" -"	dot = b3Dot3F4(*a,*b);\n" -"    return dot;\n" -"}\n" -"inline float b3MprVec3Len2(const b3Float4 *v)\n" -"{\n" -"    return b3MprVec3Dot(v, v);\n" -"}\n" -"inline void b3MprVec3Normalize(b3Float4 *d)\n" -"{\n" -"    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" -"    b3MprVec3Scale(d, k);\n" -"}\n" -"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"	*d = b3Cross3(*a,*b);\n" -"	\n" -"}\n" -"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" -"{\n" -"	*d = *v - *w;\n" -"}\n" -"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" -"{\n" -"    b3Float4 v2v1, v3v1;\n" -"    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b3MprVec3Cross(dir, &v2v1, &v3v1);\n" -"    b3MprVec3Normalize(dir);\n" -"}\n" -"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" -"                                       const b3Float4 *dir)\n" -"{\n" -"    float dot;\n" -"    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -"    return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" -"                                     const b3MprSupport_t *v4,\n" -"                                     const b3Float4 *dir)\n" -"{\n" -"    float dv1, dv2, dv3, 
dv4;\n" -"    float dot1, dot2, dot3;\n" -"    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" -"    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" -"    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" -"    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" -"    dv4 = b3MprVec3Dot(&v4->v, dir);\n" -"    dot1 = dv4 - dv1;\n" -"    dot2 = dv4 - dv2;\n" -"    dot3 = dv4 - dv3;\n" -"    dot1 = B3_MPR_FMIN(dot1, dot2);\n" -"    dot1 = B3_MPR_FMIN(dot1, dot3);\n" -"    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" -"}\n" -"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n" -"                                         const b3MprSupport_t *v4,\n" -"                                         const b3Float4 *dir)\n" -"{\n" -"    float dot;\n" -"    dot = b3MprVec3Dot(&v4->v, dir);\n" -"    return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" -"                              const b3MprSupport_t *v4)\n" -"{\n" -"    float dot;\n" -"    b3Float4 v4v0;\n" -"    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" -"    if (dot > 0.f){\n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" -"        if (dot > 0.f){\n" -"            b3MprSimplexSet(portal, 1, v4);\n" -"        }else{\n" -"            b3MprSimplexSet(portal, 3, v4);\n" -"        }\n" -"    }else{\n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" -"        if (dot > 0.f){\n" -"            b3MprSimplexSet(portal, 2, v4);\n" -"        }else{\n" -"            b3MprSimplexSet(portal, 1, v4);\n" -"        }\n" -"    }\n" -"}\n" -"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"									
				b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													__global int*	hasSepAxis,\n" -"													b3MprSimplex_t *portal)\n" -"{\n" -"    b3Float4 dir, va, vb;\n" -"    float dot;\n" -"    int cont;\n" -"	\n" -"	\n" -"    // vertex 0 is center of portal\n" -"    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" -"    // vertex 0 is center of portal\n" -"    b3MprSimplexSetSize(portal, 1);\n" -"	\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" -"        // Portal's center lies on origin (0,0,0) => we know that objects\n" -"        // intersect but we would need to know penetration info.\n" -"        // So move center little bit...\n" -"        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" -"        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" -"    }\n" -"    // vertex 1 = support in direction of origin\n" -"    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Scale(&dir, -1.f);\n" -"    b3MprVec3Normalize(&dir);\n" -"    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" -"    b3MprSimplexSetSize(portal, 2);\n" -"    // test if origin isn't outside of v1\n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" -"	\n" -"    if (b3MprIsZero(dot) || dot < 0.f)\n" -"        return -1;\n" -"    // vertex 2\n" -"    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" -"        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" -"            // origin lies on v1\n" -"            return 1;\n" -"        }else{\n" -"  
          // origin lies on v0-v1 segment\n" -"            return 2;\n" -"        }\n" -"    }\n" -"    b3MprVec3Normalize(&dir);\n" -"	 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" -"    \n" -"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" -"    if (b3MprIsZero(dot) || dot < 0.f)\n" -"        return -1;\n" -"    b3MprSimplexSetSize(portal, 3);\n" -"    // vertex 3 direction\n" -"    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                     &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -"                     &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Cross(&dir, &va, &vb);\n" -"    b3MprVec3Normalize(&dir);\n" -"    // it is better to form portal faces to be oriented \"outside\" origin\n" -"    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -"    if (dot > 0.f){\n" -"        b3MprSimplexSwap(portal, 1, 2);\n" -"        b3MprVec3Scale(&dir, -1.f);\n" -"    }\n" -"    while (b3MprSimplexSize(portal) < 4){\n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" -"        \n" -"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" -"        if (b3MprIsZero(dot) || dot < 0.f)\n" -"            return -1;\n" -"        cont = 0;\n" -"        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" -"        // continue\n" -"        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                          &b3MprSimplexPoint(portal, 3)->v);\n" -"        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -"        if (dot < 0.f && !b3MprIsZero(dot)){\n" -"            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" -"            cont = 1;\n" -"        }\n" -"        if (!cont){\n" -"            // test 
if origin is outside (v3, v0, v2) - set v1 as v3 and\n" -"            // continue\n" -"            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" -"                              &b3MprSimplexPoint(portal, 2)->v);\n" -"            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -"            if (dot < 0.f && !b3MprIsZero(dot)){\n" -"                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" -"                cont = 1;\n" -"            }\n" -"        }\n" -"        if (cont){\n" -"            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -"                             &b3MprSimplexPoint(portal, 0)->v);\n" -"            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -"                             &b3MprSimplexPoint(portal, 0)->v);\n" -"            b3MprVec3Cross(&dir, &va, &vb);\n" -"            b3MprVec3Normalize(&dir);\n" -"        }else{\n" -"            b3MprSimplexSetSize(portal, 4);\n" -"        }\n" -"    }\n" -"    return 0;\n" -"}\n" -"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"													b3MprSimplex_t *portal)\n" -"{\n" -"    b3Float4 dir;\n" -"    b3MprSupport_t v4;\n" -"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -"    //while (1)\n" -"	{\n" -"        // compute direction outside the portal (from v0 throught v1,v2,v3\n" -"        // face)\n" -"        b3PortalDir(portal, &dir);\n" -"        // test if origin is inside the portal\n" -"        if (portalEncapsulesOrigin(portal, &dir))\n" -"            return 0;\n" -"        // get next support point\n" -"        \n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, 
sepAxis,&dir, &v4);\n" -"        // test if v4 can expand portal to contain origin and if portal\n" -"        // expanding doesn't reach given tolerance\n" -"        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" -"                || portalReachTolerance(portal, &v4, &dir))\n" -"		{\n" -"            return -1;\n" -"        }\n" -"        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" -"        // difference (direction from v0).\n" -"        b3ExpandPortal(portal, &v4);\n" -"    }\n" -"    return -1;\n" -"}\n" -"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" -"{\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    b3Float4 dir;\n" -"    size_t i;\n" -"    float b[4], sum, inv;\n" -"    b3Float4 vec, p1, p2;\n" -"    b3PortalDir(portal, &dir);\n" -"    // use barycentric coordinates of tetrahedron to find origin\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -"                       &b3MprSimplexPoint(portal, 2)->v);\n" -"    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -"                       &b3MprSimplexPoint(portal, 2)->v);\n" -"    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -"                       &b3MprSimplexPoint(portal, 1)->v);\n" -"    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -"	sum = b[0] + b[1] + b[2] + b[3];\n" -"    if (b3MprIsZero(sum) || sum < 0.f){\n" -"		b[0] = 0.f;\n" -"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -"                           &b3MprSimplexPoint(portal, 3)->v);\n" -"        b[1] = b3MprVec3Dot(&vec, &dir);\n" -"    
    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -"                           &b3MprSimplexPoint(portal, 1)->v);\n" -"        b[2] = b3MprVec3Dot(&vec, &dir);\n" -"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -"                           &b3MprSimplexPoint(portal, 2)->v);\n" -"        b[3] = b3MprVec3Dot(&vec, &dir);\n" -"		sum = b[1] + b[2] + b[3];\n" -"	}\n" -"	inv = 1.f / sum;\n" -"    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" -"    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" -"    for (i = 0; i < 4; i++){\n" -"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" -"        b3MprVec3Scale(&vec, b[i]);\n" -"        b3MprVec3Add(&p1, &vec);\n" -"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" -"        b3MprVec3Scale(&vec, b[i]);\n" -"        b3MprVec3Add(&p2, &vec);\n" -"    }\n" -"    b3MprVec3Scale(&p1, inv);\n" -"    b3MprVec3Scale(&p2, inv);\n" -"    b3MprVec3Copy(pos, &p1);\n" -"    b3MprVec3Add(pos, &p2);\n" -"    b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -"    b3Float4 ab;\n" -"    b3MprVec3Sub2(&ab, a, b);\n" -"    return b3MprVec3Len2(&ab);\n" -"}\n" -"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" -"                                                  const b3Float4 *x0,\n" -"                                                  const b3Float4 *b,\n" -"                                                  b3Float4 *witness)\n" -"{\n" -"    // The computation comes from solving equation of segment:\n" -"    //      S(t) = x0 + t.d\n" -"    //          where - x0 is initial point of segment\n" -"    //                - d is direction of segment from x0 (|d| > 0)\n" -"    //                - t belongs to <0, 1> interval\n" -"    // \n" -"    // Than, distance from a segment to some point P can be expressed:\n" -"    //      D(t) = |x0 + t.d - P|^2\n" -"    //          which is distance from any point on segment. 
Minimization\n" -"    //          of this function brings distance from P to segment.\n" -"    // Minimization of D(t) leads to simple quadratic equation that's\n" -"    // solving is straightforward.\n" -"    //\n" -"    // Bonus of this method is witness point for free.\n" -"    float dist, t;\n" -"    b3Float4 d, a;\n" -"    // direction of segment\n" -"    b3MprVec3Sub2(&d, b, x0);\n" -"    // precompute vector from P to x0\n" -"    b3MprVec3Sub2(&a, x0, P);\n" -"    t  = -1.f * b3MprVec3Dot(&a, &d);\n" -"    t /= b3MprVec3Len2(&d);\n" -"    if (t < 0.f || b3MprIsZero(t)){\n" -"        dist = b3MprVec3Dist2(x0, P);\n" -"        if (witness)\n" -"            b3MprVec3Copy(witness, x0);\n" -"    }else if (t > 1.f || b3MprEq(t, 1.f)){\n" -"        dist = b3MprVec3Dist2(b, P);\n" -"        if (witness)\n" -"            b3MprVec3Copy(witness, b);\n" -"    }else{\n" -"        if (witness){\n" -"            b3MprVec3Copy(witness, &d);\n" -"            b3MprVec3Scale(witness, t);\n" -"            b3MprVec3Add(witness, x0);\n" -"            dist = b3MprVec3Dist2(witness, P);\n" -"        }else{\n" -"            // recycling variables\n" -"            b3MprVec3Scale(&d, t);\n" -"            b3MprVec3Add(&d, &a);\n" -"            dist = b3MprVec3Len2(&d);\n" -"        }\n" -"    }\n" -"    return dist;\n" -"}\n" -"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" -"                                const b3Float4 *x0, const b3Float4 *B,\n" -"                                const b3Float4 *C,\n" -"                                b3Float4 *witness)\n" -"{\n" -"    // Computation comes from analytic expression for triangle (x0, B, C)\n" -"    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" -"    // Then equation for distance is:\n" -"    //      D(s, t) = | T(s, t) - P |^2\n" -"    // This leads to minimization of quadratic function of two variables.\n" -"    // The solution from is taken only if s is between 0 and 1, t is\n" -"    // 
between 0 and 1 and t + s < 1, otherwise distance from segment is\n" -"    // computed.\n" -"    b3Float4 d1, d2, a;\n" -"    float u, v, w, p, q, r;\n" -"    float s, t, dist, dist2;\n" -"    b3Float4 witness2;\n" -"    b3MprVec3Sub2(&d1, B, x0);\n" -"    b3MprVec3Sub2(&d2, C, x0);\n" -"    b3MprVec3Sub2(&a, x0, P);\n" -"    u = b3MprVec3Dot(&a, &a);\n" -"    v = b3MprVec3Dot(&d1, &d1);\n" -"    w = b3MprVec3Dot(&d2, &d2);\n" -"    p = b3MprVec3Dot(&a, &d1);\n" -"    q = b3MprVec3Dot(&a, &d2);\n" -"    r = b3MprVec3Dot(&d1, &d2);\n" -"    s = (q * r - w * p) / (w * v - r * r);\n" -"    t = (-s * r - q) / w;\n" -"    if ((b3MprIsZero(s) || s > 0.f)\n" -"            && (b3MprEq(s, 1.f) || s < 1.f)\n" -"            && (b3MprIsZero(t) || t > 0.f)\n" -"            && (b3MprEq(t, 1.f) || t < 1.f)\n" -"            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" -"        if (witness){\n" -"            b3MprVec3Scale(&d1, s);\n" -"            b3MprVec3Scale(&d2, t);\n" -"            b3MprVec3Copy(witness, x0);\n" -"            b3MprVec3Add(witness, &d1);\n" -"            b3MprVec3Add(witness, &d2);\n" -"            dist = b3MprVec3Dist2(witness, P);\n" -"        }else{\n" -"            dist  = s * s * v;\n" -"            dist += t * t * w;\n" -"            dist += 2.f * s * t * r;\n" -"            dist += 2.f * s * p;\n" -"            dist += 2.f * t * q;\n" -"            dist += u;\n" -"        }\n" -"    }else{\n" -"        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" -"        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" -"        if (dist2 < dist){\n" -"            dist = dist2;\n" -"            if (witness)\n" -"                b3MprVec3Copy(witness, &witness2);\n" -"        }\n" -"        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" -"        if (dist2 < dist){\n" -"            dist = dist2;\n" -"            if (witness)\n" -"                b3MprVec3Copy(witness, &witness2);\n" -"        }\n" -"    }\n" -"    return dist;\n" 
-"}\n" -"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" -"													b3ConstArray(b3Float4)					cpuVertices,\n" -"													__global b3Float4* sepAxis,\n" -"                       b3MprSimplex_t *portal,\n" -"                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n" -"{\n" -"    b3Float4 dir;\n" -"    b3MprSupport_t v4;\n" -"    unsigned long iterations;\n" -"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"    iterations = 1UL;\n" -"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -"    //while (1)\n" -"	{\n" -"        // compute portal direction and obtain next support point\n" -"        b3PortalDir(portal, &dir);\n" -"        \n" -"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" -"        // reached tolerance -> find penetration info\n" -"        if (portalReachTolerance(portal, &v4, &dir)\n" -"                || iterations ==B3_MPR_MAX_ITERATIONS)\n" -"		{\n" -"            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" -"            *depth = B3_MPR_SQRT(*depth);\n" -"			\n" -"			if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" -"			{\n" -"				\n" -"				*pdir = dir;\n" -"			} \n" -"			b3MprVec3Normalize(pdir);\n" -"			\n" -"            // barycentric coordinates:\n" -"            b3FindPos(portal, pos);\n" -"            return;\n" -"        }\n" -"        b3ExpandPortal(portal, &v4);\n" -"        iterations++;\n" -"    }\n" -"}\n" -"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -"    // Touching contact on portal's v1 
- so depth is zero and direction\n" -"    // is unimportant and pos can be guessed\n" -"    *depth = 0.f;\n" -"    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -"	b3Float4* b3mpr_vec3_origin = &zero;\n" -"	b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" -"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -"    b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" -"                              float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -"    \n" -"    // Origin lies on v0-v1 segment.\n" -"    // Depth is distance to v1, direction also and position must be\n" -"    // computed\n" -"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -"    b3MprVec3Scale(pos, 0.5f);\n" -"    \n" -"    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -"    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" -"    b3MprVec3Normalize(dir);\n" -"}\n" -"inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" -"					b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" -"					b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -"					b3ConstArray(b3Collidable_t)	cpuCollidables,\n" -"					b3ConstArray(b3Float4)	cpuVertices,\n" -"					__global b3Float4* sepAxis,\n" -"					__global int*	hasSepAxis,\n" -"					float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" -"{\n" -"	\n" -"	 b3MprSimplex_t portal;\n" -"	 \n" -"//	if (!hasSepAxis[pairIndex])\n" -"	//	return -1;\n" -"	\n" -"	hasSepAxis[pairIndex] = 0;\n" -"	 int res;\n" -"    // Phase 1: Portal discovery\n" -"    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" -"	\n" -"	  \n" -"	//sepAxis[pairIndex] = *pdir;//or -dir?\n" -"	switch (res)\n" -"	{\n" -"	case 0:\n" -"		{\n" -"			// Phase 2: Portal refinement\n" -"		\n" -"			res = 
b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" -"			if (res < 0)\n" -"				return -1;\n" -"			// Phase 3. Penetration info\n" -"			b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" -"			hasSepAxis[pairIndex] = 1;\n" -"			sepAxis[pairIndex] = -*dirOut;\n" -"			break;\n" -"		}\n" -"	case 1:\n" -"		{\n" -"			 // Touching contact on portal's v1.\n" -"			b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" -"			break;\n" -"		}\n" -"	case 2:\n" -"		{\n" -"			\n" -"			b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" -"			break;\n" -"		}\n" -"	default:\n" -"		{\n" -"			hasSepAxis[pairIndex]=0;\n" -"			//if (res < 0)\n" -"			//{\n" -"				// Origin isn't inside portal - no collision.\n" -"				return -1;\n" -"			//}\n" -"		}\n" -"	};\n" -"	\n" -"	return 0;\n" -"};\n" -"#endif //B3_MPR_PENETRATION_H\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" -"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif 
//B3_CONTACT4DATA_H\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#ifdef cl_ext_atomic_counters_32\n" -"	#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"	#define counter32_t volatile __global int*\n" -"#endif\n" -"__kernel void   mprPenetrationKernel( __global int4* pairs,\n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global float4* separatingNormals,\n" -"																					__global int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numPairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"		float depthOut;\n" -"		b3Float4 dirOut;\n" -"		b3Float4 posOut;\n" -"		int res = b3MprPenetration(pairIndex, bodyIndexA, 
bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" -"		\n" -"		\n" -"		\n" -"		\n" -"		if (res==0)\n" -"		{\n" -"			//add a contact\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"			if (dstIdx<contactCapacity)\n" -"			{\n" -"				pairs[pairIndex].z = dstIdx;\n" -"				__global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" -"				c->m_worldNormalOnB = -dirOut;//normal;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				int bodyA = pairs[pairIndex].x;\n" -"				int bodyB = pairs[pairIndex].y;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -"				c->m_childIndexA = -1;\n" -"				c->m_childIndexB = -1;\n" -"				//for (int i=0;i<nContacts;i++)\n" -"				posOut.w = -depthOut;\n" -"				c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" -"				GET_NPOINTS(*c) = 1;//nContacts;\n" -"			}\n" -"		}\n" -"	}\n" -"}\n" -"typedef float4 Quaternion;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	
float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"bool findSeparatingAxisUnitSphere(	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices,\n" -"	__global const float4* unitSphereDirections,\n" -"	int numUnitSphereDirections,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test unit sphere directions\n" -"	for (int i=0;i<numUnitSphereDirections;i++)\n" -"	{\n" -"		float4 crossje;\n" -"		crossje = unitSphereDirections[i];	\n" -"		if (dot3F4(DeltaC2,crossje)>0)\n" -"			crossje *= -1.f;\n" -"		{\n" -"			float dist;\n" -"			bool result = 
true;\n" -"			float Min0,Max0;\n" -"			float Min1,Max1;\n" -"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"		\n" -"			if(Max0<Min1 || Max1<Min0)\n" -"				return false;\n" -"		\n" -"			float d0 = Max0 - Min1;\n" -"			float d1 = Max1 - Min0;\n" -"			dist = d0<d1 ? d0:d1;\n" -"			result = true;\n" -"	\n" -"			if(dist<*dmin)\n" -"			{\n" -"				*dmin = dist;\n" -"				*sep = crossje;\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* unitSphereDirections,\n" -"																					__global  float4* separatingNormals,\n" -"																					__global  int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					int numUnitSphereDirections,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"	\n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"	\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"			\n" -"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	\n" -"			float dmin = dmins[i];\n" -"	\n" -"			float4 posA = 
rigidBodies[bodyIndexA].m_pos;\n" -"			posA.w = 0.f;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			posB.w = 0.f;\n" -"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 c0 = transform(&c0local, &posA, &ornA);\n" -"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"			float4 c1 = transform(&c1local,&posB,&ornB);\n" -"			const float4 DeltaC2 = c0 - c1;\n" -"			float4 sepNormal = separatingNormals[i];\n" -"			\n" -"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -"			if (numEdgeEdgeDirections>numUnitSphereDirections)\n" -"			{\n" -"				bool sepEE = findSeparatingAxisUnitSphere(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																										posB,ornB,\n" -"																										DeltaC2,\n" -"																										vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" -"				if (!sepEE)\n" -"				{\n" -"					hasSeparatingAxis[i] = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis[i] = 1;\n" -"					separatingNormals[i] = sepNormal;\n" -"				}\n" -"			}\n" -"		}		//if (hasSeparatingAxis[i])\n" -"	}//(i<numPairs)\n" -"}\n" -; +static const char* mprKernelsCL = +	"/***\n" +	" * ---------------------------------\n" +	" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" +	" *\n" +	" *  This file was ported from mpr.c file, part of libccd.\n" +	" *  The Minkoski Portal Refinement implementation was ported \n" +	" *  to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" +	" *  at http://github.com/erwincoumans/bullet3\n" +	" *\n" +	" *  Distributed under the OSI-approved BSD License (the \"License\");\n" +	" *  see <http://www.opensource.org/licenses/bsd-license.php>.\n" +	" *  This software is distributed WITHOUT ANY WARRANTY; without even the\n" +	" *  implied 
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" +	" *  See the License for more information.\n" +	" */\n" +	"#ifndef B3_MPR_PENETRATION_H\n" +	"#define B3_MPR_PENETRATION_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    
int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_RIGIDBODY_DATA_H\n" +	"#define B3_RIGIDBODY_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		
q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w 
= 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	
"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +	"struct b3RigidBodyData\n" +	"{\n" +	"	b3Float4				m_pos;\n" +	"	b3Quat					m_quat;\n" +	"	b3Float4				m_linVel;\n" +	"	b3Float4				m_angVel;\n" +	"	int 					m_collidableIdx;\n" +	"	float 				m_invMass;\n" +	"	float 				m_restituitionCoeff;\n" +	"	float 				m_frictionCoeff;\n" +	"};\n" +	"typedef struct b3InertiaData b3InertiaData_t;\n" +	"struct b3InertiaData\n" +	"{\n" +	"	b3Mat3x3 m_invInertiaWorld;\n" +	"	b3Mat3x3 m_initInvInertia;\n" +	"};\n" +	"#endif //B3_RIGIDBODY_DATA_H\n" +	"	\n" +	"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"typedef struct b3GpuFace b3GpuFace_t;\n" +	"struct b3GpuFace\n" +	"{\n" +	"	b3Float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"	int m_unusedPadding1;\n" +	"	int m_unusedPadding2;\n" +	"};\n" +	"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +	"struct b3ConvexPolyhedronData\n" +	"{\n" +	"	b3Float4		m_localCenter;\n" +	"	b3Float4		
m_extents;\n" +	"	b3Float4		mC;\n" +	"	b3Float4		mE;\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"};\n" +	"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_COLLIDABLE_H\n" +	"#define B3_COLLIDABLE_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"enum b3ShapeTypes\n" +	"{\n" +	"	SHAPE_HEIGHT_FIELD=1,\n" +	"	SHAPE_CONVEX_HULL=3,\n" +	"	SHAPE_PLANE=4,\n" +	"	SHAPE_CONCAVE_TRIMESH=5,\n" +	"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +	"	SHAPE_SPHERE=7,\n" +	"	MAX_NUM_SHAPE_TYPES,\n" +	"};\n" +	"typedef struct b3Collidable b3Collidable_t;\n" +	"struct b3Collidable\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"};\n" +	"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" +	"struct b3GpuChildShape\n" +	"{\n" +	"	b3Float4	m_childPosition;\n" +	"	b3Quat		m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"struct b3CompoundOverlappingPair\n" +	"{\n" +	"	int m_bodyIndexA;\n" +	"	int m_bodyIndexB;\n" +	"//	int	m_pairType;\n" +	"	int m_childShapeIndexA;\n" +	"	int m_childShapeIndexB;\n" +	"};\n" +	"#endif //B3_COLLIDABLE_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define B3_MPR_SQRT sqrt\n" +	"#endif\n" +	"#define B3_MPR_FMIN(x, y) ((x) < (y) ? 
(x) : (y))\n" +	"#define B3_MPR_FABS fabs\n" +	"#define B3_MPR_TOLERANCE 1E-6f\n" +	"#define B3_MPR_MAX_ITERATIONS 1000\n" +	"struct _b3MprSupport_t \n" +	"{\n" +	"    b3Float4 v;  //!< Support point in minkowski sum\n" +	"    b3Float4 v1; //!< Support point in obj1\n" +	"    b3Float4 v2; //!< Support point in obj2\n" +	"};\n" +	"typedef struct _b3MprSupport_t b3MprSupport_t;\n" +	"struct _b3MprSimplex_t \n" +	"{\n" +	"    b3MprSupport_t ps[4];\n" +	"    int last; //!< index of last added point\n" +	"};\n" +	"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" +	"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" +	"{\n" +	"    return &s->ps[idx];\n" +	"}\n" +	"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" +	"{\n" +	"    s->last = size - 1;\n" +	"}\n" +	"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" +	"{\n" +	"    return s->last + 1;\n" +	"}\n" +	"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" +	"{\n" +	"    // here is no check on boundaries\n" +	"    return &s->ps[idx];\n" +	"}\n" +	"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" +	"{\n" +	"    *d = *s;\n" +	"}\n" +	"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" +	"{\n" +	"    b3MprSupportCopy(s->ps + pos, a);\n" +	"}\n" +	"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" +	"{\n" +	"    b3MprSupport_t supp;\n" +	"    b3MprSupportCopy(&supp, &s->ps[pos1]);\n" +	"    b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" +	"    b3MprSupportCopy(&s->ps[pos2], &supp);\n" +	"}\n" +	"inline int b3MprIsZero(float val)\n" +	"{\n" +	"    return B3_MPR_FABS(val) < FLT_EPSILON;\n" +	"}\n" +	"inline int b3MprEq(float _a, float _b)\n" +	"{\n" +	"    float ab;\n" +	"    float a, b;\n" +	"    ab = B3_MPR_FABS(_a - _b);\n" +	"    if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" +	"        return 1;\n" +	"    a = B3_MPR_FABS(_a);\n" +	"    b = B3_MPR_FABS(_b);\n" +	"  
  if (b > a){\n" +	"        return ab < FLT_EPSILON * b;\n" +	"    }else{\n" +	"        return ab < FLT_EPSILON * a;\n" +	"    }\n" +	"}\n" +	"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" +	"{\n" +	"    return b3MprEq((*a).x, (*b).x)\n" +	"            && b3MprEq((*a).y, (*b).y)\n" +	"            && b3MprEq((*a).z, (*b).z);\n" +	"}\n" +	"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, 	b3ConstArray(b3Float4) verticesA)\n" +	"{\n" +	"	b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +	"	float maxDot = -B3_LARGE_FLOAT;\n" +	"    if( 0 < hull->m_numVertices )\n" +	"    {\n" +	"        const b3Float4 scaled = supportVec;\n" +	"		int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +	"        return verticesA[hull->m_vertexOffset+index];\n" +	"    }\n" +	"    return supVec;\n" +	"}\n" +	"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"														const b3Float4* _dir, b3Float4* outp, int logme)\n" +	"{\n" +	"	//dir is in worldspace, move to local space\n" +	"	\n" +	"	b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" +	"	b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" +	"	\n" +	"	b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" +	"	\n" +	"	const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" +	"	\n" +	"	//find local support vertex\n" +	"	int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" +	"	\n" +	"	b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" +	"	__global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" +	"	\n" +	"	b3Float4 pInA;\n" +	
"	if (logme)\n" +	"	{\n" +	"		b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +	"		float maxDot = -B3_LARGE_FLOAT;\n" +	"		if( 0 < hull->m_numVertices )\n" +	"		{\n" +	"			const b3Float4 scaled = localDir;\n" +	"			int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +	"			pInA = cpuVertices[hull->m_vertexOffset+index];\n" +	"			\n" +	"		}\n" +	"	} else\n" +	"	{\n" +	"		pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" +	"	}\n" +	"	//move vertex to world space\n" +	"	*outp = b3TransformPoint(pInA,pos,orn);\n" +	"	\n" +	"}\n" +	"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB,   b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													const b3Float4* _dir, b3MprSupport_t *supp)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"	dir = *_dir;\n" +	"	b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" +	"    dir = *_dir*-1.f;\n" +	"	b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" +	"    supp->v = supp->v1 - supp->v2;\n" +	"}\n" +	"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" +	"{\n" +	"    center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" +	"	center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" +	"    center->v = center->v1 - center->v2;\n" +	"}\n" +	"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" +	"{\n" +	"	(*v).x = x;\n" +	"	(*v).y = y;\n" +	"	(*v).z = z;\n" +	"	(*v).w = 0.f;\n" +	"}\n" +	"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"    (*v).x += (*w).x;\n" +	"    (*v).y += (*w).y;\n" +	"    
(*v).z += (*w).z;\n" +	"}\n" +	"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"    *v = *w;\n" +	"}\n" +	"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" +	"{\n" +	"    *d *= k;\n" +	"}\n" +	"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"    float dot;\n" +	"	dot = b3Dot3F4(*a,*b);\n" +	"    return dot;\n" +	"}\n" +	"inline float b3MprVec3Len2(const b3Float4 *v)\n" +	"{\n" +	"    return b3MprVec3Dot(v, v);\n" +	"}\n" +	"inline void b3MprVec3Normalize(b3Float4 *d)\n" +	"{\n" +	"    float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" +	"    b3MprVec3Scale(d, k);\n" +	"}\n" +	"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"	*d = b3Cross3(*a,*b);\n" +	"	\n" +	"}\n" +	"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" +	"{\n" +	"	*d = *v - *w;\n" +	"}\n" +	"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" +	"{\n" +	"    b3Float4 v2v1, v3v1;\n" +	"    b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b3MprVec3Cross(dir, &v2v1, &v3v1);\n" +	"    b3MprVec3Normalize(dir);\n" +	"}\n" +	"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" +	"                                       const b3Float4 *dir)\n" +	"{\n" +	"    float dot;\n" +	"    dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +	"    return b3MprIsZero(dot) || dot > 0.f;\n" +	"}\n" +	"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" +	"                                     const b3MprSupport_t *v4,\n" +	"                                     const b3Float4 *dir)\n" +	"{\n" +	"    float dv1, dv2, dv3, dv4;\n" +	"    float dot1, dot2, dot3;\n" +	"    // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" 
+	"    dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" +	"    dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" +	"    dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" +	"    dv4 = b3MprVec3Dot(&v4->v, dir);\n" +	"    dot1 = dv4 - dv1;\n" +	"    dot2 = dv4 - dv2;\n" +	"    dot3 = dv4 - dv3;\n" +	"    dot1 = B3_MPR_FMIN(dot1, dot2);\n" +	"    dot1 = B3_MPR_FMIN(dot1, dot3);\n" +	"    return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" +	"}\n" +	"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal,   \n" +	"                                         const b3MprSupport_t *v4,\n" +	"                                         const b3Float4 *dir)\n" +	"{\n" +	"    float dot;\n" +	"    dot = b3MprVec3Dot(&v4->v, dir);\n" +	"    return b3MprIsZero(dot) || dot > 0.f;\n" +	"}\n" +	"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" +	"                              const b3MprSupport_t *v4)\n" +	"{\n" +	"    float dot;\n" +	"    b3Float4 v4v0;\n" +	"    b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" +	"    if (dot > 0.f){\n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" +	"        if (dot > 0.f){\n" +	"            b3MprSimplexSet(portal, 1, v4);\n" +	"        }else{\n" +	"            b3MprSimplexSet(portal, 3, v4);\n" +	"        }\n" +	"    }else{\n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" +	"        if (dot > 0.f){\n" +	"            b3MprSimplexSet(portal, 2, v4);\n" +	"        }else{\n" +	"            b3MprSimplexSet(portal, 1, v4);\n" +	"        }\n" +	"    }\n" +	"}\n" +	"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													
b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													__global int*	hasSepAxis,\n" +	"													b3MprSimplex_t *portal)\n" +	"{\n" +	"    b3Float4 dir, va, vb;\n" +	"    float dot;\n" +	"    int cont;\n" +	"	\n" +	"	\n" +	"    // vertex 0 is center of portal\n" +	"    b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" +	"    // vertex 0 is center of portal\n" +	"    b3MprSimplexSetSize(portal, 1);\n" +	"	\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" +	"        // Portal's center lies on origin (0,0,0) => we know that objects\n" +	"        // intersect but we would need to know penetration info.\n" +	"        // So move center little bit...\n" +	"        b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" +	"        b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" +	"    }\n" +	"    // vertex 1 = support in direction of origin\n" +	"    b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Scale(&dir, -1.f);\n" +	"    b3MprVec3Normalize(&dir);\n" +	"    b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" +	"    b3MprSimplexSetSize(portal, 2);\n" +	"    // test if origin isn't outside of v1\n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" +	"	\n" +	"    if (b3MprIsZero(dot) || dot < 0.f)\n" +	"        return -1;\n" +	"    // vertex 2\n" +	"    b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" +	"        if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" +	"            // origin lies on v1\n" +	"            return 1;\n" +	"        }else{\n" +	"            // origin lies 
on v0-v1 segment\n" +	"            return 2;\n" +	"        }\n" +	"    }\n" +	"    b3MprVec3Normalize(&dir);\n" +	"	 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" +	"    \n" +	"    dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" +	"    if (b3MprIsZero(dot) || dot < 0.f)\n" +	"        return -1;\n" +	"    b3MprSimplexSetSize(portal, 3);\n" +	"    // vertex 3 direction\n" +	"    b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                     &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                     &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Cross(&dir, &va, &vb);\n" +	"    b3MprVec3Normalize(&dir);\n" +	"    // it is better to form portal faces to be oriented \"outside\" origin\n" +	"    dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    if (dot > 0.f){\n" +	"        b3MprSimplexSwap(portal, 1, 2);\n" +	"        b3MprVec3Scale(&dir, -1.f);\n" +	"    }\n" +	"    while (b3MprSimplexSize(portal) < 4){\n" +	"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" +	"        \n" +	"        dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" +	"        if (b3MprIsZero(dot) || dot < 0.f)\n" +	"            return -1;\n" +	"        cont = 0;\n" +	"        // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" +	"        // continue\n" +	"        b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                          &b3MprSimplexPoint(portal, 3)->v);\n" +	"        dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +	"        if (dot < 0.f && !b3MprIsZero(dot)){\n" +	"            b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" +	"            cont = 1;\n" +	"        }\n" +	"        if (!cont){\n" +	"     
       // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" +	"            // continue\n" +	"            b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                              &b3MprSimplexPoint(portal, 2)->v);\n" +	"            dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +	"            if (dot < 0.f && !b3MprIsZero(dot)){\n" +	"                b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" +	"                cont = 1;\n" +	"            }\n" +	"        }\n" +	"        if (cont){\n" +	"            b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                             &b3MprSimplexPoint(portal, 0)->v);\n" +	"            b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                             &b3MprSimplexPoint(portal, 0)->v);\n" +	"            b3MprVec3Cross(&dir, &va, &vb);\n" +	"            b3MprVec3Normalize(&dir);\n" +	"        }else{\n" +	"            b3MprSimplexSetSize(portal, 4);\n" +	"        }\n" +	"    }\n" +	"    return 0;\n" +	"}\n" +	"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"													b3MprSimplex_t *portal)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"    b3MprSupport_t v4;\n" +	"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +	"    //while (1)\n" +	"	{\n" +	"        // compute direction outside the portal (from v0 throught v1,v2,v3\n" +	"        // face)\n" +	"        b3PortalDir(portal, &dir);\n" +	"        // test if origin is inside the portal\n" +	"        if (portalEncapsulesOrigin(portal, &dir))\n" +	"            return 0;\n" +	"        // get next support point\n" +	"        \n" +	"		 
b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +	"        // test if v4 can expand portal to contain origin and if portal\n" +	"        // expanding doesn't reach given tolerance\n" +	"        if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" +	"                || portalReachTolerance(portal, &v4, &dir))\n" +	"		{\n" +	"            return -1;\n" +	"        }\n" +	"        // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" +	"        // difference (direction from v0).\n" +	"        b3ExpandPortal(portal, &v4);\n" +	"    }\n" +	"    return -1;\n" +	"}\n" +	"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" +	"{\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    b3Float4 dir;\n" +	"    size_t i;\n" +	"    float b[4], sum, inv;\n" +	"    b3Float4 vec, p1, p2;\n" +	"    b3PortalDir(portal, &dir);\n" +	"    // use barycentric coordinates of tetrahedron to find origin\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                       &b3MprSimplexPoint(portal, 2)->v);\n" +	"    b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                       &b3MprSimplexPoint(portal, 2)->v);\n" +	"    b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +	"    b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" +	"                       &b3MprSimplexPoint(portal, 1)->v);\n" +	"    b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +	"	sum = b[0] + b[1] + b[2] + b[3];\n" +	"    if (b3MprIsZero(sum) || sum < 0.f){\n" +	"		b[0] = 0.f;\n" +	"        b3MprVec3Cross(&vec, 
&b3MprSimplexPoint(portal, 2)->v,\n" +	"                           &b3MprSimplexPoint(portal, 3)->v);\n" +	"        b[1] = b3MprVec3Dot(&vec, &dir);\n" +	"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +	"                           &b3MprSimplexPoint(portal, 1)->v);\n" +	"        b[2] = b3MprVec3Dot(&vec, &dir);\n" +	"        b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +	"                           &b3MprSimplexPoint(portal, 2)->v);\n" +	"        b[3] = b3MprVec3Dot(&vec, &dir);\n" +	"		sum = b[1] + b[2] + b[3];\n" +	"	}\n" +	"	inv = 1.f / sum;\n" +	"    b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" +	"    b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" +	"    for (i = 0; i < 4; i++){\n" +	"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" +	"        b3MprVec3Scale(&vec, b[i]);\n" +	"        b3MprVec3Add(&p1, &vec);\n" +	"        b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" +	"        b3MprVec3Scale(&vec, b[i]);\n" +	"        b3MprVec3Add(&p2, &vec);\n" +	"    }\n" +	"    b3MprVec3Scale(&p1, inv);\n" +	"    b3MprVec3Scale(&p2, inv);\n" +	"    b3MprVec3Copy(pos, &p1);\n" +	"    b3MprVec3Add(pos, &p2);\n" +	"    b3MprVec3Scale(pos, 0.5);\n" +	"}\n" +	"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" +	"{\n" +	"    b3Float4 ab;\n" +	"    b3MprVec3Sub2(&ab, a, b);\n" +	"    return b3MprVec3Len2(&ab);\n" +	"}\n" +	"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" +	"                                                  const b3Float4 *x0,\n" +	"                                                  const b3Float4 *b,\n" +	"                                                  b3Float4 *witness)\n" +	"{\n" +	"    // The computation comes from solving equation of segment:\n" +	"    //      S(t) = x0 + t.d\n" +	"    //          where - x0 is initial point of segment\n" +	"    //                - d is direction of segment from x0 (|d| > 0)\n" +	"    //                - t belongs to <0, 1> interval\n" 
+	"    // \n" +	"    // Than, distance from a segment to some point P can be expressed:\n" +	"    //      D(t) = |x0 + t.d - P|^2\n" +	"    //          which is distance from any point on segment. Minimization\n" +	"    //          of this function brings distance from P to segment.\n" +	"    // Minimization of D(t) leads to simple quadratic equation that's\n" +	"    // solving is straightforward.\n" +	"    //\n" +	"    // Bonus of this method is witness point for free.\n" +	"    float dist, t;\n" +	"    b3Float4 d, a;\n" +	"    // direction of segment\n" +	"    b3MprVec3Sub2(&d, b, x0);\n" +	"    // precompute vector from P to x0\n" +	"    b3MprVec3Sub2(&a, x0, P);\n" +	"    t  = -1.f * b3MprVec3Dot(&a, &d);\n" +	"    t /= b3MprVec3Len2(&d);\n" +	"    if (t < 0.f || b3MprIsZero(t)){\n" +	"        dist = b3MprVec3Dist2(x0, P);\n" +	"        if (witness)\n" +	"            b3MprVec3Copy(witness, x0);\n" +	"    }else if (t > 1.f || b3MprEq(t, 1.f)){\n" +	"        dist = b3MprVec3Dist2(b, P);\n" +	"        if (witness)\n" +	"            b3MprVec3Copy(witness, b);\n" +	"    }else{\n" +	"        if (witness){\n" +	"            b3MprVec3Copy(witness, &d);\n" +	"            b3MprVec3Scale(witness, t);\n" +	"            b3MprVec3Add(witness, x0);\n" +	"            dist = b3MprVec3Dist2(witness, P);\n" +	"        }else{\n" +	"            // recycling variables\n" +	"            b3MprVec3Scale(&d, t);\n" +	"            b3MprVec3Add(&d, &a);\n" +	"            dist = b3MprVec3Len2(&d);\n" +	"        }\n" +	"    }\n" +	"    return dist;\n" +	"}\n" +	"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" +	"                                const b3Float4 *x0, const b3Float4 *B,\n" +	"                                const b3Float4 *C,\n" +	"                                b3Float4 *witness)\n" +	"{\n" +	"    // Computation comes from analytic expression for triangle (x0, B, C)\n" +	"    //      T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" +	"    // Then 
equation for distance is:\n" +	"    //      D(s, t) = | T(s, t) - P |^2\n" +	"    // This leads to minimization of quadratic function of two variables.\n" +	"    // The solution from is taken only if s is between 0 and 1, t is\n" +	"    // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" +	"    // computed.\n" +	"    b3Float4 d1, d2, a;\n" +	"    float u, v, w, p, q, r;\n" +	"    float s, t, dist, dist2;\n" +	"    b3Float4 witness2;\n" +	"    b3MprVec3Sub2(&d1, B, x0);\n" +	"    b3MprVec3Sub2(&d2, C, x0);\n" +	"    b3MprVec3Sub2(&a, x0, P);\n" +	"    u = b3MprVec3Dot(&a, &a);\n" +	"    v = b3MprVec3Dot(&d1, &d1);\n" +	"    w = b3MprVec3Dot(&d2, &d2);\n" +	"    p = b3MprVec3Dot(&a, &d1);\n" +	"    q = b3MprVec3Dot(&a, &d2);\n" +	"    r = b3MprVec3Dot(&d1, &d2);\n" +	"    s = (q * r - w * p) / (w * v - r * r);\n" +	"    t = (-s * r - q) / w;\n" +	"    if ((b3MprIsZero(s) || s > 0.f)\n" +	"            && (b3MprEq(s, 1.f) || s < 1.f)\n" +	"            && (b3MprIsZero(t) || t > 0.f)\n" +	"            && (b3MprEq(t, 1.f) || t < 1.f)\n" +	"            && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" +	"        if (witness){\n" +	"            b3MprVec3Scale(&d1, s);\n" +	"            b3MprVec3Scale(&d2, t);\n" +	"            b3MprVec3Copy(witness, x0);\n" +	"            b3MprVec3Add(witness, &d1);\n" +	"            b3MprVec3Add(witness, &d2);\n" +	"            dist = b3MprVec3Dist2(witness, P);\n" +	"        }else{\n" +	"            dist  = s * s * v;\n" +	"            dist += t * t * w;\n" +	"            dist += 2.f * s * t * r;\n" +	"            dist += 2.f * s * p;\n" +	"            dist += 2.f * t * q;\n" +	"            dist += u;\n" +	"        }\n" +	"    }else{\n" +	"        dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" +	"        dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" +	"        if (dist2 < dist){\n" +	"            dist = dist2;\n" +	"            if (witness)\n" +	"                b3MprVec3Copy(witness, 
&witness2);\n" +	"        }\n" +	"        dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" +	"        if (dist2 < dist){\n" +	"            dist = dist2;\n" +	"            if (witness)\n" +	"                b3MprVec3Copy(witness, &witness2);\n" +	"        }\n" +	"    }\n" +	"    return dist;\n" +	"}\n" +	"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB,  b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +	"													b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"													b3ConstArray(b3Collidable_t)				cpuCollidables,\n" +	"													b3ConstArray(b3Float4)					cpuVertices,\n" +	"													__global b3Float4* sepAxis,\n" +	"                       b3MprSimplex_t *portal,\n" +	"                       float *depth, b3Float4 *pdir, b3Float4 *pos)\n" +	"{\n" +	"    b3Float4 dir;\n" +	"    b3MprSupport_t v4;\n" +	"    unsigned long iterations;\n" +	"	b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"    iterations = 1UL;\n" +	"	for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +	"    //while (1)\n" +	"	{\n" +	"        // compute portal direction and obtain next support point\n" +	"        b3PortalDir(portal, &dir);\n" +	"        \n" +	"		 b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +	"        // reached tolerance -> find penetration info\n" +	"        if (portalReachTolerance(portal, &v4, &dir)\n" +	"                || iterations ==B3_MPR_MAX_ITERATIONS)\n" +	"		{\n" +	"            *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" +	"            *depth = B3_MPR_SQRT(*depth);\n" +	"			\n" +	"			if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" +	"			{\n" +	"				\n" +	"				*pdir = dir;\n" +	"			} \n" +	"			b3MprVec3Normalize(pdir);\n" +	"			\n" +	"            // 
barycentric coordinates:\n" +	"            b3FindPos(portal, pos);\n" +	"            return;\n" +	"        }\n" +	"        b3ExpandPortal(portal, &v4);\n" +	"        iterations++;\n" +	"    }\n" +	"}\n" +	"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" +	"{\n" +	"    // Touching contact on portal's v1 - so depth is zero and direction\n" +	"    // is unimportant and pos can be guessed\n" +	"    *depth = 0.f;\n" +	"    b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +	"	b3Float4* b3mpr_vec3_origin = &zero;\n" +	"	b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" +	"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +	"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +	"    b3MprVec3Scale(pos, 0.5);\n" +	"}\n" +	"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" +	"                              float *depth, b3Float4 *dir, b3Float4 *pos)\n" +	"{\n" +	"    \n" +	"    // Origin lies on v0-v1 segment.\n" +	"    // Depth is distance to v1, direction also and position must be\n" +	"    // computed\n" +	"    b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +	"    b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +	"    b3MprVec3Scale(pos, 0.5f);\n" +	"    \n" +	"    b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +	"    *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" +	"    b3MprVec3Normalize(dir);\n" +	"}\n" +	"inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" +	"					b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" +	"					b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +	"					b3ConstArray(b3Collidable_t)	cpuCollidables,\n" +	"					b3ConstArray(b3Float4)	cpuVertices,\n" +	"					__global b3Float4* sepAxis,\n" +	"					__global int*	hasSepAxis,\n" +	"					float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" +	"{\n" +	"	\n" +	"	 b3MprSimplex_t portal;\n" +	"	 \n" +	"//	if (!hasSepAxis[pairIndex])\n" +	"	//	return -1;\n" +	"	\n" +	"	
hasSepAxis[pairIndex] = 0;\n" +	"	 int res;\n" +	"    // Phase 1: Portal discovery\n" +	"    res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" +	"	\n" +	"	  \n" +	"	//sepAxis[pairIndex] = *pdir;//or -dir?\n" +	"	switch (res)\n" +	"	{\n" +	"	case 0:\n" +	"		{\n" +	"			// Phase 2: Portal refinement\n" +	"		\n" +	"			res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" +	"			if (res < 0)\n" +	"				return -1;\n" +	"			// Phase 3. Penetration info\n" +	"			b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" +	"			hasSepAxis[pairIndex] = 1;\n" +	"			sepAxis[pairIndex] = -*dirOut;\n" +	"			break;\n" +	"		}\n" +	"	case 1:\n" +	"		{\n" +	"			 // Touching contact on portal's v1.\n" +	"			b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" +	"			break;\n" +	"		}\n" +	"	case 2:\n" +	"		{\n" +	"			\n" +	"			b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" +	"			break;\n" +	"		}\n" +	"	default:\n" +	"		{\n" +	"			hasSepAxis[pairIndex]=0;\n" +	"			//if (res < 0)\n" +	"			//{\n" +	"				// Origin isn't inside portal - no collision.\n" +	"				return -1;\n" +	"			//}\n" +	"		}\n" +	"	};\n" +	"	\n" +	"	return 0;\n" +	"};\n" +	"#endif //B3_MPR_PENETRATION_H\n" +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, 
y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"	#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	"	#define counter32_t volatile __global int*\n" +	"#endif\n" +	"__kernel void   mprPenetrationKernel( __global int4* pairs,\n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global float4* separatingNormals,\n" +	"																					__global int* hasSeparatingAxis,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numPairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		\n" +	"		//once the 
broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		float depthOut;\n" +	"		b3Float4 dirOut;\n" +	"		b3Float4 posOut;\n" +	"		int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" +	"		\n" +	"		\n" +	"		\n" +	"		\n" +	"		if (res==0)\n" +	"		{\n" +	"			//add a contact\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			if (dstIdx<contactCapacity)\n" +	"			{\n" +	"				pairs[pairIndex].z = dstIdx;\n" +	"				__global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" +	"				c->m_worldNormalOnB = -dirOut;//normal;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				int bodyA = pairs[pairIndex].x;\n" +	"				int bodyB = pairs[pairIndex].y;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? 
-bodyB:bodyB;\n" +	"				c->m_childIndexA = -1;\n" +	"				c->m_childIndexB = -1;\n" +	"				//for (int i=0;i<nContacts;i++)\n" +	"				posOut.w = -depthOut;\n" +	"				c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" +	"				GET_NPOINTS(*c) = 1;//nContacts;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"}\n" +	"typedef float4 Quaternion;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"inline void project(__global const b3ConvexPolyhedronData_t* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int 
i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"bool findSeparatingAxisUnitSphere(	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices,\n" +	"	__global const float4* unitSphereDirections,\n" +	"	int numUnitSphereDirections,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test unit sphere directions\n" +	"	for (int i=0;i<numUnitSphereDirections;i++)\n" +	"	{\n" +	"		float4 crossje;\n" +	"		crossje = unitSphereDirections[i];	\n" +	"		if (dot3F4(DeltaC2,crossje)>0)\n" +	"			crossje *= -1.f;\n" +	"		{\n" +	"			float dist;\n" +	"			bool result = true;\n" +	"			float Min0,Max0;\n" +	"			float Min1,Max1;\n" +	"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"		\n" +	"			if(Max0<Min1 || Max1<Min0)\n" +	"				return false;\n" +	"		\n" +	"			float d0 = Max0 - Min1;\n" +	"			float d1 = Max1 - Min0;\n" +	"			dist = d0<d1 ? 
d0:d1;\n" +	"			result = true;\n" +	"	\n" +	"			if(dist<*dmin)\n" +	"			{\n" +	"				*dmin = dist;\n" +	"				*sep = crossje;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"__kernel void   findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* unitSphereDirections,\n" +	"																					__global  float4* separatingNormals,\n" +	"																					__global  int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					int numUnitSphereDirections,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"	\n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"	\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"			\n" +	"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	\n" +	"			float dmin = dmins[i];\n" +	"	\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			posA.w = 0.f;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			posB.w = 0.f;\n" +	"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 c0 = transform(&c0local, &posA, 
&ornA);\n" +	"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"			const float4 DeltaC2 = c0 - c1;\n" +	"			float4 sepNormal = separatingNormals[i];\n" +	"			\n" +	"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +	"			if (numEdgeEdgeDirections>numUnitSphereDirections)\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisUnitSphere(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																										posB,ornB,\n" +	"																										DeltaC2,\n" +	"																										vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 1;\n" +	"					separatingNormals[i] = sepNormal;\n" +	"				}\n" +	"			}\n" +	"		}		//if (hasSeparatingAxis[i])\n" +	"	}//(i<numPairs)\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h index b0103fe674..b2e0a2dd47 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -1,1289 +1,1288 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* primitiveContactsKernelsCL= \ -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" 
-"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" 
-"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_PLANE 4\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define 
AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	int m_numChildShapes;\n" -"	float m_radius;\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;	\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"//	return numerator/denominator;	\n" -"}\n" 
-"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"void	trInverse(float4 
translationIn, Quaternion orientationIn,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtInvert(orientationIn);\n" -"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void	trMul(float4 translationA, Quaternion orientationA,\n" -"						float4 translationB, Quaternion orientationB,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtMul(orientationA,orientationB);\n" -"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" -"{\n" -"	return make_float4(	a.x + (b.x - a.x) * t,\n" -"						a.y + (b.y - a.y) * t,\n" -"						a.z + (b.z - a.z) * t,\n" -"						0.f);\n" -"}\n" -"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" -"{\n" -"	float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" -"	float dist = dot3F4(n, point) + planeEqn.w;\n" -"	*closestPointOnFace = point - dist * n;\n" -"	return dist;\n" -"}\n" -"inline bool IsPointInPolygon(float4 p, \n" -"							const btGpuFace* face,\n" -"							__global const float4* baseVertex,\n" -"							__global const  int* convexIndices,\n" -"							float4* out)\n" -"{\n" -"    float4 a;\n" -"    float4 b;\n" -"    float4 ab;\n" -"    float4 ap;\n" -"    float4 v;\n" -"	float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" -"	\n" -"	if (face->m_numIndices<2)\n" -"		return false;\n" -"	\n" -"	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" -"	\n" -"	b = v0;\n" -"    for(unsigned i=0; i != face->m_numIndices; ++i)\n" -"    {\n" -"		a = b;\n" -"		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" -"		b = vi;\n" -"        ab = b-a;\n" -"        ap = 
p-a;\n" -"        v = cross3(ab,plane);\n" -"        if (dot(ap, v) > 0.f)\n" -"        {\n" -"            float ab_m2 = dot(ab, ab);\n" -"            float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" -"            if (rt <= 0.f)\n" -"            {\n" -"                *out = a;\n" -"            }\n" -"            else if (rt >= 1.f) \n" -"            {\n" -"                *out = b;\n" -"            }\n" -"            else\n" -"            {\n" -"            	float s = 1.f - rt;\n" -"				out[0].x = s * a.x + rt * b.x;\n" -"				out[0].y = s * a.y + rt * b.y;\n" -"				out[0].z = s * a.z + rt * b.z;\n" -"            }\n" -"            return false;\n" -"        }\n" -"    }\n" -"    return true;\n" -"}\n" -"void	computeContactSphereConvex(int pairIndex,\n" -"																int bodyIndexA, int bodyIndexB, \n" -"																int collidableIndexA, int collidableIndexB, \n" -"																__global const BodyData* rigidBodies, \n" -"																__global const btCollidableGpu* collidables,\n" -"																__global const ConvexPolyhedronCL* convexShapes,\n" -"																__global const float4* convexVertices,\n" -"																__global const int* convexIndices,\n" -"																__global const btGpuFace* faces,\n" -"																__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																counter32_t nGlobalContactsOut,\n" -"																int maxContactCapacity,\n" -"																float4 spherePos2,\n" -"																float radius,\n" -"																float4 pos,\n" -"																float4 quat\n" -"																)\n" -"{\n" -"	float4 invPos;\n" -"	float4 invOrn;\n" -"	trInverse(pos,quat, &invPos,&invOrn);\n" -"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -"	int numFaces = convexShapes[shapeIndex].m_numFaces;\n" -"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" -"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -"	float minDist 
= -1000000.f;\n" -"	bool bCollide = true;\n" -"	for ( int f = 0; f < numFaces; f++ )\n" -"	{\n" -"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" -"		// set up a plane equation \n" -"		float4 planeEqn;\n" -"		float4 n1 = face.m_plane;\n" -"		n1.w = 0.f;\n" -"		planeEqn = n1;\n" -"		planeEqn.w = face.m_plane.w;\n" -"		\n" -"	\n" -"		// compute a signed distance from the vertex in cloth to the face of rigidbody.\n" -"		float4 pntReturn;\n" -"		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" -"		// If the distance is positive, the plane is a separating plane. \n" -"		if ( dist > radius )\n" -"		{\n" -"			bCollide = false;\n" -"			break;\n" -"		}\n" -"		if (dist>0)\n" -"		{\n" -"			//might hit an edge or vertex\n" -"			float4 out;\n" -"			float4 zeroPos = make_float4(0,0,0,0);\n" -"			bool isInPoly = IsPointInPolygon(spherePos,\n" -"					&face,\n" -"					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" -"					convexIndices,\n" -"           &out);\n" -"			if (isInPoly)\n" -"			{\n" -"				if (dist>minDist)\n" -"				{\n" -"					minDist = dist;\n" -"					closestPnt = pntReturn;\n" -"					hitNormalWorld = planeEqn;\n" -"					\n" -"				}\n" -"			} else\n" -"			{\n" -"				float4 tmp = spherePos-out;\n" -"				float l2 = dot(tmp,tmp);\n" -"				if (l2<radius*radius)\n" -"				{\n" -"					dist  = sqrt(l2);\n" -"					if (dist>minDist)\n" -"					{\n" -"						minDist = dist;\n" -"						closestPnt = out;\n" -"						hitNormalWorld = tmp/dist;\n" -"						\n" -"					}\n" -"					\n" -"				} else\n" -"				{\n" -"					bCollide = false;\n" -"					break;\n" -"				}\n" -"			}\n" -"		} else\n" -"		{\n" -"			if ( dist > minDist )\n" -"			{\n" -"				minDist = dist;\n" -"				closestPnt = pntReturn;\n" -"				hitNormalWorld.xyz = planeEqn.xyz;\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"	\n" -"	if (bCollide && minDist > -10000)\n" -"	{\n" -"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -"		float4 pOnB1 = 
transform(&closestPnt,&pos,&quat);\n" -"		\n" -"		float actualDepth = minDist-radius;\n" -"		if (actualDepth<=0.f)\n" -"		{\n" -"			\n" -"			pOnB1.w = actualDepth;\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"		\n" -"			\n" -"			if (1)//dstIdx < maxContactCapacity)\n" -"			{\n" -"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"				c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"				c->m_worldPosB[0] = pOnB1;\n" -"				c->m_childIndexA = -1;\n" -"				c->m_childIndexB = -1;\n" -"				GET_NPOINTS(*c) = 1;\n" -"			} \n" -"		}\n" -"	}//if (hasCollision)\n" -"}\n" -"							\n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -"	if( nPoints == 0 )\n" -"        return 0;\n" -"    \n" -"    if (nPoints <=4)\n" -"        return nPoints;\n" -"    \n" -"    \n" -"    if (nPoints >64)\n" -"        nPoints = 64;\n" -"    \n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		\n" -"		for (int i=0;i<nPoints;i++)\n" -"			center += p[i];\n" -"		center /= (float)nPoints;\n" -"	}\n" -"    \n" -"	\n" -"    \n" -"	//	sample 4 directions\n" -"    \n" -"    float4 aVector = p[0] - center;\n" -"    float4 u = cross3( nearNormal, aVector );\n" -"    float4 v = cross3( nearNormal, u );\n" -"    u = normalize3( u );\n" -"    v = normalize3( v );\n" -"    \n" -"    \n" -"    //keep point with deepest penetration\n" -"    float minW= FLT_MAX;\n" -"    \n" -"    int minIndex=-1;\n" -"    \n" -"    float4 maxDots;\n" -"    maxDots.x = FLT_MIN;\n" -"    maxDots.y = FLT_MIN;\n" -"    maxDots.z = FLT_MIN;\n" -"    maxDots.w = FLT_MIN;\n" -"    \n" -"    //	idx, distance\n" -"    
for(int ie = 0; ie<nPoints; ie++ )\n" -"    {\n" -"        if (p[ie].w<minW)\n" -"        {\n" -"            minW = p[ie].w;\n" -"            minIndex=ie;\n" -"        }\n" -"        float f;\n" -"        float4 r = p[ie]-center;\n" -"        f = dot3F4( u, r );\n" -"        if (f<maxDots.x)\n" -"        {\n" -"            maxDots.x = f;\n" -"            contactIdx[0].x = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -u, r );\n" -"        if (f<maxDots.y)\n" -"        {\n" -"            maxDots.y = f;\n" -"            contactIdx[0].y = ie;\n" -"        }\n" -"        \n" -"        \n" -"        f = dot3F4( v, r );\n" -"        if (f<maxDots.z)\n" -"        {\n" -"            maxDots.z = f;\n" -"            contactIdx[0].z = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -v, r );\n" -"        if (f<maxDots.w)\n" -"        {\n" -"            maxDots.w = f;\n" -"            contactIdx[0].w = ie;\n" -"        }\n" -"        \n" -"    }\n" -"    \n" -"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -"    {\n" -"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" -"        contactIdx[0].x = minIndex;\n" -"    }\n" -"    \n" -"    return 4;\n" -"    \n" -"}\n" -"#define MAX_PLANE_CONVEX_POINTS 64\n" -"int computeContactPlaneConvex(int pairIndex,\n" -"								int bodyIndexA, int bodyIndexB, \n" -"								int collidableIndexA, int collidableIndexB, \n" -"								__global const BodyData* rigidBodies, \n" -"								__global const btCollidableGpu*collidables,\n" -"								__global const ConvexPolyhedronCL* convexShapes,\n" -"								__global const float4* convexVertices,\n" -"								__global const int* convexIndices,\n" -"								__global const btGpuFace* faces,\n" -"								__global struct b3Contact4Data* restrict globalContactsOut,\n" -"								counter32_t nGlobalContactsOut,\n" -"								int maxContactCapacity,\n" -"					
			float4 posB,\n" -"								Quaternion ornB\n" -"								)\n" -"{\n" -"	int resultIndex=-1;\n" -"		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -"	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" -"	\n" -"	float4 posA;\n" -"	posA = rigidBodies[bodyIndexA].m_pos;\n" -"	Quaternion ornA;\n" -"	ornA = rigidBodies[bodyIndexA].m_quat;\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	float4 planeEq;\n" -"	 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -"	float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -"	float4 planeNormalWorld;\n" -"	planeNormalWorld = qtRotate(ornA,planeNormal);\n" -"	float planeConstant = planeEq.w;\n" -"	\n" -"	float4 invPosA;Quaternion invOrnA;\n" -"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -"	{\n" -"		\n" -"		trInverse(posA,ornA,&invPosA,&invOrnA);\n" -"		trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	}\n" -"	float4 invPosB;Quaternion invOrnB;\n" -"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" -"	{\n" -"		\n" -"		trInverse(posB,ornB,&invPosB,&invOrnB);\n" -"		trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1);	\n" -"	}\n" -"	\n" -"	float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" -"	float maxDot = -1e30;\n" -"	int hitVertex=-1;\n" -"	float4 hitVtx;\n" -"	float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" -"	int numPoints = 0;\n" -"	int4 contactIdx;\n" -"	contactIdx=make_int4(0,1,2,3);\n" -"    \n" -"	\n" -"	for (int i=0;i<hullB->m_numVertices;i++)\n" -"	{\n" -"		float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" -"		float curDot = dot(vtx,planeNormalInConvex);\n" -"		if (curDot>maxDot)\n" -"		{\n" -"			hitVertex=i;\n" -"			maxDot=curDot;\n" -"			hitVtx = vtx;\n" -"			//make sure the deepest points is always included\n" -"			if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" -"				numPoints--;\n" -"		}\n" -"		if 
(numPoints<MAX_PLANE_CONVEX_POINTS)\n" -"		{\n" -"			float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" -"			float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" -"			float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" -"			if (dist<0.f)\n" -"			{\n" -"				vtxWorld.w = dist;\n" -"				contactPoints[numPoints] = vtxWorld;\n" -"				numPoints++;\n" -"			}\n" -"		}\n" -"	}\n" -"	int numReducedPoints  = numPoints;\n" -"	if (numPoints>4)\n" -"	{\n" -"		numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" -"	}\n" -"	if (numReducedPoints>0)\n" -"	{\n" -"		int dstIdx;\n" -"	    AppendInc( nGlobalContactsOut, dstIdx );\n" -"		if (dstIdx < maxContactCapacity)\n" -"		{\n" -"			resultIndex = dstIdx;\n" -"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"			c->m_worldNormalOnB = -planeNormalWorld;\n" -"			//c->setFrictionCoeff(0.7);\n" -"			//c->setRestituitionCoeff(0.f);\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = pairIndex;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			switch (numReducedPoints)\n" -"            {\n" -"                case 4:\n" -"                    c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" -"                case 3:\n" -"                    c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" -"                case 2:\n" -"                    c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" -"                case 1:\n" -"                    c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" -"                default:\n" -"                {\n" -"                }\n" -"            };\n" -"			\n" -"			GET_NPOINTS(*c) = numReducedPoints;\n" 
-"		}//if (dstIdx < numPairs)\n" -"	}	\n" -"	return resultIndex;\n" -"}\n" -"void	computeContactPlaneSphere(int pairIndex,\n" -"																int bodyIndexA, int bodyIndexB, \n" -"																int collidableIndexA, int collidableIndexB, \n" -"																__global const BodyData* rigidBodies, \n" -"																__global const btCollidableGpu* collidables,\n" -"																__global const btGpuFace* faces,\n" -"																__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																counter32_t nGlobalContactsOut,\n" -"																int maxContactCapacity)\n" -"{\n" -"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -"	float radius = collidables[collidableIndexB].m_radius;\n" -"	float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" -"	float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" -"	float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" -"	float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" -"	\n" -"	bool hasCollision = false;\n" -"	float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -"	float planeConstant = planeEq.w;\n" -"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -"	{\n" -"		float4 invPosA;Quaternion invOrnA;\n" -"		trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" -"		trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	}\n" -"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" -"	{\n" -"		float4 invPosB;Quaternion invOrnB;\n" -"		trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" -"		trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1);	\n" -"	}\n" -"	float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" -"	float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -"	float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" -"	hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" -"	if (hasCollision)\n" -"	{\n" -"		
float4 vtxInPlaneProjected1 = vtxInPlane1 -   distance*planeNormal1;\n" -"		float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" -"		float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" -"		float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" -"		pOnB1.w = distance;\n" -"		int dstIdx;\n" -"    AppendInc( nGlobalContactsOut, dstIdx );\n" -"		\n" -"		if (dstIdx < maxContactCapacity)\n" -"		{\n" -"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"			c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = pairIndex;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"			c->m_worldPosB[0] = pOnB1;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			GET_NPOINTS(*c) = 1;\n" -"		}//if (dstIdx < numPairs)\n" -"	}//if (hasCollision)\n" -"}\n" -"__kernel void   primitiveContactsKernel( __global int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int numPairs, int maxContactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int 
localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -"		{\n" -"			float4 posB;\n" -"			posB = rigidBodies[bodyIndexB].m_pos;\n" -"			Quaternion ornB;\n" -"			ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" -"			if (contactIndex>=0)\n" -"				pairs[pairIndex].z = contactIndex;\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -"		{\n" -"			float4 posA;\n" -"			posA = rigidBodies[bodyIndexA].m_pos;\n" -"			Quaternion ornA;\n" -"			ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -"			if (contactIndex>=0)\n" -"				pairs[pairIndex].z = contactIndex;\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																
rigidBodies,collidables,faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -"		{\n" -"			computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -"			return;\n" -"		}\n" -"		\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -"		{\n" -"		\n" -"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" -"			float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" -"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																spherePos,sphereRadius,convexPos,convexOrn);\n" -"			return;\n" -"		}\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"		\n" -"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"			float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																
spherePos,sphereRadius,convexPos,convexOrn);\n" -"			return;\n" -"		}\n" -"	\n" -"	\n" -"	\n" -"		\n" -"	\n" -"	\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			//sphere-sphere\n" -"			float radiusA = collidables[collidableIndexA].m_radius;\n" -"			float radiusB = collidables[collidableIndexB].m_radius;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 diff = posA-posB;\n" -"			float len = length(diff);\n" -"			\n" -"			///iff distance positive, don't generate a new contact\n" -"			if ( len <= (radiusA+radiusB))\n" -"			{\n" -"				///distance (negative means penetration)\n" -"				float dist = len - (radiusA+radiusB);\n" -"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -"				if (len > 0.00001)\n" -"				{\n" -"					normalOnSurfaceB = diff / len;\n" -"				}\n" -"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -"				contactPosB.w = dist;\n" -"								\n" -"				int dstIdx;\n" -"				 AppendInc( nGlobalContactsOut, dstIdx );\n" -"				\n" -"				if (dstIdx < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = normalOnSurfaceB;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_worldPosB[0] = contactPosB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				}//if (dstIdx < numPairs)\n" -"			}//if ( len <= (radiusA+radiusB))\n" -"			return;\n" -"		}//SHAPE_SPHERE SHAPE_SPHERE\n" -"	}//	if (i<numPairs)\n" 
-"}\n" -"// work-in-progress\n" -"__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" -"													__global const BodyData* rigidBodies, \n" -"													__global const btCollidableGpu* collidables,\n" -"													__global const ConvexPolyhedronCL* convexShapes, \n" -"													__global const float4* vertices,\n" -"													__global const float4* uniqueEdges,\n" -"													__global const btGpuFace* faces,\n" -"													__global const int* indices,\n" -"													__global btAabbCL* aabbs,\n" -"													__global const btGpuChildShape* gpuChildShapes,\n" -"													__global struct b3Contact4Data* restrict globalContactsOut,\n" -"													counter32_t nGlobalContactsOut,\n" -"													int numCompoundPairs, int maxContactCapacity\n" -"													)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		int bodyIndexA = gpuCompoundPairs[i].x;\n" -"		int bodyIndexB = gpuCompoundPairs[i].y;\n" -"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"		\n" -"		int collidableIndexA = -1;\n" -"		int collidableIndexB = -1;\n" -"		\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		\n" -"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							\n" -"		if (childShapeIndexA >= 0)\n" -"		{\n" -"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"			float4 newOrnA = qtMul(ornA,childOrnA);\n" -"			posA = newPosA;\n" -"			ornA = newOrnA;\n" -"		} else\n" -"		{\n" -"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		}\n" -"		\n" -"		if (childShapeIndexB>=0)\n" 
-"		{\n" -"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"		} else\n" -"		{\n" -"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"		}\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		int pairIndex = i;\n" -"		if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" -"		{\n" -"			computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB,  collidableIndexA,collidableIndexB, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" -"		{\n" -"			computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" -"																rigidBodies,collidables,convexShapes,vertices,indices,\n" -"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" -"		{\n" -"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"			float4 convexPos = posA;\n" -"			float4 convexOrn = ornA;\n" -"			\n" -"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" -"										
rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"										spherePos,sphereRadius,convexPos,convexOrn);\n" -"	\n" -"			return;\n" -"		}\n" -"		if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" -"		{\n" -"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" -"			float4 convexPos = posB;\n" -"			float4 convexOrn = ornB;\n" -"			\n" -"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"										spherePos,sphereRadius,convexPos,convexOrn);\n" -"	\n" -"			return;\n" -"		}\n" -"	}//	if (i<numCompoundPairs)\n" -"}\n" -"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" -"{\n" -"	const float4* p1 = &vertices[0];\n" -"	const float4* p2 = &vertices[1];\n" -"	const float4* p3 = &vertices[2];\n" -"	float4 edge1;	edge1 = (*p2 - *p1);\n" -"	float4 edge2;	edge2 = ( *p3 - *p2 );\n" -"	float4 edge3;	edge3 = ( *p1 - *p3 );\n" -"	\n" -"	float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" -"	float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" -"	float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" -"	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" -"	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" -"	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" -"	\n" -"	\n" -"	float r1, r2, r3;\n" -"	r1 = dot(edge1_normal,p1_to_p );\n" -"	r2 = dot(edge2_normal,p2_to_p );\n" -"	r3 = dot(edge3_normal,p3_to_p );\n" -"	\n" -"	if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" -"		return true;\n" -"    if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" -"		return true;\n" -"	return false;\n" -"}\n" -"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" -"{\n" -"	float4 diff = p - from;\n" -"	
float4 v = to - from;\n" -"	float t = dot(v,diff);\n" -"	\n" -"	if (t > 0) \n" -"	{\n" -"		float dotVV = dot(v,v);\n" -"		if (t < dotVV) \n" -"		{\n" -"			t /= dotVV;\n" -"			diff -= t*v;\n" -"		} else \n" -"		{\n" -"			t = 1;\n" -"			diff -= v;\n" -"		}\n" -"	} else\n" -"	{\n" -"		t = 0;\n" -"	}\n" -"	*nearest = from + t*v;\n" -"	return dot(diff,diff);	\n" -"}\n" -"void	computeContactSphereTriangle(int pairIndex,\n" -"									int bodyIndexA, int bodyIndexB,\n" -"									int collidableIndexA, int collidableIndexB, \n" -"									__global const BodyData* rigidBodies, \n" -"									__global const btCollidableGpu* collidables,\n" -"									const float4* triangleVertices,\n" -"									__global struct b3Contact4Data* restrict globalContactsOut,\n" -"									counter32_t nGlobalContactsOut,\n" -"									int maxContactCapacity,\n" -"									float4 spherePos2,\n" -"									float radius,\n" -"									float4 pos,\n" -"									float4 quat,\n" -"									int faceIndex\n" -"									)\n" -"{\n" -"	float4 invPos;\n" -"	float4 invOrn;\n" -"	trInverse(pos,quat, &invPos,&invOrn);\n" -"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -"	int numFaces = 3;\n" -"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" -"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -"	float minDist = -1000000.f;\n" -"	bool bCollide = false;\n" -"	\n" -"	//////////////////////////////////////\n" -"	float4 sphereCenter;\n" -"	sphereCenter = spherePos;\n" -"	const float4* vertices = triangleVertices;\n" -"	float contactBreakingThreshold = 0.f;//todo?\n" -"	float radiusWithThreshold = radius + contactBreakingThreshold;\n" -"	float4 edge10;\n" -"	edge10 = vertices[1]-vertices[0];\n" -"	edge10.w = 0.f;//is this needed?\n" -"	float4 edge20;\n" -"	edge20 = vertices[2]-vertices[0];\n" -"	edge20.w = 0.f;//is this needed?\n" -"	float4 normal = cross3(edge10,edge20);\n" -"	normal = normalize(normal);\n" -"	float4 p1ToCenter;\n" -"	p1ToCenter = sphereCenter - vertices[0];\n" -"	\n" -"	float 
distanceFromPlane = dot(p1ToCenter,normal);\n" -"	if (distanceFromPlane < 0.f)\n" -"	{\n" -"		//triangle facing the other way\n" -"		distanceFromPlane *= -1.f;\n" -"		normal *= -1.f;\n" -"	}\n" -"	hitNormalWorld = normal;\n" -"	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" -"	\n" -"	// Check for contact / intersection\n" -"	bool hasContact = false;\n" -"	float4 contactPoint;\n" -"	if (isInsideContactPlane) \n" -"	{\n" -"	\n" -"		if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" -"		{\n" -"			// Inside the contact wedge - touches a point on the shell plane\n" -"			hasContact = true;\n" -"			contactPoint = sphereCenter - normal*distanceFromPlane;\n" -"			\n" -"		} else {\n" -"			// Could be inside one of the contact capsules\n" -"			float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" -"			float4 nearestOnEdge;\n" -"			int numEdges = 3;\n" -"			for (int i = 0; i < numEdges; i++) \n" -"			{\n" -"				float4 pa =vertices[i];\n" -"				float4 pb = vertices[(i+1)%3];\n" -"				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" -"				if (distanceSqr < contactCapsuleRadiusSqr) \n" -"				{\n" -"					// Yep, we're inside a capsule\n" -"					hasContact = true;\n" -"					contactPoint = nearestOnEdge;\n" -"					\n" -"				}\n" -"				\n" -"			}\n" -"		}\n" -"	}\n" -"	if (hasContact) \n" -"	{\n" -"		closestPnt = contactPoint;\n" -"		float4 contactToCenter = sphereCenter - contactPoint;\n" -"		minDist = length(contactToCenter);\n" -"		if (minDist>FLT_EPSILON)\n" -"		{\n" -"			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" -"			bCollide  = true;\n" -"		}\n" -"		\n" -"	}\n" -"	/////////////////////////////////////\n" -"	if (bCollide && minDist > -10000)\n" -"	{\n" -"		\n" -"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" -"		float actualDepth = minDist-radius;\n" -"		\n" -"		if (actualDepth<=0.f)\n" -"		{\n" -"			
pOnB1.w = actualDepth;\n" -"			int dstIdx;\n" -"			\n" -"			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" -"			if (lenSqr>FLT_EPSILON)\n" -"			{\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"			\n" -"				if (dstIdx < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -"					c->m_worldPosB[0] = pOnB1;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = faceIndex;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				} \n" -"			}\n" -"		}\n" -"	}//if (hasCollision)\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n" -"												__global const BodyData* rigidBodies,\n" -"												__global const btCollidableGpu* collidables,\n" -"												__global const ConvexPolyhedronCL* convexShapes, \n" -"												__global const float4* vertices,\n" -"												__global const float4* uniqueEdges,\n" -"												__global const btGpuFace* faces,\n" -"												__global const int* indices,\n" -"												__global btAabbCL* aabbs,\n" -"												__global struct b3Contact4Data* restrict globalContactsOut,\n" -"												counter32_t nGlobalContactsOut,\n" -"													int numConcavePairs, int maxContactCapacity\n" -"												)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"	int pairIdx = i;\n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = 
rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" -"	{\n" -"		int f = concavePairs[i].z;\n" -"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"		\n" -"		float4 verticesA[3];\n" -"		for (int i=0;i<3;i++)\n" -"		{\n" -"			int index = indices[face.m_indexOffset+i];\n" -"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"			verticesA[i] = vert;\n" -"		}\n" -"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -"		float sphereRadius = collidables[collidableIndexB].m_radius;\n" -"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -"		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -"																rigidBodies,collidables,\n" -"																verticesA,\n" -"																globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -"																spherePos,sphereRadius,convexPos,convexOrn, f);\n" -"		return;\n" -"	}\n" -"}\n" -; +static const char* primitiveContactsKernelsCL = +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef 
__cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	
m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_PLANE 4\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define SHAPE_SPHERE 7\n" +	"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	"#define counter32_t volatile __global int*\n" +	"#endif\n" +	"#define GET_GROUP_IDX get_group_id(0)\n" +	"#define GET_LOCAL_IDX get_local_id(0)\n" +	"#define GET_GLOBAL_IDX get_global_id(0)\n" +	"#define GET_GROUP_SIZE get_local_size(0)\n" +	"#define GET_NUM_GROUPS get_num_groups(0)\n" +	"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +	"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +	"#define AtomInc(x) atom_inc(&(x))\n" +	"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define AtomAdd(x, value) atom_add(&(x), value)\n" +	"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +	"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +	"#define max2 max\n" +	"#define min2 min\n" +	"typedef unsigned int u32;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			
m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	int m_numChildShapes;\n" +	"	float m_radius;\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;	\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +	"#define make_float4 (float4)\n" +	"#define make_float2 (float2)\n" +	"#define make_uint4 (uint4)\n" +	"#define make_int4 (int4)\n" +	"#define make_uint2 (uint2)\n" +	"#define make_int2 (int2)\n" +	"__inline\n" +	"float fastDiv(float numerator, float denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"//	return numerator/denominator;	\n" +	"}\n" +	"__inline\n" +	"float4 fastDiv4(float4 numerator, float4 denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"}\n" +	"__inline\n" 
+	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"//#define dot3F4 dot\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" +	"		float4* translationOut, Quaternion* 
orientationOut)\n" +	"{\n" +	"	*orientationOut = qtInvert(orientationIn);\n" +	"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" +	"}\n" +	"void	trMul(float4 translationA, Quaternion orientationA,\n" +	"						float4 translationB, Quaternion orientationB,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtMul(orientationA,orientationB);\n" +	"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" +	"{\n" +	"	return make_float4(	a.x + (b.x - a.x) * t,\n" +	"						a.y + (b.y - a.y) * t,\n" +	"						a.z + (b.z - a.z) * t,\n" +	"						0.f);\n" +	"}\n" +	"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" +	"{\n" +	"	float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" +	"	float dist = dot3F4(n, point) + planeEqn.w;\n" +	"	*closestPointOnFace = point - dist * n;\n" +	"	return dist;\n" +	"}\n" +	"inline bool IsPointInPolygon(float4 p, \n" +	"							const btGpuFace* face,\n" +	"							__global const float4* baseVertex,\n" +	"							__global const  int* convexIndices,\n" +	"							float4* out)\n" +	"{\n" +	"    float4 a;\n" +	"    float4 b;\n" +	"    float4 ab;\n" +	"    float4 ap;\n" +	"    float4 v;\n" +	"	float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" +	"	\n" +	"	if (face->m_numIndices<2)\n" +	"		return false;\n" +	"	\n" +	"	float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" +	"	\n" +	"	b = v0;\n" +	"    for(unsigned i=0; i != face->m_numIndices; ++i)\n" +	"    {\n" +	"		a = b;\n" +	"		float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" +	"		b = vi;\n" +	"        ab = b-a;\n" +	"        ap = p-a;\n" +	"        v = 
cross3(ab,plane);\n" +	"        if (dot(ap, v) > 0.f)\n" +	"        {\n" +	"            float ab_m2 = dot(ab, ab);\n" +	"            float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" +	"            if (rt <= 0.f)\n" +	"            {\n" +	"                *out = a;\n" +	"            }\n" +	"            else if (rt >= 1.f) \n" +	"            {\n" +	"                *out = b;\n" +	"            }\n" +	"            else\n" +	"            {\n" +	"            	float s = 1.f - rt;\n" +	"				out[0].x = s * a.x + rt * b.x;\n" +	"				out[0].y = s * a.y + rt * b.y;\n" +	"				out[0].z = s * a.z + rt * b.z;\n" +	"            }\n" +	"            return false;\n" +	"        }\n" +	"    }\n" +	"    return true;\n" +	"}\n" +	"void	computeContactSphereConvex(int pairIndex,\n" +	"																int bodyIndexA, int bodyIndexB, \n" +	"																int collidableIndexA, int collidableIndexB, \n" +	"																__global const BodyData* rigidBodies, \n" +	"																__global const btCollidableGpu* collidables,\n" +	"																__global const ConvexPolyhedronCL* convexShapes,\n" +	"																__global const float4* convexVertices,\n" +	"																__global const int* convexIndices,\n" +	"																__global const btGpuFace* faces,\n" +	"																__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																counter32_t nGlobalContactsOut,\n" +	"																int maxContactCapacity,\n" +	"																float4 spherePos2,\n" +	"																float radius,\n" +	"																float4 pos,\n" +	"																float4 quat\n" +	"																)\n" +	"{\n" +	"	float4 invPos;\n" +	"	float4 invOrn;\n" +	"	trInverse(pos,quat, &invPos,&invOrn);\n" +	"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +	"	int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +	"	int numFaces = convexShapes[shapeIndex].m_numFaces;\n" +	"	float4 closestPnt = (float4)(0, 0, 0, 0);\n" +	"	float4 hitNormalWorld = (float4)(0, 
0, 0, 0);\n" +	"	float minDist = -1000000.f;\n" +	"	bool bCollide = true;\n" +	"	for ( int f = 0; f < numFaces; f++ )\n" +	"	{\n" +	"		btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" +	"		// set up a plane equation \n" +	"		float4 planeEqn;\n" +	"		float4 n1 = face.m_plane;\n" +	"		n1.w = 0.f;\n" +	"		planeEqn = n1;\n" +	"		planeEqn.w = face.m_plane.w;\n" +	"		\n" +	"	\n" +	"		// compute a signed distance from the vertex in cloth to the face of rigidbody.\n" +	"		float4 pntReturn;\n" +	"		float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" +	"		// If the distance is positive, the plane is a separating plane. \n" +	"		if ( dist > radius )\n" +	"		{\n" +	"			bCollide = false;\n" +	"			break;\n" +	"		}\n" +	"		if (dist>0)\n" +	"		{\n" +	"			//might hit an edge or vertex\n" +	"			float4 out;\n" +	"			float4 zeroPos = make_float4(0,0,0,0);\n" +	"			bool isInPoly = IsPointInPolygon(spherePos,\n" +	"					&face,\n" +	"					&convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" +	"					convexIndices,\n" +	"           &out);\n" +	"			if (isInPoly)\n" +	"			{\n" +	"				if (dist>minDist)\n" +	"				{\n" +	"					minDist = dist;\n" +	"					closestPnt = pntReturn;\n" +	"					hitNormalWorld = planeEqn;\n" +	"					\n" +	"				}\n" +	"			} else\n" +	"			{\n" +	"				float4 tmp = spherePos-out;\n" +	"				float l2 = dot(tmp,tmp);\n" +	"				if (l2<radius*radius)\n" +	"				{\n" +	"					dist  = sqrt(l2);\n" +	"					if (dist>minDist)\n" +	"					{\n" +	"						minDist = dist;\n" +	"						closestPnt = out;\n" +	"						hitNormalWorld = tmp/dist;\n" +	"						\n" +	"					}\n" +	"					\n" +	"				} else\n" +	"				{\n" +	"					bCollide = false;\n" +	"					break;\n" +	"				}\n" +	"			}\n" +	"		} else\n" +	"		{\n" +	"			if ( dist > minDist )\n" +	"			{\n" +	"				minDist = dist;\n" +	"				closestPnt = pntReturn;\n" +	"				hitNormalWorld.xyz = planeEqn.xyz;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"	\n" +	"	if (bCollide && minDist > -10000)\n" +	"	
{\n" +	"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" +	"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +	"		\n" +	"		float actualDepth = minDist-radius;\n" +	"		if (actualDepth<=0.f)\n" +	"		{\n" +	"			\n" +	"			pOnB1.w = actualDepth;\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		\n" +	"			\n" +	"			if (1)//dstIdx < maxContactCapacity)\n" +	"			{\n" +	"				__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"				c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"				c->m_worldPosB[0] = pOnB1;\n" +	"				c->m_childIndexA = -1;\n" +	"				c->m_childIndexB = -1;\n" +	"				GET_NPOINTS(*c) = 1;\n" +	"			} \n" +	"		}\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"							\n" +	"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 )\n" +	"        return 0;\n" +	"    \n" +	"    if (nPoints <=4)\n" +	"        return nPoints;\n" +	"    \n" +	"    \n" +	"    if (nPoints >64)\n" +	"        nPoints = 64;\n" +	"    \n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			center += p[i];\n" +	"		center /= (float)nPoints;\n" +	"	}\n" +	"    \n" +	"	\n" +	"    \n" +	"	//	sample 4 directions\n" +	"    \n" +	"    float4 aVector = p[0] - center;\n" +	"    float4 u = cross3( nearNormal, aVector );\n" +	"    float4 v = cross3( nearNormal, u );\n" +	"    u = normalize3( u );\n" +	"    v = normalize3( v );\n" +	"    \n" +	"    \n" +	"    //keep point with deepest penetration\n" +	"    float minW= FLT_MAX;\n" +	"    \n" +	"    int minIndex=-1;\n" +	"    \n" +	"    float4 maxDots;\n" +	"   
 maxDots.x = FLT_MIN;\n" +	"    maxDots.y = FLT_MIN;\n" +	"    maxDots.z = FLT_MIN;\n" +	"    maxDots.w = FLT_MIN;\n" +	"    \n" +	"    //	idx, distance\n" +	"    for(int ie = 0; ie<nPoints; ie++ )\n" +	"    {\n" +	"        if (p[ie].w<minW)\n" +	"        {\n" +	"            minW = p[ie].w;\n" +	"            minIndex=ie;\n" +	"        }\n" +	"        float f;\n" +	"        float4 r = p[ie]-center;\n" +	"        f = dot3F4( u, r );\n" +	"        if (f<maxDots.x)\n" +	"        {\n" +	"            maxDots.x = f;\n" +	"            contactIdx[0].x = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -u, r );\n" +	"        if (f<maxDots.y)\n" +	"        {\n" +	"            maxDots.y = f;\n" +	"            contactIdx[0].y = ie;\n" +	"        }\n" +	"        \n" +	"        \n" +	"        f = dot3F4( v, r );\n" +	"        if (f<maxDots.z)\n" +	"        {\n" +	"            maxDots.z = f;\n" +	"            contactIdx[0].z = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -v, r );\n" +	"        if (f<maxDots.w)\n" +	"        {\n" +	"            maxDots.w = f;\n" +	"            contactIdx[0].w = ie;\n" +	"        }\n" +	"        \n" +	"    }\n" +	"    \n" +	"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" +	"    {\n" +	"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" +	"        contactIdx[0].x = minIndex;\n" +	"    }\n" +	"    \n" +	"    return 4;\n" +	"    \n" +	"}\n" +	"#define MAX_PLANE_CONVEX_POINTS 64\n" +	"int computeContactPlaneConvex(int pairIndex,\n" +	"								int bodyIndexA, int bodyIndexB, \n" +	"								int collidableIndexA, int collidableIndexB, \n" +	"								__global const BodyData* rigidBodies, \n" +	"								__global const btCollidableGpu*collidables,\n" +	"								__global const ConvexPolyhedronCL* convexShapes,\n" +	"								__global const float4* convexVertices,\n" +	"								__global const int* 
convexIndices,\n" +	"								__global const btGpuFace* faces,\n" +	"								__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"								counter32_t nGlobalContactsOut,\n" +	"								int maxContactCapacity,\n" +	"								float4 posB,\n" +	"								Quaternion ornB\n" +	"								)\n" +	"{\n" +	"	int resultIndex=-1;\n" +	"		int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +	"	__global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" +	"	\n" +	"	float4 posA;\n" +	"	posA = rigidBodies[bodyIndexA].m_pos;\n" +	"	Quaternion ornA;\n" +	"	ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	float4 planeEq;\n" +	"	 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +	"	float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +	"	float4 planeNormalWorld;\n" +	"	planeNormalWorld = qtRotate(ornA,planeNormal);\n" +	"	float planeConstant = planeEq.w;\n" +	"	\n" +	"	float4 invPosA;Quaternion invOrnA;\n" +	"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +	"	{\n" +	"		\n" +	"		trInverse(posA,ornA,&invPosA,&invOrnA);\n" +	"		trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	}\n" +	"	float4 invPosB;Quaternion invOrnB;\n" +	"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" +	"	{\n" +	"		\n" +	"		trInverse(posB,ornB,&invPosB,&invOrnB);\n" +	"		trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1);	\n" +	"	}\n" +	"	\n" +	"	float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" +	"	float maxDot = -1e30;\n" +	"	int hitVertex=-1;\n" +	"	float4 hitVtx;\n" +	"	float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" +	"	int numPoints = 0;\n" +	"	int4 contactIdx;\n" +	"	contactIdx=make_int4(0,1,2,3);\n" +	"    \n" +	"	\n" +	"	for (int i=0;i<hullB->m_numVertices;i++)\n" +	"	{\n" +	"		float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" +	"		float curDot = 
dot(vtx,planeNormalInConvex);\n" +	"		if (curDot>maxDot)\n" +	"		{\n" +	"			hitVertex=i;\n" +	"			maxDot=curDot;\n" +	"			hitVtx = vtx;\n" +	"			//make sure the deepest points is always included\n" +	"			if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" +	"				numPoints--;\n" +	"		}\n" +	"		if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" +	"		{\n" +	"			float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" +	"			float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" +	"			float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" +	"			if (dist<0.f)\n" +	"			{\n" +	"				vtxWorld.w = dist;\n" +	"				contactPoints[numPoints] = vtxWorld;\n" +	"				numPoints++;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	int numReducedPoints  = numPoints;\n" +	"	if (numPoints>4)\n" +	"	{\n" +	"		numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" +	"	}\n" +	"	if (numReducedPoints>0)\n" +	"	{\n" +	"		int dstIdx;\n" +	"	    AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		if (dstIdx < maxContactCapacity)\n" +	"		{\n" +	"			resultIndex = dstIdx;\n" +	"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"			c->m_worldNormalOnB = -planeNormalWorld;\n" +	"			//c->setFrictionCoeff(0.7);\n" +	"			//c->setRestituitionCoeff(0.f);\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = pairIndex;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			switch (numReducedPoints)\n" +	"            {\n" +	"                case 4:\n" +	"                    c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" +	"                case 3:\n" +	"                    c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" +	"                case 
2:\n" +	"                    c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" +	"                case 1:\n" +	"                    c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" +	"                default:\n" +	"                {\n" +	"                }\n" +	"            };\n" +	"			\n" +	"			GET_NPOINTS(*c) = numReducedPoints;\n" +	"		}//if (dstIdx < numPairs)\n" +	"	}	\n" +	"	return resultIndex;\n" +	"}\n" +	"void	computeContactPlaneSphere(int pairIndex,\n" +	"																int bodyIndexA, int bodyIndexB, \n" +	"																int collidableIndexA, int collidableIndexB, \n" +	"																__global const BodyData* rigidBodies, \n" +	"																__global const btCollidableGpu* collidables,\n" +	"																__global const btGpuFace* faces,\n" +	"																__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																counter32_t nGlobalContactsOut,\n" +	"																int maxContactCapacity)\n" +	"{\n" +	"	float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +	"	float radius = collidables[collidableIndexB].m_radius;\n" +	"	float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" +	"	float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" +	"	float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" +	"	float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" +	"	\n" +	"	bool hasCollision = false;\n" +	"	float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +	"	float planeConstant = planeEq.w;\n" +	"	float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +	"	{\n" +	"		float4 invPosA;Quaternion invOrnA;\n" +	"		trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" +	"		trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	}\n" +	"	float4 planeInConvexPos1;	Quaternion planeInConvexOrn1;\n" +	"	{\n" +	"		float4 invPosB;Quaternion invOrnB;\n" +	"		trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" +	"		
trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1);	\n" +	"	}\n" +	"	float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" +	"	float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +	"	float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" +	"	hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" +	"	if (hasCollision)\n" +	"	{\n" +	"		float4 vtxInPlaneProjected1 = vtxInPlane1 -   distance*planeNormal1;\n" +	"		float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" +	"		float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" +	"		float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" +	"		pOnB1.w = distance;\n" +	"		int dstIdx;\n" +	"    AppendInc( nGlobalContactsOut, dstIdx );\n" +	"		\n" +	"		if (dstIdx < maxContactCapacity)\n" +	"		{\n" +	"			__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"			c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = pairIndex;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"			c->m_worldPosB[0] = pOnB1;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			GET_NPOINTS(*c) = 1;\n" +	"		}//if (dstIdx < numPairs)\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"__kernel void   primitiveContactsKernel( __global int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* 
faces,\n" +	"																					__global const int* indices,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numPairs, int maxContactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +	"		{\n" +	"			float4 posB;\n" +	"			posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			Quaternion ornB;\n" +	"			ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" +	"			if (contactIndex>=0)\n" +	"				pairs[pairIndex].z = contactIndex;\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +	"		{\n" +	"			float4 posA;\n" +	"			posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			Quaternion ornA;\n" +	"			ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"									
							rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +	"			if (contactIndex>=0)\n" +	"				pairs[pairIndex].z = contactIndex;\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +	"		{\n" +	"			computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"																rigidBodies,collidables,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +	"		{\n" +	"		\n" +	"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" +	"			float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"																spherePos,sphereRadius,convexPos,convexOrn);\n" +	"			return;\n" +	"		}\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" 
+	"		{\n" +	"		\n" +	"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"			float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"																spherePos,sphereRadius,convexPos,convexOrn);\n" +	"			return;\n" +	"		}\n" +	"	\n" +	"	\n" +	"	\n" +	"		\n" +	"	\n" +	"	\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			//sphere-sphere\n" +	"			float radiusA = collidables[collidableIndexA].m_radius;\n" +	"			float radiusB = collidables[collidableIndexB].m_radius;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 diff = posA-posB;\n" +	"			float len = length(diff);\n" +	"			\n" +	"			///iff distance positive, don't generate a new contact\n" +	"			if ( len <= (radiusA+radiusB))\n" +	"			{\n" +	"				///distance (negative means penetration)\n" +	"				float dist = len - (radiusA+radiusB);\n" +	"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" +	"				if (len > 0.00001)\n" +	"				{\n" +	"					normalOnSurfaceB = diff / len;\n" +	"				}\n" +	"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +	"				contactPosB.w = dist;\n" +	"								\n" +	"				int dstIdx;\n" +	"				 AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				\n" +	"				if (dstIdx < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = normalOnSurfaceB;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"		
			c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_worldPosB[0] = contactPosB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				}//if (dstIdx < numPairs)\n" +	"			}//if ( len <= (radiusA+radiusB))\n" +	"			return;\n" +	"		}//SHAPE_SPHERE SHAPE_SPHERE\n" +	"	}//	if (i<numPairs)\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" +	"													__global const BodyData* rigidBodies, \n" +	"													__global const btCollidableGpu* collidables,\n" +	"													__global const ConvexPolyhedronCL* convexShapes, \n" +	"													__global const float4* vertices,\n" +	"													__global const float4* uniqueEdges,\n" +	"													__global const btGpuFace* faces,\n" +	"													__global const int* indices,\n" +	"													__global btAabbCL* aabbs,\n" +	"													__global const btGpuChildShape* gpuChildShapes,\n" +	"													__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"													counter32_t nGlobalContactsOut,\n" +	"													int numCompoundPairs, int maxContactCapacity\n" +	"													)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		int bodyIndexA = gpuCompoundPairs[i].x;\n" +	"		int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"		\n" +	"		int collidableIndexA = -1;\n" +	"		int collidableIndexB = -1;\n" +	"		\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		\n" +	"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"		
float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							\n" +	"		if (childShapeIndexA >= 0)\n" +	"		{\n" +	"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"			float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"			posA = newPosA;\n" +	"			ornA = newOrnA;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		}\n" +	"		\n" +	"		if (childShapeIndexB>=0)\n" +	"		{\n" +	"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"		}\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		int pairIndex = i;\n" +	"		if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB,  collidableIndexA,collidableIndexB, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" +	"		{\n" +	"			computeContactPlaneConvex( pairIndex, 
bodyIndexB,bodyIndexA,  collidableIndexB,collidableIndexA, \n" +	"																rigidBodies,collidables,convexShapes,vertices,indices,\n" +	"																faces,	globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" +	"		{\n" +	"			float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"			float4 convexPos = posA;\n" +	"			float4 convexOrn = ornA;\n" +	"			\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" +	"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"										spherePos,sphereRadius,convexPos,convexOrn);\n" +	"	\n" +	"			return;\n" +	"		}\n" +	"		if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +	"			float sphereRadius = collidables[collidableIndexA].m_radius;\n" +	"			float4 convexPos = posB;\n" +	"			float4 convexOrn = ornB;\n" +	"			\n" +	"			computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +	"										rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"										spherePos,sphereRadius,convexPos,convexOrn);\n" +	"	\n" +	"			return;\n" +	"		}\n" +	"	}//	if (i<numCompoundPairs)\n" +	"}\n" +	"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" +	"{\n" +	"	const float4* p1 = &vertices[0];\n" +	"	const float4* p2 = &vertices[1];\n" +	"	const float4* p3 = &vertices[2];\n" +	"	float4 edge1;	edge1 = (*p2 - *p1);\n" +	"	float4 edge2;	edge2 = ( *p3 - *p2 );\n" +	"	float4 edge3;	edge3 = ( *p1 - *p3 );\n" +	"	\n" +	"	float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" +	"	float4 p2_to_p; 
p2_to_p = ( *p - *p2 );\n" +	"	float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" +	"	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" +	"	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" +	"	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" +	"	\n" +	"	\n" +	"	float r1, r2, r3;\n" +	"	r1 = dot(edge1_normal,p1_to_p );\n" +	"	r2 = dot(edge2_normal,p2_to_p );\n" +	"	r3 = dot(edge3_normal,p3_to_p );\n" +	"	\n" +	"	if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" +	"		return true;\n" +	"    if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" +	"		return true;\n" +	"	return false;\n" +	"}\n" +	"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" +	"{\n" +	"	float4 diff = p - from;\n" +	"	float4 v = to - from;\n" +	"	float t = dot(v,diff);\n" +	"	\n" +	"	if (t > 0) \n" +	"	{\n" +	"		float dotVV = dot(v,v);\n" +	"		if (t < dotVV) \n" +	"		{\n" +	"			t /= dotVV;\n" +	"			diff -= t*v;\n" +	"		} else \n" +	"		{\n" +	"			t = 1;\n" +	"			diff -= v;\n" +	"		}\n" +	"	} else\n" +	"	{\n" +	"		t = 0;\n" +	"	}\n" +	"	*nearest = from + t*v;\n" +	"	return dot(diff,diff);	\n" +	"}\n" +	"void	computeContactSphereTriangle(int pairIndex,\n" +	"									int bodyIndexA, int bodyIndexB,\n" +	"									int collidableIndexA, int collidableIndexB, \n" +	"									__global const BodyData* rigidBodies, \n" +	"									__global const btCollidableGpu* collidables,\n" +	"									const float4* triangleVertices,\n" +	"									__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"									counter32_t nGlobalContactsOut,\n" +	"									int maxContactCapacity,\n" +	"									float4 spherePos2,\n" +	"									float radius,\n" +	"									float4 pos,\n" +	"									float4 quat,\n" +	"									int faceIndex\n" +	"									)\n" +	"{\n" +	"	float4 invPos;\n" +	"	float4 invOrn;\n" +	"	trInverse(pos,quat, &invPos,&invOrn);\n" +	"	float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +	"	int numFaces = 3;\n" +	"	float4 closestPnt = (float4)(0, 0, 0, 
0);\n" +	"	float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" +	"	float minDist = -1000000.f;\n" +	"	bool bCollide = false;\n" +	"	\n" +	"	//////////////////////////////////////\n" +	"	float4 sphereCenter;\n" +	"	sphereCenter = spherePos;\n" +	"	const float4* vertices = triangleVertices;\n" +	"	float contactBreakingThreshold = 0.f;//todo?\n" +	"	float radiusWithThreshold = radius + contactBreakingThreshold;\n" +	"	float4 edge10;\n" +	"	edge10 = vertices[1]-vertices[0];\n" +	"	edge10.w = 0.f;//is this needed?\n" +	"	float4 edge20;\n" +	"	edge20 = vertices[2]-vertices[0];\n" +	"	edge20.w = 0.f;//is this needed?\n" +	"	float4 normal = cross3(edge10,edge20);\n" +	"	normal = normalize(normal);\n" +	"	float4 p1ToCenter;\n" +	"	p1ToCenter = sphereCenter - vertices[0];\n" +	"	\n" +	"	float distanceFromPlane = dot(p1ToCenter,normal);\n" +	"	if (distanceFromPlane < 0.f)\n" +	"	{\n" +	"		//triangle facing the other way\n" +	"		distanceFromPlane *= -1.f;\n" +	"		normal *= -1.f;\n" +	"	}\n" +	"	hitNormalWorld = normal;\n" +	"	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" +	"	\n" +	"	// Check for contact / intersection\n" +	"	bool hasContact = false;\n" +	"	float4 contactPoint;\n" +	"	if (isInsideContactPlane) \n" +	"	{\n" +	"	\n" +	"		if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" +	"		{\n" +	"			// Inside the contact wedge - touches a point on the shell plane\n" +	"			hasContact = true;\n" +	"			contactPoint = sphereCenter - normal*distanceFromPlane;\n" +	"			\n" +	"		} else {\n" +	"			// Could be inside one of the contact capsules\n" +	"			float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" +	"			float4 nearestOnEdge;\n" +	"			int numEdges = 3;\n" +	"			for (int i = 0; i < numEdges; i++) \n" +	"			{\n" +	"				float4 pa =vertices[i];\n" +	"				float4 pb = vertices[(i+1)%3];\n" +	"				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" +	"				if (distanceSqr < contactCapsuleRadiusSqr) \n" +	"		
		{\n" +	"					// Yep, we're inside a capsule\n" +	"					hasContact = true;\n" +	"					contactPoint = nearestOnEdge;\n" +	"					\n" +	"				}\n" +	"				\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (hasContact) \n" +	"	{\n" +	"		closestPnt = contactPoint;\n" +	"		float4 contactToCenter = sphereCenter - contactPoint;\n" +	"		minDist = length(contactToCenter);\n" +	"		if (minDist>FLT_EPSILON)\n" +	"		{\n" +	"			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" +	"			bCollide  = true;\n" +	"		}\n" +	"		\n" +	"	}\n" +	"	/////////////////////////////////////\n" +	"	if (bCollide && minDist > -10000)\n" +	"	{\n" +	"		\n" +	"		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" +	"		float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +	"		float actualDepth = minDist-radius;\n" +	"		\n" +	"		if (actualDepth<=0.f)\n" +	"		{\n" +	"			pOnB1.w = actualDepth;\n" +	"			int dstIdx;\n" +	"			\n" +	"			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" +	"			if (lenSqr>FLT_EPSILON)\n" +	"			{\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			\n" +	"				if (dstIdx < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +	"					c->m_worldPosB[0] = pOnB1;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = faceIndex;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				} \n" +	"			}\n" +	"		}\n" +	"	}//if (hasCollision)\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSphereContactsKernel( __global int4* concavePairs,\n" +	"												__global const BodyData* rigidBodies,\n" +	"												__global 
const btCollidableGpu* collidables,\n" +	"												__global const ConvexPolyhedronCL* convexShapes, \n" +	"												__global const float4* vertices,\n" +	"												__global const float4* uniqueEdges,\n" +	"												__global const btGpuFace* faces,\n" +	"												__global const int* indices,\n" +	"												__global btAabbCL* aabbs,\n" +	"												__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"												counter32_t nGlobalContactsOut,\n" +	"													int numConcavePairs, int maxContactCapacity\n" +	"												)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"	int pairIdx = i;\n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" +	"	{\n" +	"		int f = concavePairs[i].z;\n" +	"		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"		\n" +	"		float4 verticesA[3];\n" +	"		for (int i=0;i<3;i++)\n" +	"		{\n" +	"			int index = indices[face.m_indexOffset+i];\n" +	"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"			verticesA[i] = vert;\n" +	"		}\n" +	"		float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +	"		float sphereRadius = collidables[collidableIndexB].m_radius;\n" +	"		float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +	"		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +	"		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +	"																rigidBodies,collidables,\n" +	"																verticesA,\n" +	"																globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +	"												
				spherePos,sphereRadius,convexPos,convexOrn, f);\n" +	"		return;\n" +	"	}\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h index f0ecfc7851..907809d8bd 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -1,2099 +1,2098 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satClipKernelsCL= \ -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define 
B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if 
(ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef  struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -"	b3Float4	m_worldPosB[4];\n" -"//	b3Float4	m_localPosA[4];\n" -"//	b3Float4	m_localPosB[4];\n" -"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" -"	unsigned short  m_restituitionCoeffCmp;\n" -"	unsigned short  m_frictionCoeffCmp;\n" -"	int m_batchIdx;\n" -"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -"	int m_bodyBPtrAndSignBit;\n" -"	int	m_childIndexA;\n" -"	int	m_childIndexB;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -"	return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -"	contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat 
b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -"	b3Float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"	int m_unusedPadding1;\n" -"	int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -"	b3Float4		m_localCenter;\n" -"	b3Float4		m_extents;\n" -"	b3Float4		mC;\n" -"	b3Float4		mE;\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	
int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -"	SHAPE_HEIGHT_FIELD=1,\n" -"	SHAPE_CONVEX_HULL=3,\n" -"	SHAPE_PLANE=4,\n" -"	SHAPE_CONCAVE_TRIMESH=5,\n" -"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -"	SHAPE_SPHERE=7,\n" -"	MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -"	b3Float4	m_childPosition;\n" -"	b3Quat		m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -"	int m_bodyIndexA;\n" -"	int m_bodyIndexB;\n" -"//	int	m_pairType;\n" -"	int m_childShapeIndexA;\n" -"	int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = 
(b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, 
b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -"	b3Float4				m_pos;\n" -"	b3Quat					m_quat;\n" -"	b3Float4				m_linVel;\n" -"	b3Float4				m_angVel;\n" -"	int 					m_collidableIdx;\n" -"	float 				m_invMass;\n" -"	float 				m_restituitionCoeff;\n" -"	float 				m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -"	b3Mat3x3 m_invInertiaWorld;\n" -"	b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -"	\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define 
make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"//	return numerator/denominator;	\n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -"	return native_divide(numerator, denominator);	\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" 
-"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" -"{\n" -"	return make_float4(	a.x + (b.x - a.x) * t,\n" -"						a.y + (b.y - a.y) * t,\n" -"						a.z + (b.z - a.z) * t,\n" -"						0.f);\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" -"{\n" -"	\n" -"	int ve;\n" -"	float ds, de;\n" -"	int numVertsOut = 0;\n" -"    //double-check next test\n" -"    	if (numVertsIn < 2)\n" -"    		return 0;\n" -"    \n" -"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" -"	float4 endVertex = pVtxIn[0];\n" -"	\n" -"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -"    \n" -"	for (ve = 0; ve < numVertsIn; ve++)\n" -"	{\n" -"		endVertex=pVtxIn[ve];\n" -"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -"		if (ds<0)\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start < 0, end < 0, so output endVertex\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"			else\n" -"			{\n" -"				// Start < 0, end >= 0, so output intersection\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"			}\n" -"		}\n" -"		else\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start >= 0, end < 0 so output intersection and end\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"		}\n" -"		firstVertex = 
endVertex;\n" -"		ds = de;\n" -"	}\n" -"	return numVertsOut;\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" -"{\n" -"	\n" -"	int ve;\n" -"	float ds, de;\n" -"	int numVertsOut = 0;\n" -"//double-check next test\n" -"	if (numVertsIn < 2)\n" -"		return 0;\n" -"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" -"	float4 endVertex = pVtxIn[0];\n" -"	\n" -"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -"	for (ve = 0; ve < numVertsIn; ve++)\n" -"	{\n" -"		endVertex=pVtxIn[ve];\n" -"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -"		if (ds<0)\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start < 0, end < 0, so output endVertex\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"			else\n" -"			{\n" -"				// Start < 0, end >= 0, so output intersection\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"			}\n" -"		}\n" -"		else\n" -"		{\n" -"			if (de<0)\n" -"			{\n" -"				// Start >= 0, end < 0 so output intersection and end\n" -"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -"				ppVtxOut[numVertsOut++] = endVertex;\n" -"			}\n" -"		}\n" -"		firstVertex = endVertex;\n" -"		ds = de;\n" -"	}\n" -"	return numVertsOut;\n" -"}\n" -"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  \n" -"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -"	float4* worldVertsB2, int capacityWorldVertsB2,\n" -"	const float minDist, float maxDist,\n" -"	__global const float4* vertices,\n" -"	__global const b3GpuFace_t* faces,\n" -"	__global const int* indices,\n" -"	float4* contactsOut,\n" -"	int contactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	float4* pVtxIn = worldVertsB1;\n" -"	float4* pVtxOut = worldVertsB2;\n" -"	\n" -"	
int numVertsIn = numWorldVertsB1;\n" -"	int numVertsOut = 0;\n" -"	int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"				faces[hullA->m_faceOffset+face].m_plane.x, \n" -"				faces[hullA->m_faceOffset+face].m_plane.y, \n" -"				faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"		\n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"	b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"	int numVerticesA = polyA.m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"		const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" -"		const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -"		const float4 edge0 = a - b;\n" -"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = transform(&a,&posA,&ornA);\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		\n" -"		//clip face\n" -"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -"		//btSwap(pVtxIn,pVtxOut);\n" -"		float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsIn = numVertsOut;\n" -"		numVertsOut = 0;\n" 
-"	}\n" -"	\n" -"	// only keep points that are behind the witness face\n" -"	{\n" -"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float localPlaneEq = polyA.m_plane.w;\n" -"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -"		for (int i=0;i<numVertsIn;i++)\n" -"		{\n" -"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"			if (depth <=minDist)\n" -"			{\n" -"				depth = minDist;\n" -"			}\n" -"			if (depth <=maxDist)\n" -"			{\n" -"				float4 pointInWorld = pVtxIn[i];\n" -"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" -"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"			}\n" -"		}\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  \n" -"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -"	float4* worldVertsB2, int capacityWorldVertsB2,\n" -"	const float minDist, float maxDist,\n" -"	const float4* verticesA,\n" -"	const b3GpuFace_t* facesA,\n" -"	const int* indicesA,\n" -"	__global const float4* verticesB,\n" -"	__global const b3GpuFace_t* facesB,\n" -"	__global const int* indicesB,\n" -"	float4* contactsOut,\n" -"	int contactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	float4* pVtxIn = worldVertsB1;\n" -"	float4* pVtxOut = worldVertsB2;\n" -"	\n" -"	int numVertsIn = numWorldVertsB1;\n" -"	int numVertsOut = 0;\n" -"	int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"				facesA[hullA->m_faceOffset+face].m_plane.x, \n" -"				facesA[hullA->m_faceOffset+face].m_plane.y, \n" -"				facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"	
	\n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"	b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"	int numVerticesA = polyA.m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"		const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" -"		const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -"		const float4 edge0 = a - b;\n" -"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = transform(&a,&posA,&ornA);\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		\n" -"		//clip face\n" -"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -"		//btSwap(pVtxIn,pVtxOut);\n" -"		float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsIn = numVertsOut;\n" -"		numVertsOut = 0;\n" -"	}\n" -"	\n" -"	// only keep points that are behind the witness face\n" -"	{\n" -"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -"		float localPlaneEq = polyA.m_plane.w;\n" -"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -"		for (int i=0;i<numVertsIn;i++)\n" -"		{\n" -"			float depth = 
dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"			if (depth <=minDist)\n" -"			{\n" -"				depth = minDist;\n" -"			}\n" -"			if (depth <=maxDist)\n" -"			{\n" -"				float4 pointInWorld = pVtxIn[i];\n" -"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" -"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"			}\n" -"		}\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int	clipHullAgainstHull(const float4 separatingNormal,\n" -"	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -"	const float minDist, float maxDist,\n" -"	__global const float4* vertices,\n" -"	__global const b3GpuFace_t* faces,\n" -"	__global const int* indices,\n" -"	float4*	localContactsOut,\n" -"	int localContactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" -"				faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	{\n" -"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"	if (closestFaceB>=0)\n" -"	{\n" -"		
numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" -"				posA,ornA,\n" -"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" -"				faces,\n" -"				indices,localContactsOut,localContactCapacity);\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"int	clipHullAgainstHullLocalA(const float4 separatingNormal,\n" -"	const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -"	const float minDist, float maxDist,\n" -"	const float4* verticesA,\n" -"	const b3GpuFace_t* facesA,\n" -"	const int* indicesA,\n" -"	__global const float4* verticesB,\n" -"	__global const b3GpuFace_t* facesB,\n" -"	__global const int* indicesB,\n" -"	float4*	localContactsOut,\n" -"	int localContactCapacity)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" -"				facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"	{\n" -"		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"	if (closestFaceB>=0)\n" -"	{\n" -"		numContactsOut = 
clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" -"				posA,ornA,\n" -"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" -"				verticesA,facesA,indicesA,\n" -"				verticesB,facesB,indicesB,\n" -"				localContactsOut,localContactCapacity);\n" -"	}\n" -"	return numContactsOut;\n" -"}\n" -"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" -"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" -"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" -"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" -"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -"	if( nPoints == 0 )\n" -"        return 0;\n" -"    \n" -"    if (nPoints <=4)\n" -"        return nPoints;\n" -"    \n" -"    \n" -"    if (nPoints >64)\n" -"        nPoints = 64;\n" -"    \n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		\n" -"		for (int i=0;i<nPoints;i++)\n" -"			center += p[i];\n" -"		center /= (float)nPoints;\n" -"	}\n" -"    \n" -"	\n" -"    \n" -"	//	sample 4 directions\n" -"    \n" -"    float4 aVector = p[0] - center;\n" -"    float4 u = cross3( nearNormal, aVector );\n" -"    float4 v = cross3( nearNormal, u );\n" -"    u = normalize3( u );\n" -"    v = normalize3( v );\n" -"    \n" -"    \n" -"    //keep point with deepest penetration\n" -"    float minW= FLT_MAX;\n" -"    \n" -"    int minIndex=-1;\n" -"    \n" -"    float4 maxDots;\n" -"    maxDots.x = FLT_MIN;\n" -"    maxDots.y = FLT_MIN;\n" -"    maxDots.z = FLT_MIN;\n" -"    maxDots.w = FLT_MIN;\n" -"    \n" -"    //	idx, distance\n" -"    for(int ie = 0; ie<nPoints; ie++ )\n" -"    {\n" -"        if (p[ie].w<minW)\n" -"        {\n" -"            minW = p[ie].w;\n" -"            minIndex=ie;\n" -"        }\n" -"        float 
f;\n" -"        float4 r = p[ie]-center;\n" -"        f = dot3F4( u, r );\n" -"        if (f<maxDots.x)\n" -"        {\n" -"            maxDots.x = f;\n" -"            contactIdx[0].x = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -u, r );\n" -"        if (f<maxDots.y)\n" -"        {\n" -"            maxDots.y = f;\n" -"            contactIdx[0].y = ie;\n" -"        }\n" -"        \n" -"        \n" -"        f = dot3F4( v, r );\n" -"        if (f<maxDots.z)\n" -"        {\n" -"            maxDots.z = f;\n" -"            contactIdx[0].z = ie;\n" -"        }\n" -"        \n" -"        f = dot3F4( -v, r );\n" -"        if (f<maxDots.w)\n" -"        {\n" -"            maxDots.w = f;\n" -"            contactIdx[0].w = ie;\n" -"        }\n" -"        \n" -"    }\n" -"    \n" -"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -"    {\n" -"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" -"        contactIdx[0].x = minIndex;\n" -"    }\n" -"    \n" -"    return 4;\n" -"    \n" -"}\n" -"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -"    contactIdx[0] = 0;\n" -"    contactIdx[1] = 1;\n" -"    contactIdx[2] = 2;\n" -"    contactIdx[3] = 3;\n" -"    \n" -"	if( nPoints == 0 ) return 0;\n" -"    \n" -"	nPoints = min2( nPoints, 4 );\n" -"    return nPoints;\n" -"    \n" -"}\n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -"	if( nPoints == 0 ) return 0;\n" -"	nPoints = min2( nPoints, 64 );\n" -"	float4 center = make_float4(0.f);\n" -"	{\n" -"		float4 v[64];\n" -"		for (int i=0;i<nPoints;i++)\n" -"			v[i] = p[i];\n" -"		//memcpy( v, p, nPoints*sizeof(float4) );\n" -"		PARALLEL_SUM( v, nPoints );\n" -"		center = v[0]/(float)nPoints;\n" -"	}\n" -"	\n" -"	{	//	sample 4 directions\n" -"		if( nPoints < 
4 )\n" -"		{\n" -"			for(int i=0; i<nPoints; i++) \n" -"				contactIdx[i] = i;\n" -"			return nPoints;\n" -"		}\n" -"		float4 aVector = p[0] - center;\n" -"		float4 u = cross3( nearNormal, aVector );\n" -"		float4 v = cross3( nearNormal, u );\n" -"		u = normalize3( u );\n" -"		v = normalize3( v );\n" -"		int idx[4];\n" -"		float2 max00 = make_float2(0,FLT_MAX);\n" -"		{\n" -"			//	idx, distance\n" -"			{\n" -"				{\n" -"					int4 a[64];\n" -"					for(int ie = 0; ie<nPoints; ie++ )\n" -"					{\n" -"						\n" -"						\n" -"						float f;\n" -"						float4 r = p[ie]-center;\n" -"						f = dot3F4( u, r );\n" -"						a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( -u, r );\n" -"						a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( v, r );\n" -"						a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"						f = dot3F4( -v, r );\n" -"						a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -"					}\n" -"					for(int ie=0; ie<nPoints; ie++)\n" -"					{\n" -"						a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" -"						a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" -"						a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" -"						a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w;\n" -"					}\n" -"					idx[0] = (int)a[0].x & 0xff;\n" -"					idx[1] = (int)a[0].y & 0xff;\n" -"					idx[2] = (int)a[0].z & 0xff;\n" -"					idx[3] = (int)a[0].w & 0xff;\n" -"				}\n" -"			}\n" -"			{\n" -"				float2 h[64];\n" -"				PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" -"				REDUCE_MIN( h, nPoints );\n" -"				max00 = h[0];\n" -"			}\n" -"		}\n" -"		contactIdx[0] = idx[0];\n" -"		contactIdx[1] = idx[1];\n" -"		contactIdx[2] = idx[2];\n" -"		contactIdx[3] = idx[3];\n" -"		return 4;\n" -"	}\n" -"}\n" -"__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, \n" -"																	__global const b3RigidBodyData_t* rigidBodies, \n" -"																	__global const float4* closestPointsWorld,\n" -"																	__global const float4* separatingNormalsWorld,\n" -"																	__global const int* contactCounts,\n" -"																	__global const int* contactOffsets,\n" -"																	__global struct b3Contact4Data* restrict contactsOut,\n" -"																	counter32_t nContactsOut,\n" -"																	int contactCapacity,\n" -"																	int numPairs,\n" -"																	int pairIndex\n" -"																	)\n" -"{\n" -"	int idx = get_global_id(0);\n" -"	\n" -"	if (idx<numPairs)\n" -"	{\n" -"		float4 normal = separatingNormalsWorld[idx];\n" -"		int nPoints = contactCounts[idx];\n" -"		__global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" -"		float4 localPoints[64];\n" -"		for (int i=0;i<nPoints;i++)\n" -"		{\n" -"			localPoints[i] = pointsIn[i];\n" -"		}\n" -"		int contactIdx[4];// = {-1,-1,-1,-1};\n" -"		contactIdx[0] = -1;\n" -"		contactIdx[1] = -1;\n" -"		contactIdx[2] = -1;\n" -"		contactIdx[3] = -1;\n" -"		int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" -"		int dstIdx;\n" -"		AppendInc( nContactsOut, dstIdx );\n" -"		if (dstIdx<contactCapacity)\n" -"		{\n" -"			__global struct b3Contact4Data* c = contactsOut + 
dstIdx;\n" -"			c->m_worldNormalOnB = -normal;\n" -"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"			c->m_batchIdx = idx;\n" -"			int bodyA = pairs[pairIndex].x;\n" -"			int bodyB = pairs[pairIndex].y;\n" -"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -"			c->m_childIndexA = -1;\n" -"			c->m_childIndexB = -1;\n" -"			for (int i=0;i<nContacts;i++)\n" -"			{\n" -"				c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" -"			}\n" -"			GET_NPOINTS(*c) = nContacts;\n" -"		}\n" -"	}\n" -"}\n" -"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtInvert(orientationIn);\n" -"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void	trMul(float4 translationA, Quaternion orientationA,\n" -"						float4 translationB, Quaternion orientationB,\n" -"		float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -"	*orientationOut = qtMul(orientationA,orientationB);\n" -"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__kernel void   clipHullHullKernel( __global int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global const int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																		
			counter32_t nGlobalContactsOut,\n" -"																					int numPairs,\n" -"																					int contactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"		\n" -"			int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" -"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"														rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -"													  rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"				float4 normal = -separatingNormals[i];\n" -"				int nPoints = numLocalContactsOut;\n" -"				float4* pointsIn = localContactsOut;\n" -"				int contactIdx[4];// = {-1,-1,-1,-1};\n" -"				contactIdx[0] = -1;\n" -"				contactIdx[1] = -1;\n" -"				contactIdx[2] = -1;\n" -"				contactIdx[3] = -1;\n" -"		\n" -"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"		\n" -"				\n" -"				int mprContactIndex = pairs[pairIndex].z;\n" -"				int dstIdx = mprContactIndex;\n" -"				if 
(dstIdx<0)\n" -"				{\n" -"					AppendInc( nGlobalContactsOut, dstIdx );\n" -"				}\n" -"				if (dstIdx<contactCapacity)\n" -"				{\n" -"					pairs[pairIndex].z = dstIdx;\n" -"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					for (int i=0;i<nReducedContacts;i++)\n" -"					{\n" -"					//this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" -"						if (i>0||(mprContactIndex<0))\n" -"						{\n" -"							c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"						}\n" -"					}\n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"				}\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"}\n" -"__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" -"																					__global const float4* gpuCompoundSepNormalsOut,\n" -"																					__global const int* gpuHasCompoundSepNormalsOut,\n" -"												
									__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int numCompoundPairs, int maxContactCapacity)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		if (gpuHasCompoundSepNormalsOut[i])\n" -"		{\n" -"			int bodyIndexA = gpuCompoundPairs[i].x;\n" -"			int bodyIndexB = gpuCompoundPairs[i].y;\n" -"			\n" -"			int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"			int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"			\n" -"			int collidableIndexA = -1;\n" -"			int collidableIndexB = -1;\n" -"			\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			\n" -"			float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"								\n" -"			if (childShapeIndexA >= 0)\n" -"			{\n" -"				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"				float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"				float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"				float4 newOrnA = qtMul(ornA,childOrnA);\n" -"				posA = newPosA;\n" -"				ornA = newOrnA;\n" -"			} else\n" -"			{\n" -"				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			}\n" -"			\n" -"			if (childShapeIndexB>=0)\n" -"			{\n" -"				collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"				float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"				float4 newPosB = 
transform(&childPosB,&posB,&ornB);\n" -"				float4 newOrnB = qtMul(ornB,childOrnB);\n" -"				posB = newPosB;\n" -"				ornB = newOrnB;\n" -"			} else\n" -"			{\n" -"				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"			}\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" -"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"														posA,ornA,\n" -"													  posB,ornB,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"				float4 normal = -gpuCompoundSepNormalsOut[i];\n" -"				int nPoints = numLocalContactsOut;\n" -"				float4* pointsIn = localContactsOut;\n" -"				int contactIdx[4];// = {-1,-1,-1,-1};\n" -"				contactIdx[0] = -1;\n" -"				contactIdx[1] = -1;\n" -"				contactIdx[2] = -1;\n" -"				contactIdx[3] = -1;\n" -"		\n" -"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"		\n" -"				int dstIdx;\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"				if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = gpuCompoundPairs[pairIndex].x;\n" -"					int bodyB = gpuCompoundPairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_childIndexA = 
childShapeIndexA;\n" -"					c->m_childIndexB = childShapeIndexB;\n" -"					for (int i=0;i<nReducedContacts;i++)\n" -"					{\n" -"						c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"					}\n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"				}\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (gpuHasCompoundSepNormalsOut[i])\n" -"	}//	if (i<numCompoundPairs)\n" -"}\n" -"__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, \n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global const int* hasSeparatingAxis,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numPairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"			\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -"		{\n" -"			//sphere-sphere\n" -"			float radiusA = collidables[collidableIndexA].m_radius;\n" -"			float radiusB = collidables[collidableIndexB].m_radius;\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			float4 diff = posA-posB;\n" -"			float len = length(diff);\n" -"			\n" -"			///iff distance positive, don't generate a new contact\n" -"			if ( len <= (radiusA+radiusB))\n" -"			{\n" -"				///distance (negative means penetration)\n" -"				float dist = len - (radiusA+radiusB);\n" -"				
float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -"				if (len > 0.00001)\n" -"				{\n" -"					normalOnSurfaceB = diff / len;\n" -"				}\n" -"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -"				contactPosB.w = dist;\n" -"								\n" -"				int dstIdx;\n" -"				AppendInc( nGlobalContactsOut, dstIdx );\n" -"				if (dstIdx < contactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normalOnSurfaceB;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"					c->m_worldPosB[0] = contactPosB;\n" -"					c->m_childIndexA = -1;\n" -"					c->m_childIndexB = -1;\n" -"					GET_NPOINTS(*c) = 1;\n" -"				}//if (dstIdx < numPairs)\n" -"			}//if ( len <= (radiusA+radiusB))\n" -"		}//SHAPE_SPHERE SHAPE_SPHERE\n" -"	}//if (i<numPairs)\n" -"}				\n" -"__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" -"																					__global const b3RigidBodyData_t* rigidBodies, \n" -"																					__global const b3Collidable_t* collidables,\n" -"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const b3GpuFace_t* faces,\n" -"																					__global const int* indices,\n" -"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" -"																					__global const float4* separatingNormals,\n" -"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" -"																					counter32_t 
nGlobalContactsOut,\n" -"																					int contactCapacity,\n" -"																					int numConcavePairs)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"	float4 worldVertsB1[64];\n" -"	float4 worldVertsB2[64];\n" -"	int capacityWorldVerts = 64;	\n" -"	float4 localContactsOut[64];\n" -"	int localContactCapacity=64;\n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"	if (i<numConcavePairs)\n" -"	{\n" -"		//negative value means that the pair is invalid\n" -"		if (concavePairsIn[i].w<0)\n" -"			return;\n" -"		int bodyIndexA = concavePairsIn[i].x;\n" -"		int bodyIndexB = concavePairsIn[i].y;\n" -"		int f = concavePairsIn[i].z;\n" -"		int childShapeIndexA = f;\n" -"		\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		///////////////////////////////////////////////////////////////\n" -"		\n" -"	\n" -"		bool overlap = false;\n" -"		\n" -"		b3ConvexPolyhedronData_t convexPolyhedronA;\n" -"	//add 3 vertices of the triangle\n" -"		convexPolyhedronA.m_numVertices = 3;\n" -"		convexPolyhedronA.m_vertexOffset = 0;\n" -"		float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"		b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"		\n" -"		float4 verticesA[3];\n" -"		for (int i=0;i<3;i++)\n" -"		{\n" -"			int index = indices[face.m_indexOffset+i];\n" -"			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"			verticesA[i] = vert;\n" -"			localCenter += vert;\n" -"		}\n" -"		float dmin = FLT_MAX;\n" -"		int localCC=0;\n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = 
(verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"                                  \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"                             \n" -"		b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot3F4(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                                            \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot3F4(edgeNormal,v0);\n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				
indicesA[curUsedIndices++]=prevVertex;\n" -"                                            \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 sepAxis = separatingNormals[i];\n" -"		\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"		int childShapeIndexB =-1;\n" -"		if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			///////////////////\n" -"			///compound shape support\n" -"			\n" -"			childShapeIndexB = concavePairsIn[pairIndex].w;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			\n" -"		}\n" -"		\n" -"		////////////////////////////////////////\n" -"		\n" -"		\n" -"		\n" -"		int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" -"														&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"														posA,ornA,\n" -"													  posB,ornB,\n" -"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -"														minDist, maxDist,\n" -"														&verticesA,&facesA,&indicesA,\n" -"														
vertices,faces,indices,\n" -"														localContactsOut,localContactCapacity);\n" -"												\n" -"		if (numLocalContactsOut>0)\n" -"		{\n" -"			float4 normal = -separatingNormals[i];\n" -"			int nPoints = numLocalContactsOut;\n" -"			float4* pointsIn = localContactsOut;\n" -"			int contactIdx[4];// = {-1,-1,-1,-1};\n" -"			contactIdx[0] = -1;\n" -"			contactIdx[1] = -1;\n" -"			contactIdx[2] = -1;\n" -"			contactIdx[3] = -1;\n" -"	\n" -"			int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -"	\n" -"			int dstIdx;\n" -"			AppendInc( nGlobalContactsOut, dstIdx );\n" -"			if (dstIdx<contactCapacity)\n" -"			{\n" -"				__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -"				c->m_worldNormalOnB = -normal;\n" -"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -"				c->m_batchIdx = pairIndex;\n" -"				int bodyA = concavePairsIn[pairIndex].x;\n" -"				int bodyB = concavePairsIn[pairIndex].y;\n" -"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"				c->m_childIndexA = childShapeIndexA;\n" -"				c->m_childIndexB = childShapeIndexB;\n" -"				for (int i=0;i<nReducedContacts;i++)\n" -"				{\n" -"					c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -"				}\n" -"				GET_NPOINTS(*c) = nReducedContacts;\n" -"			}\n" -"				\n" -"		}//		if (numContactsOut>0)\n" -"	}//	if (i<numPairs)\n" -"}\n" -"int	findClippingFaces(const float4 separatingNormal,\n" -"                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                  
    int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"                      __global const float4* vertices,\n" -"                      __global const b3GpuFace_t* faces,\n" -"                      __global const int* indices,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=-1;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -"		const int numVertices = polyB.m_numIndices;\n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -"			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=-1;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              faces[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"  
          \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"        worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"int clipFaces(__global float4* worldVertsA1,\n" -"              __global float4* worldNormalsA1,\n" -"              __global float4* worldVertsB1,\n" -"              __global float4* worldVertsB2, \n" -"              int capacityWorldVertsB2,\n" -"              const float minDist, float maxDist,\n" -"              __global int4* clippingFaces,\n" -"              int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"    \n" -"    int closestFaceA = clippingFaces[pairIndex].x;\n" -"    int closestFaceB = clippingFaces[pairIndex].y;\n" -"	int numVertsInA = clippingFaces[pairIndex].z;\n" -"	int numVertsInB = clippingFaces[pairIndex].w;\n" -"    \n" -"	int numVertsOut = 0;\n" -"    \n" -"	if (closestFaceA<0)\n" -"		return numContactsOut;\n" -"    \n" -"    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -"    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -"    \n" -"    \n" -"	\n" -"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"    \n" -"	for(int e0=0;e0<numVertsInA;e0++)\n" -"	{\n" 
-"		const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -"		const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -"		const float4 WorldEdge0 = aw - bw;\n" -"		float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"		float4 worldA1 = aw;\n" -"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"		float4 planeNormalWS = planeNormalWS1;\n" -"		float planeEqWS=planeEqWS1;\n" -"		numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -"		__global float4* tmp = pVtxOut;\n" -"		pVtxOut = pVtxIn;\n" -"		pVtxIn = tmp;\n" -"		numVertsInB = numVertsOut;\n" -"		numVertsOut = 0;\n" -"	}\n" -"    \n" -"    //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"    //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"    \n" -"    /*for (int i=0;i<numVertsInB;i++)\n" -"    {\n" -"        pVtxOut[i] = pVtxIn[i];\n" -"    }*/\n" -"    \n" -"    \n" -"    \n" -"    \n" -"    //numVertsInB=0;\n" -"	\n" -"    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"    for (int i=0;i<numVertsInB;i++)\n" -"    {\n" -"        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"        if (depth <=minDist)\n" -"        {\n" -"            depth = minDist;\n" -"        }\n" -"        \n" -"        if (depth <=maxDist)\n" -"        {\n" -"            float4 pointInWorld = pVtxIn[i];\n" -"            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"        }\n" -"    }\n" -"   \n" -"    clippingFaces[pairIndex].w =numContactsOut;\n" -"   \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"__kernel void   findClippingFacesKernel(  __global const int4* pairs,\n" -"                                        __global const b3RigidBodyData_t* 
rigidBodies,\n" -"                                        __global const b3Collidable_t* collidables,\n" -"                                        __global const b3ConvexPolyhedronData_t* convexShapes,\n" -"                                        __global const float4* vertices,\n" -"                                        __global const float4* uniqueEdges,\n" -"                                        __global const b3GpuFace_t* faces,\n" -"                                        __global const int* indices,\n" -"                                        __global const float4* separatingNormals,\n" -"                                        __global const int* hasSeparatingAxis,\n" -"                                        __global int4* clippingFacesOut,\n" -"                                        __global float4* worldVertsA1,\n" -"                                        __global float4* worldNormalsA1,\n" -"                                        __global float4* worldVertsB1,\n" -"                                        int capacityWorldVerts,\n" -"                                        int numPairs\n" -"                                        )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"    \n" -"	\n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"    \n" -"	if (i<numPairs)\n" -"	{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"			\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"			\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"            \n" -"            \n" -"			int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" -"                                                        
&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -"                                                        rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -"                                                        rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -"                                                        worldVertsA1,\n" -"                                                        worldNormalsA1,\n" -"                                                        worldVertsB1,capacityWorldVerts,\n" -"                                                        minDist, maxDist,\n" -"                                                        vertices,faces,indices,\n" -"                                                        clippingFacesOut,i);\n" -"            \n" -"            \n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"}\n" -"__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,\n" -"                                                   __global const int* hasSeparatingAxis,\n" -"                                                   __global int4* clippingFacesOut,\n" -"                                                   __global float4* worldVertsA1,\n" -"                                                   __global float4* worldNormalsA1,\n" -"                                                   __global float4* worldVertsB1,\n" -"                                                   __global float4* worldVertsB2,\n" -"                                                    int vertexFaceCapacity,\n" -"                                                   int numPairs,\n" -"					                                        int debugMode\n" -"                                                   )\n" -"{\n" -"    int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"    \n" -"	float minDist = -1e30f;\n" -"	float maxDist = 0.02f;\n" -"    \n" -"	if (i<numPairs)\n" -"	
{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"//			int bodyIndexA = pairs[i].x;\n" -"	//		int bodyIndexB = pairs[i].y;\n" -"		    \n" -"            int numLocalContactsOut = 0;\n" -"            int capacityWorldVertsB2 = vertexFaceCapacity;\n" -"            \n" -"            __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -"            __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -"            \n" -"            {\n" -"                __global int4* clippingFaces = clippingFacesOut;\n" -"            \n" -"                \n" -"                int closestFaceA = clippingFaces[pairIndex].x;\n" -"                int closestFaceB = clippingFaces[pairIndex].y;\n" -"                int numVertsInA = clippingFaces[pairIndex].z;\n" -"                int numVertsInB = clippingFaces[pairIndex].w;\n" -"                \n" -"                int numVertsOut = 0;\n" -"                \n" -"                if (closestFaceA>=0)\n" -"                {\n" -"                    \n" -"                    \n" -"                    \n" -"                    // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -"                    \n" -"                    for(int e0=0;e0<numVertsInA;e0++)\n" -"                    {\n" -"                        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -"                        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -"                        const float4 WorldEdge0 = aw - bw;\n" -"                        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -"                        float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -"                        float4 worldA1 = aw;\n" -"                        float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -"                        float4 planeNormalWS = 
planeNormalWS1;\n" -"                        float planeEqWS=planeEqWS1;\n" -"                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -"                        __global float4* tmp = pVtxOut;\n" -"                        pVtxOut = pVtxIn;\n" -"                        pVtxIn = tmp;\n" -"                        numVertsInB = numVertsOut;\n" -"                        numVertsOut = 0;\n" -"                    }\n" -"                    \n" -"                    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -"                    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -"                    \n" -"                    for (int i=0;i<numVertsInB;i++)\n" -"                    {\n" -"                        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -"                        if (depth <=minDist)\n" -"                        {\n" -"                            depth = minDist;\n" -"                        }\n" -"                        \n" -"                        if (depth <=maxDist)\n" -"                        {\n" -"                            float4 pointInWorld = pVtxIn[i];\n" -"                            pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -"                        }\n" -"                    }\n" -"                    \n" -"                }\n" -"                clippingFaces[pairIndex].w =numLocalContactsOut;\n" -"                \n" -"            }\n" -"            \n" -"            for (int i=0;i<numLocalContactsOut;i++)\n" -"                pVtxIn[i] = pVtxOut[i];\n" -"                \n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"}\n" -"__kernel void   newContactReductionKernel( __global int4* pairs,\n" -"                                                   __global const b3RigidBodyData_t* rigidBodies,\n" -"                                     
              __global const float4* separatingNormals,\n" -"                                                   __global const int* hasSeparatingAxis,\n" -"                                                   __global struct b3Contact4Data* globalContactsOut,\n" -"                                                   __global int4* clippingFaces,\n" -"                                                   __global float4* worldVertsB2,\n" -"                                                   volatile __global int* nGlobalContactsOut,\n" -"                                                   int vertexFaceCapacity,\n" -"												   int contactCapacity,\n" -"                                                   int numPairs\n" -"                                                   )\n" -"{\n" -"    int i = get_global_id(0);\n" -"	int pairIndex = i;\n" -"	\n" -"    int4 contactIdx;\n" -"    contactIdx=make_int4(0,1,2,3);\n" -"    \n" -"	if (i<numPairs)\n" -"	{\n" -"        \n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"            \n" -"			\n" -"            \n" -"            \n" -"			int nPoints = clippingFaces[pairIndex].w;\n" -"           \n" -"            if (nPoints>0)\n" -"            {\n" -"                 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" -"                float4 normal = -separatingNormals[i];\n" -"                \n" -"                int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" -"            \n" -"				int mprContactIndex = pairs[pairIndex].z;\n" -"                int dstIdx = mprContactIndex;\n" -"				if (dstIdx<0)\n" -"				{\n" -"	                AppendInc( nGlobalContactsOut, dstIdx );\n" -"				}\n" -"//#if 0\n" -"                \n" -"				if (dstIdx < contactCapacity)\n" -"				{\n" -"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -"					c->m_worldNormalOnB = -normal;\n" -"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = 
(0.7f*0xffff);\n" -"					c->m_batchIdx = pairIndex;\n" -"					int bodyA = pairs[pairIndex].x;\n" -"					int bodyB = pairs[pairIndex].y;\n" -"					pairs[pairIndex].w = dstIdx;\n" -"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -"                    c->m_childIndexA =-1;\n" -"					c->m_childIndexB =-1;\n" -"                    switch (nReducedContacts)\n" -"                    {\n" -"                        case 4:\n" -"                            c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" -"                        case 3:\n" -"                            c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" -"                        case 2:\n" -"                            c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" -"                        case 1:\n" -"							if (mprContactIndex<0)//test\n" -"	                            c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" -"                        default:\n" -"                        {\n" -"                        }\n" -"                    };\n" -"                    \n" -"					GET_NPOINTS(*c) = nReducedContacts;\n" -"                    \n" -"                 }\n" -"                 \n" -"                \n" -"//#endif\n" -"				\n" -"			}//		if (numContactsOut>0)\n" -"		}//		if (hasSeparatingAxis[i])\n" -"	}//	if (i<numPairs)\n" -"    \n" -"    \n" -"}\n" -; +static const char* satClipKernelsCL = +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +	"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +	"#ifdef cl_ext_atomic_counters_32\n" +	"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +	"#else\n" +	
"#define counter32_t volatile __global int*\n" +	"#endif\n" +	"#define GET_GROUP_IDX get_group_id(0)\n" +	"#define GET_LOCAL_IDX get_local_id(0)\n" +	"#define GET_GLOBAL_IDX get_global_id(0)\n" +	"#define GET_GROUP_SIZE get_local_size(0)\n" +	"#define GET_NUM_GROUPS get_num_groups(0)\n" +	"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +	"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +	"#define AtomInc(x) atom_inc(&(x))\n" +	"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +	"#define AppendInc(x, out) out = atomic_inc(x)\n" +	"#define AtomAdd(x, value) atom_add(&(x), value)\n" +	"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +	"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +	"#define max2 max\n" +	"#define min2 min\n" +	"typedef unsigned int u32;\n" +	"#ifndef B3_CONTACT4DATA_H\n" +	"#define B3_CONTACT4DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = 
b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	"typedef  struct b3Contact4Data b3Contact4Data_t;\n" +	"struct b3Contact4Data\n" +	"{\n" +	"	b3Float4	m_worldPosB[4];\n" +	"//	b3Float4	m_localPosA[4];\n" +	"//	b3Float4	m_localPosB[4];\n" +	"	b3Float4	m_worldNormalOnB;	//	w: m_nPoints\n" +	"	unsigned short  m_restituitionCoeffCmp;\n" +	"	unsigned short  m_frictionCoeffCmp;\n" +	"	int m_batchIdx;\n" +	"	int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +	"	int m_bodyBPtrAndSignBit;\n" +	"	int	m_childIndexA;\n" +	"	int	m_childIndexB;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +	"{\n" +	"	return (int)contact->m_worldNormalOnB.w;\n" +	"};\n" +	"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +	"{\n" +	"	contact->m_worldNormalOnB.w = (float)numPoints;\n" +	"};\n" +	"#endif //B3_CONTACT4DATA_H\n" +	"#ifndef 
B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" 
+	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"typedef struct b3GpuFace b3GpuFace_t;\n" +	"struct b3GpuFace\n" +	"{\n" +	"	b3Float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"	int m_unusedPadding1;\n" +	"	int m_unusedPadding2;\n" +	"};\n" +	"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +	"struct b3ConvexPolyhedronData\n" +	"{\n" +	"	b3Float4		m_localCenter;\n" +	"	b3Float4		m_extents;\n" +	"	b3Float4		mC;\n" +	"	b3Float4		mE;\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"};\n" +	"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +	"#ifndef B3_COLLIDABLE_H\n" +	"#define B3_COLLIDABLE_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"enum b3ShapeTypes\n" +	"{\n" +	"	SHAPE_HEIGHT_FIELD=1,\n" +	"	SHAPE_CONVEX_HULL=3,\n" +	"	SHAPE_PLANE=4,\n" +	"	SHAPE_CONCAVE_TRIMESH=5,\n" +	"	SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +	"	SHAPE_SPHERE=7,\n" +	"	MAX_NUM_SHAPE_TYPES,\n" +	"};\n" +	"typedef struct b3Collidable b3Collidable_t;\n" +	"struct b3Collidable\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"};\n" +	"typedef struct b3GpuChildShape 
b3GpuChildShape_t;\n" +	"struct b3GpuChildShape\n" +	"{\n" +	"	b3Float4	m_childPosition;\n" +	"	b3Quat		m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"};\n" +	"struct b3CompoundOverlappingPair\n" +	"{\n" +	"	int m_bodyIndexA;\n" +	"	int m_bodyIndexB;\n" +	"//	int	m_pairType;\n" +	"	int m_childShapeIndexA;\n" +	"	int m_childShapeIndexB;\n" +	"};\n" +	"#endif //B3_COLLIDABLE_H\n" +	"#ifndef B3_RIGIDBODY_DATA_H\n" +	"#define B3_RIGIDBODY_DATA_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = 
fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( 
a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif //B3_MAT3x3_H\n" +	"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +	"struct b3RigidBodyData\n" +	"{\n" +	"	b3Float4				m_pos;\n" +	"	b3Quat					m_quat;\n" +	"	b3Float4				m_linVel;\n" +	"	b3Float4				m_angVel;\n" +	"	int 					m_collidableIdx;\n" +	"	float 				m_invMass;\n" +	"	float 				m_restituitionCoeff;\n" +	"	float 				m_frictionCoeff;\n" +	"};\n" +	"typedef struct b3InertiaData b3InertiaData_t;\n" +	"struct b3InertiaData\n" +	"{\n" +	"	b3Mat3x3 m_invInertiaWorld;\n" +	"	b3Mat3x3 m_initInvInertia;\n" +	"};\n" +	"#endif //B3_RIGIDBODY_DATA_H\n" +	"	\n" +	"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +	"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +	"#define make_float4 (float4)\n" +	"#define make_float2 (float2)\n" +	"#define make_uint4 (uint4)\n" +	"#define make_int4 (int4)\n" +	"#define make_uint2 (uint2)\n" +	"#define make_int2 (int2)\n" +	"__inline\n" +	"float fastDiv(float numerator, float denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"//	return numerator/denominator;	\n" +	"}\n" +	"__inline\n" +	"float4 fastDiv4(float4 numerator, float4 denominator)\n" +	"{\n" +	"	return native_divide(numerator, denominator);	\n" +	"}\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"}\n" +	"//#define dot3F4 dot\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = 
make_float4(a.xyz,0.f);\n" +	"	float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"__inline float4 lerp3(const float4 a,const float4 b, float  t)\n" +	"{\n" +	"	return make_float4(	a.x + (b.x - a.x) * t,\n" +	"						a.y + 
(b.y - a.y) * t,\n" +	"						a.z + (b.z - a.z) * t,\n" +	"						0.f);\n" +	"}\n" +	"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +	"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" +	"{\n" +	"	\n" +	"	int ve;\n" +	"	float ds, de;\n" +	"	int numVertsOut = 0;\n" +	"    //double-check next test\n" +	"    	if (numVertsIn < 2)\n" +	"    		return 0;\n" +	"    \n" +	"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" +	"	float4 endVertex = pVtxIn[0];\n" +	"	\n" +	"	ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +	"    \n" +	"	for (ve = 0; ve < numVertsIn; ve++)\n" +	"	{\n" +	"		endVertex=pVtxIn[ve];\n" +	"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +	"		if (ds<0)\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start < 0, end < 0, so output endVertex\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"			else\n" +	"			{\n" +	"				// Start < 0, end >= 0, so output intersection\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"			}\n" +	"		}\n" +	"		else\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start >= 0, end < 0 so output intersection and end\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"		}\n" +	"		firstVertex = endVertex;\n" +	"		ds = de;\n" +	"	}\n" +	"	return numVertsOut;\n" +	"}\n" +	"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +	"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" +	"{\n" +	"	\n" +	"	int ve;\n" +	"	float ds, de;\n" +	"	int numVertsOut = 0;\n" +	"//double-check next test\n" +	"	if (numVertsIn < 2)\n" +	"		return 0;\n" +	"	float4 firstVertex=pVtxIn[numVertsIn-1];\n" +	"	float4 endVertex = pVtxIn[0];\n" +	"	\n" +	"	ds = 
dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +	"	for (ve = 0; ve < numVertsIn; ve++)\n" +	"	{\n" +	"		endVertex=pVtxIn[ve];\n" +	"		de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +	"		if (ds<0)\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start < 0, end < 0, so output endVertex\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"			else\n" +	"			{\n" +	"				// Start < 0, end >= 0, so output intersection\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"			}\n" +	"		}\n" +	"		else\n" +	"		{\n" +	"			if (de<0)\n" +	"			{\n" +	"				// Start >= 0, end < 0 so output intersection and end\n" +	"				ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +	"				ppVtxOut[numVertsOut++] = endVertex;\n" +	"			}\n" +	"		}\n" +	"		firstVertex = endVertex;\n" +	"		ds = de;\n" +	"	}\n" +	"	return numVertsOut;\n" +	"}\n" +	"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA,  \n" +	"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +	"	float4* worldVertsB2, int capacityWorldVertsB2,\n" +	"	const float minDist, float maxDist,\n" +	"	__global const float4* vertices,\n" +	"	__global const b3GpuFace_t* faces,\n" +	"	__global const int* indices,\n" +	"	float4* contactsOut,\n" +	"	int contactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	float4* pVtxIn = worldVertsB1;\n" +	"	float4* pVtxOut = worldVertsB2;\n" +	"	\n" +	"	int numVertsIn = numWorldVertsB1;\n" +	"	int numVertsOut = 0;\n" +	"	int closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"				faces[hullA->m_faceOffset+face].m_plane.x, \n" +	"				faces[hullA->m_faceOffset+face].m_plane.y, \n" +	"				faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"		\n" +	"			
float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"	b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" +	"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"	int numVerticesA = polyA.m_numIndices;\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"		const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" +	"		const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +	"		const float4 edge0 = a - b;\n" +	"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +	"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = transform(&a,&posA,&ornA);\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		\n" +	"		//clip face\n" +	"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +	"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		//btSwap(pVtxIn,pVtxOut);\n" +	"		float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsIn = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"	\n" +	"	// only keep points that are behind the witness face\n" +	"	{\n" +	"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float localPlaneEq = polyA.m_plane.w;\n" +	"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +	"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +	"		for (int i=0;i<numVertsIn;i++)\n" +	"		{\n" +	"			float depth = 
dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"			if (depth <=minDist)\n" +	"			{\n" +	"				depth = minDist;\n" +	"			}\n" +	"			if (depth <=maxDist)\n" +	"			{\n" +	"				float4 pointInWorld = pVtxIn[i];\n" +	"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" +	"				contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA,  \n" +	"	const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +	"	float4* worldVertsB2, int capacityWorldVertsB2,\n" +	"	const float minDist, float maxDist,\n" +	"	const float4* verticesA,\n" +	"	const b3GpuFace_t* facesA,\n" +	"	const int* indicesA,\n" +	"	__global const float4* verticesB,\n" +	"	__global const b3GpuFace_t* facesB,\n" +	"	__global const int* indicesB,\n" +	"	float4* contactsOut,\n" +	"	int contactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	float4* pVtxIn = worldVertsB1;\n" +	"	float4* pVtxOut = worldVertsB2;\n" +	"	\n" +	"	int numVertsIn = numWorldVertsB1;\n" +	"	int numVertsOut = 0;\n" +	"	int closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"				facesA[hullA->m_faceOffset+face].m_plane.x, \n" +	"				facesA[hullA->m_faceOffset+face].m_plane.y, \n" +	"				facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"		\n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"	b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" +	"	// clip polygon to back of planes of all faces of hull A that are 
adjacent to witness face\n" +	"	int numVerticesA = polyA.m_numIndices;\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"		const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" +	"		const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +	"		const float4 edge0 = a - b;\n" +	"		const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +	"		float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = transform(&a,&posA,&ornA);\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		\n" +	"		//clip face\n" +	"		//clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +	"		numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		//btSwap(pVtxIn,pVtxOut);\n" +	"		float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsIn = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"	\n" +	"	// only keep points that are behind the witness face\n" +	"	{\n" +	"		float4 localPlaneNormal  = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +	"		float localPlaneEq = polyA.m_plane.w;\n" +	"		float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +	"		float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +	"		for (int i=0;i<numVertsIn;i++)\n" +	"		{\n" +	"			float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"			if (depth <=minDist)\n" +	"			{\n" +	"				depth = minDist;\n" +	"			}\n" +	"			if (depth <=maxDist)\n" +	"			{\n" +	"				float4 pointInWorld = pVtxIn[i];\n" +	"				//resultOut.addContactPoint(separatingNormal,point,depth);\n" +	"				contactsOut[numContactsOut++] = 
make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"int	clipHullAgainstHull(const float4 separatingNormal,\n" +	"	__global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +	"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +	"	const float minDist, float maxDist,\n" +	"	__global const float4* vertices,\n" +	"	__global const b3GpuFace_t* faces,\n" +	"	__global const int* indices,\n" +	"	float4*	localContactsOut,\n" +	"	int localContactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" +	"				faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	{\n" +	"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceB>=0)\n" +	"	{\n" +	"		numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" +	"				posA,ornA,\n" +	"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" +	"				faces,\n" +	"				indices,localContactsOut,localContactCapacity);\n" +	"	}\n" +	"	
return numContactsOut;\n" +	"}\n" +	"int	clipHullAgainstHullLocalA(const float4 separatingNormal,\n" +	"	const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +	"	const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +	"	float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +	"	const float minDist, float maxDist,\n" +	"	const float4* verticesA,\n" +	"	const b3GpuFace_t* facesA,\n" +	"	const int* indicesA,\n" +	"	__global const float4* verticesB,\n" +	"	__global const b3GpuFace_t* facesB,\n" +	"	__global const int* indicesB,\n" +	"	float4*	localContactsOut,\n" +	"	int localContactCapacity)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" +	"				facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	{\n" +	"		const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"			const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"	if (closestFaceB>=0)\n" +	"	{\n" +	"		numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" +	"				posA,ornA,\n" +	"				worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" +	"				verticesA,facesA,indicesA,\n" +	"				verticesB,facesB,indicesB,\n" +	"				
localContactsOut,localContactCapacity);\n" +	"	}\n" +	"	return numContactsOut;\n" +	"}\n" +	"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" +	"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" +	"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" +	"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" +	"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 )\n" +	"        return 0;\n" +	"    \n" +	"    if (nPoints <=4)\n" +	"        return nPoints;\n" +	"    \n" +	"    \n" +	"    if (nPoints >64)\n" +	"        nPoints = 64;\n" +	"    \n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			center += p[i];\n" +	"		center /= (float)nPoints;\n" +	"	}\n" +	"    \n" +	"	\n" +	"    \n" +	"	//	sample 4 directions\n" +	"    \n" +	"    float4 aVector = p[0] - center;\n" +	"    float4 u = cross3( nearNormal, aVector );\n" +	"    float4 v = cross3( nearNormal, u );\n" +	"    u = normalize3( u );\n" +	"    v = normalize3( v );\n" +	"    \n" +	"    \n" +	"    //keep point with deepest penetration\n" +	"    float minW= FLT_MAX;\n" +	"    \n" +	"    int minIndex=-1;\n" +	"    \n" +	"    float4 maxDots;\n" +	"    maxDots.x = FLT_MIN;\n" +	"    maxDots.y = FLT_MIN;\n" +	"    maxDots.z = FLT_MIN;\n" +	"    maxDots.w = FLT_MIN;\n" +	"    \n" +	"    //	idx, distance\n" +	"    for(int ie = 0; ie<nPoints; ie++ )\n" +	"    {\n" +	"        if (p[ie].w<minW)\n" +	"        {\n" +	"            minW = p[ie].w;\n" +	"            minIndex=ie;\n" +	"        }\n" +	"        float f;\n" +	"        float4 r = p[ie]-center;\n" +	"        f = dot3F4( u, r );\n" +	"        if (f<maxDots.x)\n" +	"        {\n" +	"            maxDots.x = f;\n" +	"            
contactIdx[0].x = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -u, r );\n" +	"        if (f<maxDots.y)\n" +	"        {\n" +	"            maxDots.y = f;\n" +	"            contactIdx[0].y = ie;\n" +	"        }\n" +	"        \n" +	"        \n" +	"        f = dot3F4( v, r );\n" +	"        if (f<maxDots.z)\n" +	"        {\n" +	"            maxDots.z = f;\n" +	"            contactIdx[0].z = ie;\n" +	"        }\n" +	"        \n" +	"        f = dot3F4( -v, r );\n" +	"        if (f<maxDots.w)\n" +	"        {\n" +	"            maxDots.w = f;\n" +	"            contactIdx[0].w = ie;\n" +	"        }\n" +	"        \n" +	"    }\n" +	"    \n" +	"    if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" +	"    {\n" +	"        //replace the first contact with minimum (todo: replace contact with least penetration)\n" +	"        contactIdx[0].x = minIndex;\n" +	"    }\n" +	"    \n" +	"    return 4;\n" +	"    \n" +	"}\n" +	"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" +	"{\n" +	"    contactIdx[0] = 0;\n" +	"    contactIdx[1] = 1;\n" +	"    contactIdx[2] = 2;\n" +	"    contactIdx[3] = 3;\n" +	"    \n" +	"	if( nPoints == 0 ) return 0;\n" +	"    \n" +	"	nPoints = min2( nPoints, 4 );\n" +	"    return nPoints;\n" +	"    \n" +	"}\n" +	"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" +	"{\n" +	"	if( nPoints == 0 ) return 0;\n" +	"	nPoints = min2( nPoints, 64 );\n" +	"	float4 center = make_float4(0.f);\n" +	"	{\n" +	"		float4 v[64];\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"			v[i] = p[i];\n" +	"		//memcpy( v, p, nPoints*sizeof(float4) );\n" +	"		PARALLEL_SUM( v, nPoints );\n" +	"		center = v[0]/(float)nPoints;\n" +	"	}\n" +	"	\n" +	"	{	//	sample 4 directions\n" +	"		if( nPoints < 4 )\n" +	"		{\n" +	"			for(int i=0; i<nPoints; i++) \n" +	"				contactIdx[i] = i;\n" +	"			return 
nPoints;\n" +	"		}\n" +	"		float4 aVector = p[0] - center;\n" +	"		float4 u = cross3( nearNormal, aVector );\n" +	"		float4 v = cross3( nearNormal, u );\n" +	"		u = normalize3( u );\n" +	"		v = normalize3( v );\n" +	"		int idx[4];\n" +	"		float2 max00 = make_float2(0,FLT_MAX);\n" +	"		{\n" +	"			//	idx, distance\n" +	"			{\n" +	"				{\n" +	"					int4 a[64];\n" +	"					for(int ie = 0; ie<nPoints; ie++ )\n" +	"					{\n" +	"						\n" +	"						\n" +	"						float f;\n" +	"						float4 r = p[ie]-center;\n" +	"						f = dot3F4( u, r );\n" +	"						a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( -u, r );\n" +	"						a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( v, r );\n" +	"						a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"						f = dot3F4( -v, r );\n" +	"						a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" +	"					}\n" +	"					for(int ie=0; ie<nPoints; ie++)\n" +	"					{\n" +	"						a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" +	"						a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" +	"						a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" +	"						a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w;\n" +	"					}\n" +	"					idx[0] = (int)a[0].x & 0xff;\n" +	"					idx[1] = (int)a[0].y & 0xff;\n" +	"					idx[2] = (int)a[0].z & 0xff;\n" +	"					idx[3] = (int)a[0].w & 0xff;\n" +	"				}\n" +	"			}\n" +	"			{\n" +	"				float2 h[64];\n" +	"				PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" +	"				REDUCE_MIN( h, nPoints );\n" +	"				max00 = h[0];\n" +	"			}\n" +	"		}\n" +	"		contactIdx[0] = idx[0];\n" +	"		contactIdx[1] = idx[1];\n" +	"		contactIdx[2] = idx[2];\n" +	"		contactIdx[3] = idx[3];\n" +	"		return 4;\n" +	"	}\n" +	"}\n" +	"//For each idx<numPairs: copies the entry's contact points (closestPointsWorld at contactOffsets[idx],\n" +	"//contactCounts[idx] of them) into the array localPoints, reduces them with extractManifoldSequential\n" +	"//and, if a slot below contactCapacity is obtained through AppendInc, writes one b3Contact4Data to contactsOut.\n" +	"__kernel void   extractManifoldAndAddContactKernel(__global const int4* pairs, \n" +	"																	__global const b3RigidBodyData_t* rigidBodies, \n" +	"																	__global const float4* closestPointsWorld,\n" +	"																	__global const float4* separatingNormalsWorld,\n" +	"																	__global const int* contactCounts,\n" +	"																	__global const int* contactOffsets,\n" +	"																	__global struct b3Contact4Data* restrict contactsOut,\n" +	"																	counter32_t nContactsOut,\n" +	"																	int contactCapacity,\n" +	"																	int numPairs,\n" +	"																	int pairIndex\n" +	"																	)\n" +	"{\n" +	"	int idx = get_global_id(0);\n" +	"	\n" +	"	if (idx<numPairs)\n" +	"	{\n" +	"		float4 normal = separatingNormalsWorld[idx];\n" +	"		int nPoints = contactCounts[idx];\n" +	"		//localPoints holds 64 entries: clamp before copying so a larger count cannot overflow it\n" +	"		//(extractManifoldSequential limits itself to 64 points as well)\n" +	"		if (nPoints>64)\n" +	"			nPoints = 64;\n" +	"		__global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" +	"		float4 localPoints[64];\n" +	"		for (int i=0;i<nPoints;i++)\n" +	"		{\n" +	"			localPoints[i] = pointsIn[i];\n" +	"		}\n" +	"		int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"		contactIdx[0] = -1;\n" +	"		contactIdx[1] = -1;\n" +	"		contactIdx[2] = -1;\n" +	"		contactIdx[3] = -1;\n" +	"		int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" +	"		int dstIdx;\n" +	"		AppendInc( nContactsOut, dstIdx );\n" +	"		if (dstIdx<contactCapacity)\n" +	"		{\n" 
+	"			__global struct b3Contact4Data* c = contactsOut + dstIdx;\n" +	"			c->m_worldNormalOnB = -normal;\n" +	"			c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"			c->m_batchIdx = idx;\n" +	"			int bodyA = pairs[pairIndex].x;\n" +	"			int bodyB = pairs[pairIndex].y;\n" +	"			c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +	"			c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +	"			c->m_childIndexA = -1;\n" +	"			c->m_childIndexB = -1;\n" +	"			for (int i=0;i<nContacts;i++)\n" +	"			{\n" +	"				c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" +	"			}\n" +	"			GET_NPOINTS(*c) = nContacts;\n" +	"		}\n" +	"	}\n" +	"}\n" +	"void	trInverse(float4 translationIn, Quaternion orientationIn,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtInvert(orientationIn);\n" +	"	*translationOut = qtRotate(*orientationOut, -translationIn);\n" +	"}\n" +	"void	trMul(float4 translationA, Quaternion orientationA,\n" +	"						float4 translationB, Quaternion orientationB,\n" +	"		float4* translationOut, Quaternion* orientationOut)\n" +	"{\n" +	"	*orientationOut = qtMul(orientationA,orientationB);\n" +	"	*translationOut = transform(&translationB,&translationA,&orientationA);\n" +	"}\n" +	"__kernel void   clipHullHullKernel( __global int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global const int* hasSeparatingAxis,\n" +	"							
														__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numPairs,\n" +	"																					int contactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"		\n" +	"			int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" +	"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"														rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +	"													  rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"				float4 normal = -separatingNormals[i];\n" +	"				int nPoints = numLocalContactsOut;\n" +	"				float4* pointsIn = localContactsOut;\n" +	"				int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"				contactIdx[0] = -1;\n" +	"				contactIdx[1] = -1;\n" +	"				contactIdx[2] = -1;\n" +	"				contactIdx[3] = -1;\n" +	"		\n" +	"				int nReducedContacts = extractManifoldSequential(pointsIn, 
nPoints, normal, contactIdx);\n" +	"		\n" +	"				\n" +	"				int mprContactIndex = pairs[pairIndex].z;\n" +	"				int dstIdx = mprContactIndex;\n" +	"				if (dstIdx<0)\n" +	"				{\n" +	"					AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				}\n" +	"				if (dstIdx<contactCapacity)\n" +	"				{\n" +	"					pairs[pairIndex].z = dstIdx;\n" +	"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					for (int i=0;i<nReducedContacts;i++)\n" +	"					{\n" +	"					//this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" +	"						if (i>0||(mprContactIndex<0))\n" +	"						{\n" +	"							c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"						}\n" +	"					}\n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"				}\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"}\n" +	"__kernel void   clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const 
b3GpuChildShape_t* gpuChildShapes,\n" +	"																					__global const float4* gpuCompoundSepNormalsOut,\n" +	"																					__global const int* gpuHasCompoundSepNormalsOut,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int numCompoundPairs, int maxContactCapacity)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		if (gpuHasCompoundSepNormalsOut[i])\n" +	"		{\n" +	"			int bodyIndexA = gpuCompoundPairs[i].x;\n" +	"			int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"			\n" +	"			int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"			int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"			\n" +	"			int collidableIndexA = -1;\n" +	"			int collidableIndexB = -1;\n" +	"			\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			\n" +	"			float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"								\n" +	"			if (childShapeIndexA >= 0)\n" +	"			{\n" +	"				collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"				float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"				float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"				float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"				float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"				posA = newPosA;\n" +	"				ornA = newOrnA;\n" +	"			} else\n" +	"			{\n" +	"				collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			}\n" +	"			\n" +	"			if (childShapeIndexB>=0)\n" +	"			{\n" +	"				
collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"				float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"				float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"				float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"				float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"				posB = newPosB;\n" +	"				ornB = newOrnB;\n" +	"			} else\n" +	"			{\n" +	"				collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"			}\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"			int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" +	"														&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"														posA,ornA,\n" +	"													  posB,ornB,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"				float4 normal = -gpuCompoundSepNormalsOut[i];\n" +	"				int nPoints = numLocalContactsOut;\n" +	"				float4* pointsIn = localContactsOut;\n" +	"				int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"				contactIdx[0] = -1;\n" +	"				contactIdx[1] = -1;\n" +	"				contactIdx[2] = -1;\n" +	"				contactIdx[3] = -1;\n" +	"		\n" +	"				int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +	"		\n" +	"				int dstIdx;\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int 
bodyA = gpuCompoundPairs[pairIndex].x;\n" +	"					int bodyB = gpuCompoundPairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_childIndexA = childShapeIndexA;\n" +	"					c->m_childIndexB = childShapeIndexB;\n" +	"					for (int i=0;i<nReducedContacts;i++)\n" +	"					{\n" +	"						c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"					}\n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"				}\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (gpuHasCompoundSepNormalsOut[i])\n" +	"	}//	if (i<numCompoundPairs)\n" +	"}\n" +	"__kernel void   sphereSphereCollisionKernel( __global const int4* pairs, \n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global const int* hasSeparatingAxis,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numPairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +	"			collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +	"		{\n" +	"			//sphere-sphere\n" +	"			float radiusA = collidables[collidableIndexA].m_radius;\n" +	"			float radiusB = collidables[collidableIndexB].m_radius;\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			float4 posB = 
rigidBodies[bodyIndexB].m_pos;\n" +	"			float4 diff = posA-posB;\n" +	"			float len = length(diff);\n" +	"			\n" +	"			///iff distance positive, don't generate a new contact\n" +	"			if ( len <= (radiusA+radiusB))\n" +	"			{\n" +	"				///distance (negative means penetration)\n" +	"				float dist = len - (radiusA+radiusB);\n" +	"				float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" +	"				if (len > 0.00001)\n" +	"				{\n" +	"					normalOnSurfaceB = diff / len;\n" +	"				}\n" +	"				float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +	"				contactPosB.w = dist;\n" +	"								\n" +	"				int dstIdx;\n" +	"				AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				if (dstIdx < contactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normalOnSurfaceB;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"					c->m_worldPosB[0] = contactPosB;\n" +	"					c->m_childIndexA = -1;\n" +	"					c->m_childIndexB = -1;\n" +	"					GET_NPOINTS(*c) = 1;\n" +	"				}//if (dstIdx < numPairs)\n" +	"			}//if ( len <= (radiusA+radiusB))\n" +	"		}//SHAPE_SPHERE SHAPE_SPHERE\n" +	"	}//if (i<numPairs)\n" +	"}				\n" +	"__kernel void   clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" +	"																					__global const b3RigidBodyData_t* rigidBodies, \n" +	"																					__global const b3Collidable_t* collidables,\n" +	"																					__global const b3ConvexPolyhedronData_t* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					
__global const b3GpuFace_t* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const b3GpuChildShape_t* gpuChildShapes,\n" +	"																					__global const float4* separatingNormals,\n" +	"																					__global struct b3Contact4Data* restrict globalContactsOut,\n" +	"																					counter32_t nGlobalContactsOut,\n" +	"																					int contactCapacity,\n" +	"																					int numConcavePairs)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"	float4 worldVertsB1[64];\n" +	"	float4 worldVertsB2[64];\n" +	"	int capacityWorldVerts = 64;	\n" +	"	float4 localContactsOut[64];\n" +	"	int localContactCapacity=64;\n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"	if (i<numConcavePairs)\n" +	"	{\n" +	"		//negative value means that the pair is invalid\n" +	"		if (concavePairsIn[i].w<0)\n" +	"			return;\n" +	"		int bodyIndexA = concavePairsIn[i].x;\n" +	"		int bodyIndexB = concavePairsIn[i].y;\n" +	"		int f = concavePairsIn[i].z;\n" +	"		int childShapeIndexA = f;\n" +	"		\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		///////////////////////////////////////////////////////////////\n" +	"		\n" +	"	\n" +	"		bool overlap = false;\n" +	"		\n" +	"		b3ConvexPolyhedronData_t convexPolyhedronA;\n" +	"	//add 3 vertices of the triangle\n" +	"		convexPolyhedronA.m_numVertices = 3;\n" +	"		convexPolyhedronA.m_vertexOffset = 0;\n" +	"		float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"		b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"		\n" +	"		float4 verticesA[3];\n" +	"		for (int i=0;i<3;i++)\n" +	"		{\n" +	"			int index = indices[face.m_indexOffset+i];\n" +	"	
		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"			verticesA[i] = vert;\n" +	"			localCenter += vert;\n" +	"		}\n" +	"		float dmin = FLT_MAX;\n" +	"		int localCC=0;\n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"                                  \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"                             \n" +	"		b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot3F4(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = 
numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                                            \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot3F4(edgeNormal,v0);\n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                                            \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 sepAxis = separatingNormals[i];\n" +	"		\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"		int childShapeIndexB =-1;\n" +	"		if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			///////////////////\n" +	"			///compound shape support\n" +	"			\n" +	"			childShapeIndexB = concavePairsIn[pairIndex].w;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"	
		\n" +	"		}\n" +	"		\n" +	"		////////////////////////////////////////\n" +	"		\n" +	"		\n" +	"		\n" +	"		//verticesA/facesA/indicesA are arrays: pass them directly so they decay to the element-pointer\n" +	"		//types declared by clipHullAgainstHullLocalA (taking their address yields pointer-to-array types instead)\n" +	"		int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" +	"														&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"														posA,ornA,\n" +	"													  posB,ornB,\n" +	"													  worldVertsB1,worldVertsB2,capacityWorldVerts,\n" +	"														minDist, maxDist,\n" +	"														verticesA,facesA,indicesA,\n" +	"														vertices,faces,indices,\n" +	"														localContactsOut,localContactCapacity);\n" +	"												\n" +	"		if (numLocalContactsOut>0)\n" +	"		{\n" +	"			float4 normal = -separatingNormals[i];\n" +	"			int nPoints = numLocalContactsOut;\n" +	"			float4* pointsIn = localContactsOut;\n" +	"			int contactIdx[4];// = {-1,-1,-1,-1};\n" +	"			contactIdx[0] = -1;\n" +	"			contactIdx[1] = -1;\n" +	"			contactIdx[2] = -1;\n" +	"			contactIdx[3] = -1;\n" +	"	\n" +	"			int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +	"	\n" +	"			int dstIdx;\n" +	"			AppendInc( nGlobalContactsOut, dstIdx );\n" +	"			if (dstIdx<contactCapacity)\n" +	"			{\n" +	"				__global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +	"				c->m_worldNormalOnB = -normal;\n" +	"				c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"				c->m_batchIdx = pairIndex;\n" +	"				int bodyA = concavePairsIn[pairIndex].x;\n" +	"				int bodyB = concavePairsIn[pairIndex].y;\n" +	"				c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"				c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"				c->m_childIndexA = childShapeIndexA;\n" +	"				c->m_childIndexB = childShapeIndexB;\n" +	"				for (int i=0;i<nReducedContacts;i++)\n" +	"				{\n" +	"					c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +	"				}\n" +	"				GET_NPOINTS(*c) = nReducedContacts;\n" +	"			}\n" +	"				\n" +	"		}//		if (numContactsOut>0)\n" +	"	}//	if 
(i<numPairs)\n" +	"}\n" +	"//Selects the face of hullB with the largest, and the face of hullA with the smallest,\n" +	"//dot product between its world-space normal and separatingNormal.\n" +	"//Writes the world-space vertices of both faces to worldVertsB1 / worldVertsA1 starting at\n" +	"//pairIndex*capacityWorldVerts (at most capacityWorldVerts each), the selected A normal to\n" +	"//worldNormalsA1[pairIndex], and (faceA, faceB, numVertsA, numVertsB) to clippingFaces[pairIndex].\n" +	"//A face index of -1 means no face was selected; the matching vertex count is then 0.\n" +	"//The return value is always 0 (numContactsOut is never modified here).\n" +	"int	findClippingFaces(const float4 separatingNormal,\n" +	"                      __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"                       __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"                      __global const float4* vertices,\n" +	"                      __global const b3GpuFace_t* faces,\n" +	"                      __global const int* indices,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=-1;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	//only index faces[] when a face of hullB was actually selected\n" +	"	if (closestFaceB>=0)\n" +	"	{\n" +	"		const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +	"		const int numVertices = polyB.m_numIndices;\n" +	"		//stay inside this pair's slot of capacityWorldVerts entries\n" +	"		for(int e0=0;e0<numVertices && numWorldVertsB1<capacityWorldVerts;e0++)\n" +	"		{\n" +	"			const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +	"			worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int 
closestFaceA=-1;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              faces[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    //only index faces[] when a face of hullA was selected, and keep the copy inside this pair's slot\n" +	"    int numVerticesA = 0;\n" +	"    if (closestFaceA>=0)\n" +	"    {\n" +	"        numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"        if (numVerticesA>capacityWorldVerts)\n" +	"            numVerticesA = capacityWorldVerts;\n" +	"    }\n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"        worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"int clipFaces(__global float4* worldVertsA1,\n" +	"              __global float4* worldNormalsA1,\n" +	"              __global float4* worldVertsB1,\n" +	"              __global float4* worldVertsB2, \n" +	"              int capacityWorldVertsB2,\n" +	"              const float minDist, float maxDist,\n" +	"              __global int4* clippingFaces,\n" +	"              int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"    \n" +	"    int closestFaceA = clippingFaces[pairIndex].x;\n" +	"    
int closestFaceB = clippingFaces[pairIndex].y;\n" +	"	int numVertsInA = clippingFaces[pairIndex].z;\n" +	"	int numVertsInB = clippingFaces[pairIndex].w;\n" +	"    \n" +	"	int numVertsOut = 0;\n" +	"    \n" +	"	if (closestFaceA<0)\n" +	"		return numContactsOut;\n" +	"    \n" +	"    __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +	"    __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +	"    \n" +	"    \n" +	"	\n" +	"	// clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +	"    \n" +	"	for(int e0=0;e0<numVertsInA;e0++)\n" +	"	{\n" +	"		const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +	"		const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +	"		const float4 WorldEdge0 = aw - bw;\n" +	"		float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +	"		float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"		float4 worldA1 = aw;\n" +	"		float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"		float4 planeNormalWS = planeNormalWS1;\n" +	"		float planeEqWS=planeEqWS1;\n" +	"		numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +	"		__global float4* tmp = pVtxOut;\n" +	"		pVtxOut = pVtxIn;\n" +	"		pVtxIn = tmp;\n" +	"		numVertsInB = numVertsOut;\n" +	"		numVertsOut = 0;\n" +	"	}\n" +	"    \n" +	"    //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"    //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"    \n" +	"    /*for (int i=0;i<numVertsInB;i++)\n" +	"    {\n" +	"        pVtxOut[i] = pVtxIn[i];\n" +	"    }*/\n" +	"    \n" +	"    \n" +	"    \n" +	"    \n" +	"    //numVertsInB=0;\n" +	"	\n" +	"    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"    for (int i=0;i<numVertsInB;i++)\n" +	"    {\n" +	"        float depth 
= dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"        if (depth <=minDist)\n" +	"        {\n" +	"            depth = minDist;\n" +	"        }\n" +	"        \n" +	"        if (depth <=maxDist)\n" +	"        {\n" +	"            float4 pointInWorld = pVtxIn[i];\n" +	"            pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"        }\n" +	"    }\n" +	"   \n" +	"    clippingFaces[pairIndex].w =numContactsOut;\n" +	"   \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"__kernel void   findClippingFacesKernel(  __global const int4* pairs,\n" +	"                                        __global const b3RigidBodyData_t* rigidBodies,\n" +	"                                        __global const b3Collidable_t* collidables,\n" +	"                                        __global const b3ConvexPolyhedronData_t* convexShapes,\n" +	"                                        __global const float4* vertices,\n" +	"                                        __global const float4* uniqueEdges,\n" +	"                                        __global const b3GpuFace_t* faces,\n" +	"                                        __global const int* indices,\n" +	"                                        __global const float4* separatingNormals,\n" +	"                                        __global const int* hasSeparatingAxis,\n" +	"                                        __global int4* clippingFacesOut,\n" +	"                                        __global float4* worldVertsA1,\n" +	"                                        __global float4* worldNormalsA1,\n" +	"                                        __global float4* worldVertsB1,\n" +	"                                        int capacityWorldVerts,\n" +	"                                        int numPairs\n" +	"                                        )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"    \n" +	"	\n" +	"	float minDist = -1e30f;\n" +	"	
float maxDist = 0.02f;\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"			\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"			\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"            \n" +	"            \n" +	"			int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" +	"                                                        &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +	"                                                        rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +	"                                                        rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +	"                                                        worldVertsA1,\n" +	"                                                        worldNormalsA1,\n" +	"                                                        worldVertsB1,capacityWorldVerts,\n" +	"                                                        minDist, maxDist,\n" +	"                                                        vertices,faces,indices,\n" +	"                                                        clippingFacesOut,i);\n" +	"            \n" +	"            \n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"}\n" +	"__kernel void   clipFacesAndFindContactsKernel(    __global const float4* separatingNormals,\n" +	"                                                   __global const int* hasSeparatingAxis,\n" +	"                                                   __global int4* clippingFacesOut,\n" +	"                                                   __global float4* 
worldVertsA1,\n" +	"                                                   __global float4* worldNormalsA1,\n" +	"                                                   __global float4* worldVertsB1,\n" +	"                                                   __global float4* worldVertsB2,\n" +	"                                                    int vertexFaceCapacity,\n" +	"                                                   int numPairs,\n" +	"					                                        int debugMode\n" +	"                                                   )\n" +	"{\n" +	"    int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"    \n" +	"	float minDist = -1e30f;\n" +	"	float maxDist = 0.02f;\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"//			int bodyIndexA = pairs[i].x;\n" +	"	//		int bodyIndexB = pairs[i].y;\n" +	"		    \n" +	"            int numLocalContactsOut = 0;\n" +	"            int capacityWorldVertsB2 = vertexFaceCapacity;\n" +	"            \n" +	"            __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +	"            __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +	"            \n" +	"            {\n" +	"                __global int4* clippingFaces = clippingFacesOut;\n" +	"            \n" +	"                \n" +	"                int closestFaceA = clippingFaces[pairIndex].x;\n" +	"                int closestFaceB = clippingFaces[pairIndex].y;\n" +	"                int numVertsInA = clippingFaces[pairIndex].z;\n" +	"                int numVertsInB = clippingFaces[pairIndex].w;\n" +	"                \n" +	"                int numVertsOut = 0;\n" +	"                \n" +	"                if (closestFaceA>=0)\n" +	"                {\n" +	"                    \n" +	"                    \n" +	"                    \n" +	"                    // clip polygon to back of planes of all faces of hull A that are adjacent 
to witness face\n" +	"                    \n" +	"                    for(int e0=0;e0<numVertsInA;e0++)\n" +	"                    {\n" +	"                        const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +	"                        const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +	"                        const float4 WorldEdge0 = aw - bw;\n" +	"                        float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +	"                        float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +	"                        float4 worldA1 = aw;\n" +	"                        float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +	"                        float4 planeNormalWS = planeNormalWS1;\n" +	"                        float planeEqWS=planeEqWS1;\n" +	"                        numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +	"                        __global float4* tmp = pVtxOut;\n" +	"                        pVtxOut = pVtxIn;\n" +	"                        pVtxIn = tmp;\n" +	"                        numVertsInB = numVertsOut;\n" +	"                        numVertsOut = 0;\n" +	"                    }\n" +	"                    \n" +	"                    float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +	"                    float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +	"                    \n" +	"                    for (int i=0;i<numVertsInB;i++)\n" +	"                    {\n" +	"                        float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +	"                        if (depth <=minDist)\n" +	"                        {\n" +	"                            depth = minDist;\n" +	"                        }\n" +	"                        \n" +	"                        if (depth <=maxDist)\n" +	"                        {\n" +	"                            float4 pointInWorld = 
pVtxIn[i];\n" +	"                            pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +	"                        }\n" +	"                    }\n" +	"                    \n" +	"                }\n" +	"                clippingFaces[pairIndex].w =numLocalContactsOut;\n" +	"                \n" +	"            }\n" +	"            \n" +	"            for (int i=0;i<numLocalContactsOut;i++)\n" +	"                pVtxIn[i] = pVtxOut[i];\n" +	"                \n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"}\n" +	"__kernel void   newContactReductionKernel( __global int4* pairs,\n" +	"                                                   __global const b3RigidBodyData_t* rigidBodies,\n" +	"                                                   __global const float4* separatingNormals,\n" +	"                                                   __global const int* hasSeparatingAxis,\n" +	"                                                   __global struct b3Contact4Data* globalContactsOut,\n" +	"                                                   __global int4* clippingFaces,\n" +	"                                                   __global float4* worldVertsB2,\n" +	"                                                   volatile __global int* nGlobalContactsOut,\n" +	"                                                   int vertexFaceCapacity,\n" +	"												   int contactCapacity,\n" +	"                                                   int numPairs\n" +	"                                                   )\n" +	"{\n" +	"    int i = get_global_id(0);\n" +	"	int pairIndex = i;\n" +	"	\n" +	"    int4 contactIdx;\n" +	"    contactIdx=make_int4(0,1,2,3);\n" +	"    \n" +	"	if (i<numPairs)\n" +	"	{\n" +	"        \n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"            \n" +	"			\n" +	"            \n" +	"            \n" +	"			int nPoints = clippingFaces[pairIndex].w;\n" +	"           \n" +	"      
      if (nPoints>0)\n" +	"            {\n" +	"                 __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" +	"                float4 normal = -separatingNormals[i];\n" +	"                \n" +	"                int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" +	"            \n" +	"				int mprContactIndex = pairs[pairIndex].z;\n" +	"                int dstIdx = mprContactIndex;\n" +	"				if (dstIdx<0)\n" +	"				{\n" +	"	                AppendInc( nGlobalContactsOut, dstIdx );\n" +	"				}\n" +	"//#if 0\n" +	"                \n" +	"				if (dstIdx < contactCapacity)\n" +	"				{\n" +	"					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +	"					c->m_worldNormalOnB = -normal;\n" +	"					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +	"					c->m_batchIdx = pairIndex;\n" +	"					int bodyA = pairs[pairIndex].x;\n" +	"					int bodyB = pairs[pairIndex].y;\n" +	"					pairs[pairIndex].w = dstIdx;\n" +	"					c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +	"					c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +	"                    c->m_childIndexA =-1;\n" +	"					c->m_childIndexB =-1;\n" +	"                    switch (nReducedContacts)\n" +	"                    {\n" +	"                        case 4:\n" +	"                            c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" +	"                        case 3:\n" +	"                            c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" +	"                        case 2:\n" +	"                            c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" +	"                        case 1:\n" +	"							if (mprContactIndex<0)//test\n" +	"	                            c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" +	"                        default:\n" +	"                        {\n" +	"                        }\n" +	"                    
};\n" +	"                    \n" +	"					GET_NPOINTS(*c) = nReducedContacts;\n" +	"                    \n" +	"                 }\n" +	"                 \n" +	"                \n" +	"//#endif\n" +	"				\n" +	"			}//		if (numContactsOut>0)\n" +	"		}//		if (hasSeparatingAxis[i])\n" +	"	}//	if (i<numPairs)\n" +	"    \n" +	"    \n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h index 611569cacf..a60702ca62 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h @@ -1,1457 +1,1456 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satConcaveKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" -"		int	m_compoundBvhIndex;\n" -"	};\n" -"	\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. 
Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define 
B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" 
-"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, 
point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 
mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -"	union\n" -"	{\n" -"		float m_min[4];\n" -"		b3Float4 m_minVec;\n" -"		int m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float	m_max[4];\n" -"		b3Float4 m_maxVec;\n" -"		int m_signedMaxIndices[4];\n" -"	};\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg 
localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -"						b3Float4ConstArg pos,\n" -"						b3QuatConstArg orn,\n" -"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -"		b3Mat3x3 m;\n" -"		m = b3QuatGetRotationMatrix(orn);\n" -"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -"		\n" -"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -"										 0.f);\n" -"		*aabbMinOut = center-extent;\n" -"		*aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -"	bool overlap = true;\n" -"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -"	return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"	\n" -"//	float4 a1 = make_float4(a.xyz,0.f);\n" -"//	float4 b1 = make_float4(b.xyz,0.f);\n" -"//	return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	v = make_float4(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	
Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, 
__global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? 
d0:d1;\n" -"	return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -"		return false;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* verticesA, \n" -"	__global const float4* uniqueEdgesA, \n" -"	__global const btGpuFace* facesA,\n" -"	
__global const int*  indicesA,\n" -"	const float4* verticesB,\n" -"	const float4* uniqueEdgesB, \n" -"	const btGpuFace* facesB,\n" -"	const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS *= -1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"		float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int 
e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje *= -1.f;\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						result = false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"inline int	findClippingFaces(const float4 separatingNormal,\n" -"                      const ConvexPolyhedronCL* hullA, \n" -"					  __global const ConvexPolyhedronCL* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                      int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"					  const float4* verticesA,\n" -"                      const btGpuFace* facesA,\n" -"                      const int* indicesA,\n" -"					  __global const float4* verticesB,\n" -"                      __global const btGpuFace* facesB,\n" -"                      __global const int* 
indicesB,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=0;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		int numVertices = polyB.m_numIndices;\n" -"        if (numVertices>capacityWorldVerts)\n" -"            numVertices = capacityWorldVerts;\n" -"        if (numVertices<0)\n" -"            numVertices = 0;\n" -"        \n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"            if (e0<capacityWorldVerts)\n" -"            {\n" -"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"            }\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=0;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const float4 faceANormalWS = 
qtRotate(ornA,Normal);\n" -"            \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"    if (numVerticesA>capacityWorldVerts)\n" -"       numVerticesA = capacityWorldVerts;\n" -"    if (numVerticesA<0)\n" -"        numVerticesA=0;\n" -"    \n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        if (e0<capacityWorldVerts)\n" -"        {\n" -"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"        }\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" -"                                                __global const BodyData* rigidBodies,\n" -"                                                __global const btCollidableGpu* collidables,\n" -"                                                __global const ConvexPolyhedronCL* convexShapes,\n" -"                                                __global const float4* vertices,\n" -"                                                __global const float4* uniqueEdges,\n" -"                                                __global const btGpuFace* faces,\n" -"                                                __global const int* indices,\n" -"                                                __global const btGpuChildShape* 
gpuChildShapes,\n" -"                                                __global btAabbCL* aabbs,\n" -"                                                __global float4* concaveSeparatingNormalsOut,\n" -"                                                __global int* concaveHasSeparatingNormals,\n" -"                                                __global int4* clippingFacesOut,\n" -"                                                __global float4* worldVertsA1GPU,\n" -"                                                __global float4*  worldNormalsAGPU,\n" -"                                                __global float4* worldVertsB1GPU,\n" -"                                                __global float* dmins,\n" -"                                                int vertexFaceCapacity,\n" -"                                                int numConcavePairs\n" -"                                                )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"    \n" -"	concaveHasSeparatingNormals[i] = 0;\n" -"    \n" -"	int pairIdx = i;\n" -"    \n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"    \n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"    \n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"    \n" -"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"	{\n" -"		concavePairs[pairIdx].w = -1;\n" -"		return;\n" -"	}\n" -"    \n" -"    \n" -"    \n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"    \n" -"	
//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"    \n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"        \n" -"		triAabb.m_min = min(triAabb.m_min,vert);\n" -"		triAabb.m_max = max(triAabb.m_max,vert);\n" -"        \n" -"	}\n" -"    \n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" -"    \n" -"	if (overlap)\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"        \n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"        \n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"        \n" -"        \n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"        \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"        \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"        \n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"        \n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = 
numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"                \n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"        \n" -"        \n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"        \n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"        \n" -"		\n" -"        \n" -"        \n" -"		///////////////////\n" -"		///compound shape support\n" -"        \n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		
//////////////////\n" -"        \n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"        \n" -"        \n" -"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"                                             posA,ornA,\n" -"                                             posB,ornB,\n" -"                                             DeltaC2,\n" -"                                             verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                             vertices,uniqueEdges,faces,indices,\n" -"                                             &sepAxis,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" -"                                                 posB,ornB,\n" -"                                                 posA,ornA,\n" -"                                                 DeltaC2,\n" -"                                                 vertices,uniqueEdges,faces,indices,\n" -"                                                 verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                                 &sepAxis,&dmin);\n" -"            \n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				hasSeparatingAxis = 1;\n" -"			}\n" -"		}	\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"            dmins[i] = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"            \n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark 
this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" -"                                                          __global const BodyData* rigidBodies,\n" -"                                                          __global const btCollidableGpu* collidables,\n" -"                                                          __global const ConvexPolyhedronCL* convexShapes,\n" -"                                                          __global const float4* vertices,\n" -"                                                          __global const float4* uniqueEdges,\n" -"                                                          __global const btGpuFace* faces,\n" -"                                                          __global const int* indices,\n" -"                                                          __global const btGpuChildShape* gpuChildShapes,\n" -"                                                          __global btAabbCL* aabbs,\n" -"                                                          __global float4* concaveSeparatingNormalsOut,\n" -"                                                          __global int* concaveHasSeparatingNormals,\n" -"                                                          __global int4* clippingFacesOut,\n" -"                                                          __global float4* worldVertsA1GPU,\n" -"                                                          __global float4*  worldNormalsAGPU,\n" -"                                                          __global float4* worldVertsB1GPU,\n" -"                                                          __global float* dmins,\n" -"                                                          int vertexFaceCapacity,\n" -"                                                          int numConcavePairs\n" -"                                        
                  )\n" -"{\n" -"    \n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"    \n" -"	if (!concaveHasSeparatingNormals[i])\n" -"        return;\n" -"    \n" -"	int pairIdx = i;\n" -"    \n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"    \n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"    \n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"    \n" -"    \n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"    \n" -"	//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"    \n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"        \n" -"		triAabb.m_min = min(triAabb.m_min,vert);\n" -"		triAabb.m_max = max(triAabb.m_max,vert);\n" -"        \n" -"	}\n" -"    \n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? 
false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -"    \n" -"	if (overlap)\n" -"	{\n" -"		float dmin = dmins[i];\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"        sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" -"        \n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"        \n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"        \n" -"        \n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"        \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"        \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"        \n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"			curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"	
		facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"        \n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"                \n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"        \n" -"        \n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"        \n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"        \n" -"		\n" -"        \n" -"        \n" -"		///////////////////\n" -"		///compound shape support\n" -"        \n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = 
transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		//////////////////\n" -"        \n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"        \n" -"        \n" -"		{\n" -"			bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"                                                              posA,ornA,\n" -"                                                              posB,ornB,\n" -"                                                              DeltaC2,\n" -"                                                              verticesA,uniqueEdgesA,facesA,indicesA,\n" -"                                                              vertices,uniqueEdges,faces,indices,\n" -"                                                              &sepAxis,&dmin);\n" -"                \n" -"			if (!sepEE)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				hasSeparatingAxis = 1;\n" -"			}\n" -"		}\n" -"		\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"			sepAxis.w = dmin;\n" -"            dmins[i] = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"           \n" -" 	float minDist = -1e30f;\n" -"			float maxDist = 0.02f;\n" -"            \n" -"            findClippingFaces(sepAxis,\n" -"                              &convexPolyhedronA,\n" -"                              &convexShapes[shapeIndexB],\n" -"                              posA,ornA,\n" -"                              posB,ornB,\n" -"                              worldVertsA1GPU,\n" -"                              
worldNormalsAGPU,\n" -"                              worldVertsB1GPU,\n" -"                              vertexFaceCapacity,\n" -"                              minDist, maxDist,\n" -"                              verticesA,\n" -"                              facesA,\n" -"                              indicesA,\n" -"                              vertices,\n" -"                              faces,\n" -"                              indices,\n" -"                              clippingFacesOut, pairIdx);\n" -"	           \n" -"            \n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"	\n" -"	concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" -"}\n" -; +static const char* satConcaveKernelsCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define B3_MAX_STACK_DEPTH 256\n" +	"typedef unsigned int u32;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. 
Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} b3QuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} b3BvhSubtreeInfo;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"#ifndef B3_AABB_H\n" +	"#define B3_AABB_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef 
__cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	
"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" 
+	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	
m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif 
//B3_MAT3x3_H\n" +	"typedef struct b3Aabb b3Aabb_t;\n" +	"struct b3Aabb\n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float m_min[4];\n" +	"		b3Float4 m_minVec;\n" +	"		int m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float	m_max[4];\n" +	"		b3Float4 m_maxVec;\n" +	"		int m_signedMaxIndices[4];\n" +	"	};\n" +	"};\n" +	"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +	"						b3Float4ConstArg pos,\n" +	"						b3QuatConstArg orn,\n" +	"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +	"{\n" +	"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +	"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +	"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +	"		b3Mat3x3 m;\n" +	"		m = b3QuatGetRotationMatrix(orn);\n" +	"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +	"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +	"		\n" +	"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +	"										 0.f);\n" +	"		*aabbMinOut = center-extent;\n" +	"		*aabbMaxOut = center+extent;\n" +	"}\n" +	"/// conservative test for overlap between two aabbs\n" +	"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +	"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +	"{\n" +	"	bool overlap = true;\n" +	"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +	"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +	"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? 
false : overlap;\n" +	"	return overlap;\n" +	"}\n" +	"#endif //B3_AABB_H\n" +	"/*\n" +	"Bullet Continuous Collision Detection and Physics Library\n" +	"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" +	"This software is provided 'as-is', without any express or implied warranty.\n" +	"In no event will the authors be held liable for any damages arising from the use of this software.\n" +	"Permission is granted to anyone to use this software for any purpose,\n" +	"including commercial applications, and to alter it and redistribute it freely,\n" +	"subject to the following restrictions:\n" +	"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +	"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +	"3. This notice may not be removed or altered from any source distribution.\n" +	"*/\n" +	"#ifndef B3_INT2_H\n" +	"#define B3_INT2_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define b3UnsignedInt2 uint2\n" +	"#define b3Int2 int2\n" +	"#define b3MakeInt2 (int2)\n" +	"#endif //__cplusplus\n" +	"#endif\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"	\n" +	"//	float4 a1 = make_float4(a.xyz,0.f);\n" +	"//	float4 b1 = make_float4(b.xyz,0.f);\n" +	"//	return cross(a1,b1);\n" +	"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//return c;\n" +	"}\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	
float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	v = make_float4(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, const float4* 
vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" 
+	"	*depth = d0<d1 ? d0:d1;\n" +	"	return true;\n" +	"}\n" +	"inline bool IsAlmostZero(const float4 v)\n" +	"{\n" +	"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* verticesA, \n" +	"	
__global const float4* uniqueEdgesA, \n" +	"	__global const btGpuFace* facesA,\n" +	"	__global const int*  indicesA,\n" +	"	const float4* verticesB,\n" +	"	const float4* uniqueEdgesB, \n" +	"	const btGpuFace* facesB,\n" +	"	const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS *= -1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"		float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	
{\n" +	"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje *= -1.f;\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						result = false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? 
d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"inline int	findClippingFaces(const float4 separatingNormal,\n" +	"                      const ConvexPolyhedronCL* hullA, \n" +	"					  __global const ConvexPolyhedronCL* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"                       __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"					  const float4* verticesA,\n" +	"                      const btGpuFace* facesA,\n" +	"                      const int* indicesA,\n" +	"					  __global const float4* verticesB,\n" +	"                      __global const btGpuFace* facesB,\n" +	"                      __global const int* indicesB,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=0;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	{\n" +	"		
const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		int numVertices = polyB.m_numIndices;\n" +	"        if (numVertices>capacityWorldVerts)\n" +	"            numVertices = capacityWorldVerts;\n" +	"        if (numVertices<0)\n" +	"            numVertices = 0;\n" +	"        \n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"            if (e0<capacityWorldVerts)\n" +	"            {\n" +	"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"            }\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int closestFaceA=0;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"    if (numVerticesA>capacityWorldVerts)\n" +	"       numVerticesA = capacityWorldVerts;\n" +	"    if (numVerticesA<0)\n" +	"        numVerticesA=0;\n" +	"    \n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        if (e0<capacityWorldVerts)\n" +	"        {\n" +	"            const float4 a = 
verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"        }\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" +	"                                                __global const BodyData* rigidBodies,\n" +	"                                                __global const btCollidableGpu* collidables,\n" +	"                                                __global const ConvexPolyhedronCL* convexShapes,\n" +	"                                                __global const float4* vertices,\n" +	"                                                __global const float4* uniqueEdges,\n" +	"                                                __global const btGpuFace* faces,\n" +	"                                                __global const int* indices,\n" +	"                                                __global const btGpuChildShape* gpuChildShapes,\n" +	"                                                __global btAabbCL* aabbs,\n" +	"                                                __global float4* concaveSeparatingNormalsOut,\n" +	"                                                __global int* concaveHasSeparatingNormals,\n" +	"                                                __global int4* clippingFacesOut,\n" +	"                                                __global float4* worldVertsA1GPU,\n" +	"                                                __global float4*  worldNormalsAGPU,\n" +	"                                                __global float4* 
worldVertsB1GPU,\n" +	"                                                __global float* dmins,\n" +	"                                                int vertexFaceCapacity,\n" +	"                                                int numConcavePairs\n" +	"                                                )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"    \n" +	"	concaveHasSeparatingNormals[i] = 0;\n" +	"    \n" +	"	int pairIdx = i;\n" +	"    \n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"    \n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"    \n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"    \n" +	"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +	"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"	{\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"		return;\n" +	"	}\n" +	"    \n" +	"    \n" +	"    \n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"    \n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"    \n" +	"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		
float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"        \n" +	"		triAabb.m_min = min(triAabb.m_min,vert);\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);\n" +	"        \n" +	"	}\n" +	"    \n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" +	"    \n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"        \n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"        \n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"        \n" +	"        \n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"        \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"        \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"        \n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			
facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"        \n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"                \n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"        \n" +	"        \n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"        \n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"        \n" +	"		\n" +	"        \n" +	"        \n" +	"		
///////////////////\n" +	"		///compound shape support\n" +	"        \n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	"		//////////////////\n" +	"        \n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"        \n" +	"        \n" +	"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"                                             posA,ornA,\n" +	"                                             posB,ornB,\n" +	"                                             DeltaC2,\n" +	"                                             verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                             vertices,uniqueEdges,faces,indices,\n" +	"                                             &sepAxis,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" +	"                                                 posB,ornB,\n" +	"                                                 
posA,ornA,\n" +	"                                                 DeltaC2,\n" +	"                                                 vertices,uniqueEdges,faces,indices,\n" +	"                                                 verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                                 &sepAxis,&dmin);\n" +	"            \n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				hasSeparatingAxis = 1;\n" +	"			}\n" +	"		}	\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"            dmins[i] = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"            \n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" +	"                                                          __global const BodyData* rigidBodies,\n" +	"                                                          __global const btCollidableGpu* collidables,\n" +	"                                                          __global const ConvexPolyhedronCL* convexShapes,\n" +	"                                                          __global const float4* vertices,\n" +	"                                                          __global const float4* uniqueEdges,\n" +	"                                                          __global const btGpuFace* faces,\n" +	"                                                          __global const int* indices,\n" +	"                                                          __global const btGpuChildShape* gpuChildShapes,\n" +	"                                                          __global btAabbCL* aabbs,\n" +	"                         
                                 __global float4* concaveSeparatingNormalsOut,\n" +	"                                                          __global int* concaveHasSeparatingNormals,\n" +	"                                                          __global int4* clippingFacesOut,\n" +	"                                                          __global float4* worldVertsA1GPU,\n" +	"                                                          __global float4*  worldNormalsAGPU,\n" +	"                                                          __global float4* worldVertsB1GPU,\n" +	"                                                          __global float* dmins,\n" +	"                                                          int vertexFaceCapacity,\n" +	"                                                          int numConcavePairs\n" +	"                                                          )\n" +	"{\n" +	"    \n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"    \n" +	"	if (!concaveHasSeparatingNormals[i])\n" +	"        return;\n" +	"    \n" +	"	int pairIdx = i;\n" +	"    \n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"    \n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"    \n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"    \n" +	"    \n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"    \n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"    \n" +	"	btGpuFace 
face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"        \n" +	"		triAabb.m_min = min(triAabb.m_min,vert);\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);\n" +	"        \n" +	"	}\n" +	"    \n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" +	"    \n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = dmins[i];\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"        sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" +	"        \n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"        \n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"        \n" +	"        \n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"        \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"        \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"        \n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"      
  \n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"                \n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"        \n" +	"        \n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"        \n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"        \n" +	"		\n" +	"        \n" +	"        \n" +	"		///////////////////\n" +	"		///compound shape support\n" +	"        \n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = 
transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	"		//////////////////\n" +	"        \n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"        \n" +	"        \n" +	"		{\n" +	"			bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"                                                              posA,ornA,\n" +	"                                                              posB,ornB,\n" +	"                                                              DeltaC2,\n" +	"                                                              verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"                                                              vertices,uniqueEdges,faces,indices,\n" +	"                                                              &sepAxis,&dmin);\n" +	"                \n" +	"			if (!sepEE)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				hasSeparatingAxis = 1;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"			sepAxis.w = dmin;\n" +	"            dmins[i] = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"           \n" +	" 	float minDist = -1e30f;\n" +	"			float maxDist = 0.02f;\n" +	"            \n" +	"            findClippingFaces(sepAxis,\n" +	"                              &convexPolyhedronA,\n" +	"                              &convexShapes[shapeIndexB],\n" +	"                              posA,ornA,\n" +	"                              posB,ornB,\n" +	"                              
worldVertsA1GPU,\n" +	"                              worldNormalsAGPU,\n" +	"                              worldVertsB1GPU,\n" +	"                              vertexFaceCapacity,\n" +	"                              minDist, maxDist,\n" +	"                              verticesA,\n" +	"                              facesA,\n" +	"                              indicesA,\n" +	"                              vertices,\n" +	"                              faces,\n" +	"                              indices,\n" +	"                              clippingFacesOut, pairIdx);\n" +	"	           \n" +	"            \n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"	\n" +	"	concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" +	"}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h index 6f8b0a90db..e627af2799 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -1,2104 +1,2103 @@  //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -"	union {\n" -"		int m_numChildShapes;\n" -"		int m_bvhIndex;\n" -"	};\n" -"	union\n" -"	{\n" -"		float m_radius;\n" 
-"		int	m_compoundBvhIndex;\n" -"	};\n" -"	\n" -"	int m_shapeType;\n" -"	int m_shapeIndex;\n" -"	\n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes\n" -"	int	m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -"	float4		m_aabbMin;\n" -"	float4		m_aabbMax;\n" -"	float4		m_quantization;\n" -"	int			m_numNodes;\n" -"	int			m_numSubTrees;\n" -"	int			m_nodeOffset;\n" -"	int			m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	unsigned int x=0;\n" -"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -"	// Get only the lower bits where the triangle index is stored\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" -"}\n" -"	\n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -"	//12 bytes\n" -"	unsigned short int	m_quantizedAabbMin[3];\n" -"	unsigned short int	m_quantizedAabbMax[3];\n" -"	//4 bytes, points to the root of the subtree\n" -"	int			m_rootNodeIndex;\n" -"	//4 bytes\n" -"	int			m_subtreeSize;\n" -"	int			m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -"	float4	m_childPosition;\n" -"	float4	m_childOrientation;\n" -"	int m_shapeIndex;\n" -"	int m_unused0;\n" -"	int m_unused1;\n" -"	int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -"	float4 m_pos;\n" -"	float4 m_quat;\n" -"	float4 m_linVel;\n" -"	float4 m_angVel;\n" -"	u32 m_collidableIdx;\n" -"	float m_invMass;\n" -"	float m_restituitionCoeff;\n" -"	float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct  \n" -"{\n" -"	float4		m_localCenter;\n" -"	float4		m_extents;\n" -"	float4		mC;\n" -"	float4		mE;\n" -"	\n" -"	float			m_radius;\n" -"	int	m_faceOffset;\n" -"	int m_numFaces;\n" -"	int	m_numVertices;\n" -"	int m_vertexOffset;\n" -"	int	m_uniqueEdgesOffset;\n" -"	int	m_numUniqueEdges;\n" -"	int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -"	union\n" -"	{\n" -"		float4	m_min;\n" -"		float   m_minElems[4];\n" -"		int			m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float4	m_max;\n" -"		float   m_maxElems[4];\n" -"		int			m_maxIndices[4];\n" -"	};\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -"	int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define 
B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Float4;\n" -"	#define b3Float4ConstArg const b3Float4\n" -"	#define b3MakeFloat4 (float4)\n" -"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return dot(a1, b1);\n" -"	}\n" -"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -"	{\n" -"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -"		return cross(a1, b1);\n" -"	}\n" -"	#define b3MinFloat4 min\n" -"	#define b3MaxFloat4 max\n" -"	#define b3Normalized(a) normalize(a)\n" -"#endif \n" -"		\n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" -"		return false;\n" -"	return true;\n" -"}\n" -"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -"    float maxDot = -B3_INFINITY;\n" -"    int i = 0;\n" -"    int ptIndex = -1;\n" -"    for( i = 0; i < vecLen; i++ )\n" -"    {\n" -"        float dot = b3Dot3F4(vecArray[i],vec);\n" -"            \n" -"        if( dot > maxDot )\n" -"        {\n" -"            maxDot = dot;\n" -"            ptIndex = i;\n" -"        }\n" -"    }\n" -"	b3Assert(ptIndex>=0);\n" -"    if (ptIndex<0)\n" -"	{\n" -"		ptIndex = 0;\n" -"	}\n" -"    *dotOut = maxDot;\n" -"    return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" 
-"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"	typedef float4	b3Quat;\n" -"	#define b3QuatConstArg const b3Quat\n" -"	\n" -"	\n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -"	v = (float4)(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"	\n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -"	b3Quat ans;\n" -"	ans = b3Cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -"	b3Quat q;\n" -"	q=in;\n" -"	//return b3FastNormalize4(in);\n" -"	float len = native_sqrt(dot(q, q));\n" -"	if(len > 0.f)\n" -"	{\n" -"		q *= 1.f / len;\n" -"	}\n" -"	else\n" -"	{\n" -"		q.x = q.y = q.z = 0.f;\n" -"		q.w = 1.f;\n" -"	}\n" -"	return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	b3Quat qInv = b3QuatInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -"	return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" -"{\n" -"	return b3QuatRotate( orientation, 
point ) + (translation);\n" -"}\n" -"	\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -"	b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -"	out.m_row[0].w = 0.f;\n" -"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -"	out.m_row[1].w = 0.f;\n" -"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -"	out.m_row[2].w = 0.f;\n" -"	return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = fabs(matIn.m_row[0]);\n" -"	out.m_row[1] = fabs(matIn.m_row[1]);\n" -"	out.m_row[2] = fabs(matIn.m_row[2]);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(0.f);\n" -"	m.m_row[1] = (b3Float4)(0.f);\n" -"	m.m_row[2] = (b3Float4)(0.f);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -"	b3Mat3x3 m;\n" -"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" -"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" -"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" -"	return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 
mtTranspose(b3Mat3x3 m)\n" -"{\n" -"	b3Mat3x3 out;\n" -"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -"	return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -"	b3Mat3x3 transB;\n" -"	transB = mtTranspose( b );\n" -"	b3Mat3x3 ans;\n" -"	//	why this doesn't run when 0ing in the for{}\n" -"	a.m_row[0].w = 0.f;\n" -"	a.m_row[1].w = 0.f;\n" -"	a.m_row[2].w = 0.f;\n" -"	for(int i=0; i<3; i++)\n" -"	{\n" -"//	a.m_row[i].w = 0.f;\n" -"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -"		ans.m_row[i].w = 0.f;\n" -"	}\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a.m_row[0], b );\n" -"	ans.y = b3Dot3F4( a.m_row[1], b );\n" -"	ans.z = b3Dot3F4( a.m_row[2], b );\n" -"	ans.w = 0.f;\n" -"	return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -"	b3Float4 ans;\n" -"	ans.x = b3Dot3F4( a, colx );\n" -"	ans.y = b3Dot3F4( a, coly );\n" -"	ans.z = b3Dot3F4( a, colz );\n" -"	return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -"	union\n" -"	{\n" -"		float m_min[4];\n" -"		b3Float4 m_minVec;\n" -"		int m_minIndices[4];\n" -"	};\n" -"	union\n" -"	{\n" -"		float	m_max[4];\n" -"		b3Float4 m_maxVec;\n" -"		int m_signedMaxIndices[4];\n" -"	};\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg 
localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -"						b3Float4ConstArg pos,\n" -"						b3QuatConstArg orn,\n" -"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -"		b3Mat3x3 m;\n" -"		m = b3QuatGetRotationMatrix(orn);\n" -"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -"		\n" -"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -"										 0.f);\n" -"		*aabbMinOut = center-extent;\n" -"		*aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -"	bool overlap = true;\n" -"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -"	return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -"	float4 m_plane;\n" -"	int m_indexOffset;\n" -"	int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -"	return cross(a,b);\n" -"	\n" -"//	float4 a1 = make_float4(a.xyz,0.f);\n" -"//	float4 b1 = make_float4(b.xyz,0.f);\n" -"//	return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -"	\n" -"	//return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -"	float4 a1 = make_float4(a.xyz,0.f);\n" -"	float4 b1 = make_float4(b.xyz,0.f);\n" -"	return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -"	v = make_float4(v.xyz,0.f);\n" -"	return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"//	Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -"	
Quaternion ans;\n" -"	ans = cross3( a, b );\n" -"	ans += a.w*b+b.w*a;\n" -"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -"	ans.w = a.w*b.w - dot3F4(a, b);\n" -"	return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -"	return fastNormalize4(in);\n" -"//	in /= length( in );\n" -"//	return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -"	Quaternion qInv = qtInvert( q );\n" -"	float4 vcpy = vec;\n" -"	vcpy.w = 0.f;\n" -"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -"	return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -"	return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -"	return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -"	return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -"	return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" -"const float4* dir, 
__global const float4* vertices, float* min, float* max)\n" -"{\n" -"	min[0] = FLT_MAX;\n" -"	max[0] = -FLT_MAX;\n" -"	int numVerts = hull->m_numVertices;\n" -"	const float4 localDir = qtInvRotate(orn,*dir);\n" -"	float offset = dot(pos,*dir);\n" -"	for(int i=0;i<numVerts;i++)\n" -"	{\n" -"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -"		if(dp < min[0])	\n" -"			min[0] = dp;\n" -"		if(dp > max[0])	\n" -"			max[0] = dp;\n" -"	}\n" -"	if(min[0]>max[0])\n" -"	{\n" -"		float tmp = min[0];\n" -"		min[0] = max[0];\n" -"		max[0] = tmp;\n" -"	}\n" -"	min[0] += offset;\n" -"	max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? 
d0:d1;\n" -"	return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -"		return false;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* verticesA, \n" -"	__global const float4* uniqueEdgesA, \n" -"	__global const btGpuFace* facesA,\n" -"	
__global const int*  indicesA,\n" -"	const float4* verticesB,\n" -"	const float4* uniqueEdgesB, \n" -"	const btGpuFace* facesB,\n" -"	const int*  indicesB,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS *= -1.f;\n" -"			curPlaneTests++;\n" -"			float d;\n" -"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -"				return false;\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	const float4* verticesA, \n" -"	const float4* uniqueEdgesA, \n" -"	const btGpuFace* facesA,\n" -"	const int*  indicesA,\n" -"	__global const float4* verticesB, \n" -"	__global const float4* uniqueEdgesB, \n" -"	__global const btGpuFace* facesB,\n" -"	__global const int*  indicesB,\n" -"		float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int 
e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje *= -1.f;\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						result = false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA,const float4 ornA,\n" -"	const float4 posB,const float4 ornB,\n" -"	float4* sep_axis, __global const float4* vertices,float* depth)\n" -"{\n" -"	float Min0,Max0;\n" -"	float Min1,Max1;\n" -"	project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" -"	project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" -"	if(Max0<Min1 || Max1<Min0)\n" -"		return false;\n" -"	float d0 = Max0 - Min1;\n" -"	float d1 = Max1 - Min0;\n" -"	*depth = d0<d1 ? 
d0:d1;\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxis(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices, \n" -"	__global const float4* uniqueEdges, \n" -"	__global const btGpuFace* faces,\n" -"	__global const int*  indices,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	\n" -"	int curPlaneTests=0;\n" -"	{\n" -"		int numFacesA = hullA->m_numFaces;\n" -"		// Test normals from hullA\n" -"		for(int i=0;i<numFacesA;i++)\n" -"		{\n" -"			const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" -"			float4 faceANormalWS = qtRotate(ornA,normal);\n" -"	\n" -"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -"				faceANormalWS*=-1.f;\n" -"				\n" -"			curPlaneTests++;\n" -"	\n" -"			float d;\n" -"			if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" -"				return false;\n" -"	\n" -"			if(d<*dmin)\n" -"			{\n" -"				*dmin = d;\n" -"				*sep = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"		if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"		{\n" -"			*sep = -(*sep);\n" -"		}\n" -"	\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisUnitSphere(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices,\n" -"	__global const float4* unitSphereDirections,\n" -"	int numUnitSphereDirections,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test unit sphere directions\n" -"	for (int 
i=0;i<numUnitSphereDirections;i++)\n" -"	{\n" -"		float4 crossje;\n" -"		crossje = unitSphereDirections[i];	\n" -"		if (dot3F4(DeltaC2,crossje)>0)\n" -"			crossje *= -1.f;\n" -"		{\n" -"			float dist;\n" -"			bool result = true;\n" -"			float Min0,Max0;\n" -"			float Min1,Max1;\n" -"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"		\n" -"			if(Max0<Min1 || Max1<Min0)\n" -"				return false;\n" -"		\n" -"			float d0 = Max0 - Min1;\n" -"			float d1 = Max1 - Min0;\n" -"			dist = d0<d1 ? d0:d1;\n" -"			result = true;\n" -"	\n" -"			if(dist<*dmin)\n" -"			{\n" -"				*dmin = dist;\n" -"				*sep = crossje;\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdge(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -"	const float4 posA1,\n" -"	const float4 ornA,\n" -"	const float4 posB1,\n" -"	const float4 ornB,\n" -"	const float4 DeltaC2,\n" -"	__global const float4* vertices, \n" -"	__global const float4* uniqueEdges, \n" -"	__global const btGpuFace* faces,\n" -"	__global const int*  indices,\n" -"	float4* sep,\n" -"	float* dmin)\n" -"{\n" -"	\n" -"	float4 posA = posA1;\n" -"	posA.w = 0.f;\n" -"	float4 posB = posB1;\n" -"	posB.w = 0.f;\n" -"	int curPlaneTests=0;\n" -"	int curEdgeEdge = 0;\n" -"	// Test edges\n" -"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -"	{\n" -"		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" -"		float4 edge0World = qtRotate(ornA,edge0);\n" -"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -"		{\n" -"			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" -"			float4 edge1World = qtRotate(ornB,edge1);\n" -"			float4 crossje = cross3(edge0World,edge1World);\n" -"			curEdgeEdge++;\n" -"			if(!IsAlmostZero(crossje))\n" -"			{\n" -"				crossje = normalize3(crossje);\n" -"				
if (dot3F4(DeltaC2,crossje)<0)\n" -"					crossje*=-1.f;\n" -"					\n" -"				float dist;\n" -"				bool result = true;\n" -"				{\n" -"					float Min0,Max0;\n" -"					float Min1,Max1;\n" -"					project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -"					project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -"				\n" -"					if(Max0<Min1 || Max1<Min0)\n" -"						return false;\n" -"				\n" -"					float d0 = Max0 - Min1;\n" -"					float d1 = Max1 - Min0;\n" -"					dist = d0<d1 ? d0:d1;\n" -"					result = true;\n" -"				}\n" -"				\n" -"				if(dist<*dmin)\n" -"				{\n" -"					*dmin = dist;\n" -"					*sep = crossje;\n" -"				}\n" -"			}\n" -"		}\n" -"	}\n" -"	\n" -"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -"	{\n" -"		*sep = -(*sep);\n" -"	}\n" -"	return true;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global const btGpuChildShape* gpuChildShapes,\n" -"																					__global volatile float4* gpuCompoundSepNormalsOut,\n" -"																					__global volatile int* gpuHasCompoundSepNormalsOut,\n" -"																					int numCompoundPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numCompoundPairs)\n" -"	{\n" -"		int bodyIndexA = gpuCompoundPairs[i].x;\n" -"		int bodyIndexB = gpuCompoundPairs[i].y;\n" -"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" -"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" -"		\n" -"		int 
collidableIndexA = -1;\n" -"		int collidableIndexB = -1;\n" -"		\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		\n" -"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							\n" -"		if (childShapeIndexA >= 0)\n" -"		{\n" -"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"			float4 newOrnA = qtMul(ornA,childOrnA);\n" -"			posA = newPosA;\n" -"			ornA = newOrnA;\n" -"		} else\n" -"		{\n" -"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		}\n" -"		\n" -"		if (childShapeIndexB>=0)\n" -"		{\n" -"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"		} else\n" -"		{\n" -"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" -"		}\n" -"	\n" -"		gpuHasCompoundSepNormalsOut[i] = 0;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -"	\n" -"		if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"		int hasSeparatingAxis = 5;\n" -"							\n" -"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		posA.w = 0.f;\n" -"		posB.w = 
0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal = make_float4(1,0,0,0);\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else//(!sepB)\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -"				if (sepEE)\n" -"				{\n" -"						gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" -"						gpuHasCompoundSepNormalsOut[i] = 1;\n" -"				}//sepEE\n" -"			}//(!sepB)\n" -"		}//(!sepA)\n" -"		\n" -"		\n" -"	}\n" -"		\n" -"}\n" -"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -"		b3Float4 vecOut;\n" -"		vecOut = b3MakeFloat4(\n" -"			(float)(vecIn[0]) / (quantization.x),\n" -"			(float)(vecIn[1]) / (quantization.y),\n" -"			(float)(vecIn[2]) / (quantization.z),\n" -"			0.f);\n" -"		vecOut += bvhAabbMin;\n" -"		return vecOut;\n" -"}\n" -"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -"		b3Float4 vecOut;\n" -"		vecOut = b3MakeFloat4(\n" -"			(float)(vecIn[0]) / (quantization.x),\n" -"			(float)(vecIn[1]) / (quantization.y),\n" -"			
(float)(vecIn[2]) / (quantization.z),\n" -"			0.f);\n" -"		vecOut += bvhAabbMin;\n" -"		return vecOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findCompoundPairsKernel( __global const int4* pairs, \n" -"	__global const BodyData* rigidBodies, \n" -"	__global const btCollidableGpu* collidables,\n" -"	__global const ConvexPolyhedronCL* convexShapes, \n" -"	__global const float4* vertices,\n" -"	__global const float4* uniqueEdges,\n" -"	__global const btGpuFace* faces,\n" -"	__global const int* indices,\n" -"	__global b3Aabb_t* aabbLocalSpace,\n" -"	__global const btGpuChildShape* gpuChildShapes,\n" -"	__global volatile int4* gpuCompoundPairsOut,\n" -"	__global volatile int* numCompoundPairsOut,\n" -"	__global const b3BvhSubtreeInfo* subtrees,\n" -"	__global const b3QuantizedBvhNode* quantizedNodes,\n" -"	__global const b3BvhInfo* bvhInfos,\n" -"	int numPairs,\n" -"	int maxNumCompoundPairsCapacity\n" -"	)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i<numPairs)\n" -"	{\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"		{\n" -"			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" -"			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" -"			int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" -"			int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" -"			int 
subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" -"			int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" -"			\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			\n" -"			for (int p=0;p<numSubTreesA;p++)\n" -"			{\n" -"				b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" -"				//bvhInfos[bvhA].m_quantization\n" -"				b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"				b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"				b3Float4 aabbAMinOut,aabbAMaxOut;\n" -"				float margin=0.f;\n" -"				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -"				\n" -"				for (int q=0;q<numSubTreesB;q++)\n" -"				{\n" -"					b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" -"					b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"					b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"					b3Float4 aabbBMinOut,aabbBMaxOut;\n" -"					float margin=0.f;\n" -"					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -"					\n" -"					\n" -"					bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -"					if (aabbOverlap)\n" -"					{\n" -"						\n" -"						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" -"						int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" -"						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" -"						int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" -"					
	b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" -"						b3Int2 node0;\n" -"						node0.x = startNodeIndexA;\n" -"						node0.y = startNodeIndexB;\n" -"						int maxStackDepth = B3_MAX_STACK_DEPTH;\n" -"						int depth=0;\n" -"						nodeStack[depth++]=node0;\n" -"						do\n" -"						{\n" -"							b3Int2 node = nodeStack[--depth];\n" -"							b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"							b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -"							b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"							b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -"							float margin=0.f;\n" -"							b3Float4 aabbAMinOut,aabbAMaxOut;\n" -"							b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -"							b3Float4 aabbBMinOut,aabbBMaxOut;\n" -"							b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -"							\n" -"							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -"							if (nodeOverlap)\n" -"							{\n" -"								bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" -"								bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" -"								bool isInternalA = !isLeafA;\n" -"								bool isInternalB = !isLeafB;\n" -"								//fail, even though it might hit two leaf nodes\n" -"								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" -"								{\n" -"									//printf(\"Error: traversal exceeded maxStackDepth\");\n" -"									continue;\n" -"								}\n" -"								if(isInternalA)\n" -"								{\n" -"									int nodeAleftChild = node.x+1;\n" -"									bool 
isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" -"									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" -"									if(isInternalB)\n" -"									{					\n" -"										int nodeBleftChild = node.y+1;\n" -"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" -"									}\n" -"									else\n" -"									{\n" -"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" -"										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" -"									}\n" -"								}\n" -"								else\n" -"								{\n" -"									if(isInternalB)\n" -"									{\n" -"										int nodeBleftChild = node.y+1;\n" -"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -"										int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" -"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" -"									}\n" -"									else\n" -"									{\n" -"										int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"										if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"										{\n" -"											int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" -"											int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" -"											gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -"										}\n" -"									}\n" -"								}\n" -"							}\n" -"						} while (depth);\n" -"					}\n" -"				}\n" -"			}\n" -"			\n" -"			return;\n" -"		}\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"		{\n" -"			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" -"			{\n" -"				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" -"				for (int c=0;c<numChildrenA;c++)\n" -"				{\n" -"					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" -"					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -"					float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"					float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -"					float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -"					float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -"					float4 newOrnA = qtMul(ornA,childOrnA);\n" -"					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" -"					b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" -"					float margin = 0.f;\n" -"					\n" -"					b3Float4 
aabbAMinWS;\n" -"					b3Float4 aabbAMaxWS;\n" -"					\n" -"					b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" -"						newPosA,\n" -"						newOrnA,\n" -"						&aabbAMinWS,&aabbAMaxWS);\n" -"						\n" -"					\n" -"					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"					{\n" -"						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"						for (int b=0;b<numChildrenB;b++)\n" -"						{\n" -"							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"							float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"							float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"							float4 newOrnB = qtMul(ornB,childOrnB);\n" -"							int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"							b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" -"							\n" -"							b3Float4 aabbBMinWS;\n" -"							b3Float4 aabbBMaxWS;\n" -"							\n" -"							b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" -"								newPosB,\n" -"								newOrnB,\n" -"								&aabbBMinWS,&aabbBMaxWS);\n" -"								\n" -"								\n" -"							\n" -"							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" -"							if (aabbOverlap)\n" -"							{\n" -"								int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"								float dmin = FLT_MAX;\n" -"								float4 posA = newPosA;\n" -"								posA.w = 0.f;\n" -"								float4 posB = newPosB;\n" -"								posB.w = 0.f;\n" -"								float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"								float4 ornA = newOrnA;\n" -"								float4 c0 = transform(&c0local, &posA, &ornA);\n" -"								
float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"								float4 ornB =newOrnB;\n" -"								float4 c1 = transform(&c1local,&posB,&ornB);\n" -"								const float4 DeltaC2 = c0 - c1;\n" -"								{//\n" -"									int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"									if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"									{\n" -"										gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -"									}\n" -"								}//\n" -"							}//fi(1)\n" -"						} //for (int b=0\n" -"					}//if (collidables[collidableIndexB].\n" -"					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"					{\n" -"						if (1)\n" -"						{\n" -"							int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"							float dmin = FLT_MAX;\n" -"							float4 posA = newPosA;\n" -"							posA.w = 0.f;\n" -"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"							posB.w = 0.f;\n" -"							float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"							float4 ornA = newOrnA;\n" -"							float4 c0 = transform(&c0local, &posA, &ornA);\n" -"							float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"							float4 c1 = transform(&c1local,&posB,&ornB);\n" -"							const float4 DeltaC2 = c0 - c1;\n" -"							{\n" -"								int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"								if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"								{\n" -"									gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" -"								}//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"							}//\n" -"						}//fi (1)\n" -"					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"				}//for (int b=0;b<numChildrenB;b++)	\n" -"				return;\n" -"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" 
-"			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" -"				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"			{\n" -"				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -"				for (int b=0;b<numChildrenB;b++)\n" -"				{\n" -"					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -"					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"					float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -"					float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"					float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"					float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" -"					float4 newOrnB = qtMul(ornB,childOrnB);\n" -"					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"					//////////////////////////////////////\n" -"					if (1)\n" -"					{\n" -"						int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"						float dmin = FLT_MAX;\n" -"						float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"						posA.w = 0.f;\n" -"						float4 posB = newPosB;\n" -"						posB.w = 0.f;\n" -"						float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"						float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"						float4 c0 = transform(&c0local, &posA, &ornA);\n" -"						float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"						float4 ornB =newOrnB;\n" -"						float4 c1 = transform(&c1local,&posB,&ornB);\n" -"						const float4 DeltaC2 = c0 - c1;\n" -"						{//\n" -"							int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -"							if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"							{\n" -"								gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" -"							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -"						}//\n" -"					}//fi (1)	\n" -"				}//for (int 
b=0;b<numChildrenB;b++)\n" -"				return;\n" -"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"			return;\n" -"		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -"	}//i<numPairs\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findSeparatingAxisKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global volatile float4* separatingNormals,\n" -"																					__global volatile int* hasSeparatingAxis,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"	\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"		\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" 
-"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"			\n" -"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"			return;\n" -"		}\n" -"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal;\n" -"		\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																								posB,ornB,\n" -"																								DeltaC2,\n" -"																								vertices,uniqueEdges,faces,\n" -"																								indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis[i] = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -"																									posA,ornA,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis[i] = 0;\n" -"			} else\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"				if 
(!sepEE)\n" -"				{\n" -"					hasSeparatingAxis[i] = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis[i] = 1;\n" -"					separatingNormals[i] = sepNormal;\n" -"				}\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"}\n" -"__kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global volatile float4* separatingNormals,\n" -"																					__global volatile int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"	\n" -"		int bodyIndexA = pairs[i].x;\n" -"		int bodyIndexB = pairs[i].y;\n" -"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	\n" -"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	\n" -"		hasSeparatingAxis[i] = 0;	\n" -"		\n" -"		//once the broadphase avoids static-static pairs, we can remove this test\n" -"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -"		{\n" -"			return;\n" -"		}\n" -"		\n" -"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -"		{\n" -"			return;\n" -"		}\n" -"			\n" -"		int numFacesA = 
convexShapes[shapeIndexA].m_numFaces;\n" -"		float dmin = FLT_MAX;\n" -"		dmins[i] = dmin;\n" -"		\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		float4 sepNormal;\n" -"		\n" -"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																								posB,ornB,\n" -"																								DeltaC2,\n" -"																								vertices,uniqueEdges,faces,\n" -"																								indices,&sepNormal,&dmin);\n" -"		hasSeparatingAxis[i] = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis[i] = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -"																									posA,ornA,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"			if (sepB)\n" -"			{\n" -"				dmins[i] = dmin;\n" -"				hasSeparatingAxis[i] = 1;\n" -"				separatingNormals[i] = sepNormal;\n" -"			}\n" -"		}\n" -"		\n" -"	}\n" -"}\n" -"__kernel void   findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" -"																					__global const BodyData* rigidBodies, \n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global 
const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global  float4* separatingNormals,\n" -"																					__global  int* hasSeparatingAxis,\n" -"																					__global  float* dmins,\n" -"																					__global const float4* unitSphereDirections,\n" -"																					int numUnitSphereDirections,\n" -"																					int numPairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	\n" -"	if (i<numPairs)\n" -"	{\n" -"		if (hasSeparatingAxis[i])\n" -"		{\n" -"	\n" -"			int bodyIndexA = pairs[i].x;\n" -"			int bodyIndexB = pairs[i].y;\n" -"	\n" -"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"		\n" -"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"			\n" -"			\n" -"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	\n" -"			float dmin = dmins[i];\n" -"	\n" -"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"			posA.w = 0.f;\n" -"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"			posB.w = 0.f;\n" -"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"			float4 c0 = transform(&c0local, &posA, &ornA);\n" -"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"			float4 c1 = transform(&c1local,&posB,&ornB);\n" -"			const float4 DeltaC2 = c0 - c1;\n" -"			float4 sepNormal = separatingNormals[i];\n" -"			\n" -"			\n" -"			\n" -"			bool sepEE = false;\n" -"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -"			if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" -"			{\n" -"				sepEE = findSeparatingAxisEdgeEdge(	
&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,uniqueEdges,faces,\n" -"																									indices,&sepNormal,&dmin);\n" -"																									\n" -"					if (!sepEE)\n" -"					{\n" -"						hasSeparatingAxis[i] = 0;\n" -"					} else\n" -"					{\n" -"						hasSeparatingAxis[i] = 1;\n" -"						separatingNormals[i] = sepNormal;\n" -"					}\n" -"			}\n" -"			/*\n" -"			///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" -"			else\n" -"			{\n" -"				sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -"																									posB,ornB,\n" -"																									DeltaC2,\n" -"																									vertices,unitSphereDirections,numUnitSphereDirections,\n" -"																									&sepNormal,&dmin);\n" -"					if (!sepEE)\n" -"					{\n" -"						hasSeparatingAxis[i] = 0;\n" -"					} else\n" -"					{\n" -"						hasSeparatingAxis[i] = 1;\n" -"						separatingNormals[i] = sepNormal;\n" -"					}\n" -"			}\n" -"			*/\n" -"		}		//if (hasSeparatingAxis[i])\n" -"	}//(i<numPairs)\n" -"}\n" -"inline int	findClippingFaces(const float4 separatingNormal,\n" -"                      const ConvexPolyhedronCL* hullA, \n" -"					  __global const ConvexPolyhedronCL* hullB,\n" -"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -"                       __global float4* worldVertsA1,\n" -"                      __global float4* worldNormalsA1,\n" -"                      __global float4* worldVertsB1,\n" -"                      int capacityWorldVerts,\n" -"                      const float minDist, float maxDist,\n" -"					  const float4* verticesA,\n" -"                      const btGpuFace* facesA,\n" -"                      const int* indicesA,\n" -"					  __global const float4* verticesB,\n" -"            
          __global const btGpuFace* facesB,\n" -"                      __global const int* indicesB,\n" -"                      __global int4* clippingFaces, int pairIndex)\n" -"{\n" -"	int numContactsOut = 0;\n" -"	int numWorldVertsB1= 0;\n" -"    \n" -"    \n" -"	int closestFaceB=0;\n" -"	float dmax = -FLT_MAX;\n" -"    \n" -"	{\n" -"		for(int face=0;face<hullB->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" -"			float d = dot3F4(WorldNormal,separatingNormal);\n" -"			if (d > dmax)\n" -"			{\n" -"				dmax = d;\n" -"				closestFaceB = face;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"	{\n" -"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -"		int numVertices = polyB.m_numIndices;\n" -"        if (numVertices>capacityWorldVerts)\n" -"            numVertices = capacityWorldVerts;\n" -"        \n" -"		for(int e0=0;e0<numVertices;e0++)\n" -"		{\n" -"            if (e0<capacityWorldVerts)\n" -"            {\n" -"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -"            }\n" -"		}\n" -"	}\n" -"    \n" -"    int closestFaceA=0;\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		for(int face=0;face<hullA->m_numFaces;face++)\n" -"		{\n" -"			const float4 Normal = make_float4(\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" -"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" -"                                              0.f);\n" -"			const 
float4 faceANormalWS = qtRotate(ornA,Normal);\n" -"            \n" -"			float d = dot3F4(faceANormalWS,separatingNormal);\n" -"			if (d < dmin)\n" -"			{\n" -"				dmin = d;\n" -"				closestFaceA = face;\n" -"                worldNormalsA1[pairIndex] = faceANormalWS;\n" -"			}\n" -"		}\n" -"	}\n" -"    \n" -"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -"    if (numVerticesA>capacityWorldVerts)\n" -"       numVerticesA = capacityWorldVerts;\n" -"    \n" -"	for(int e0=0;e0<numVerticesA;e0++)\n" -"	{\n" -"        if (e0<capacityWorldVerts)\n" -"        {\n" -"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -"        }\n" -"    }\n" -"    \n" -"    clippingFaces[pairIndex].x = closestFaceA;\n" -"    clippingFaces[pairIndex].y = closestFaceB;\n" -"    clippingFaces[pairIndex].z = numVerticesA;\n" -"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" -"    \n" -"    \n" -"	return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" -"																					__global const BodyData* rigidBodies,\n" -"																					__global const btCollidableGpu* collidables,\n" -"																					__global const ConvexPolyhedronCL* convexShapes, \n" -"																					__global const float4* vertices,\n" -"																					__global const float4* uniqueEdges,\n" -"																					__global const btGpuFace* faces,\n" -"																					__global const int* indices,\n" -"																					__global const btGpuChildShape* gpuChildShapes,\n" -"																					__global btAabbCL* aabbs,\n" -"																					__global float4* concaveSeparatingNormalsOut,\n" -"																					__global int* concaveHasSeparatingNormals,\n" -"																					__global int4* 
clippingFacesOut,\n" -"																					__global float4* worldVertsA1GPU,\n" -"																					__global float4*  worldNormalsAGPU,\n" -"																					__global float4* worldVertsB1GPU,\n" -"																					int vertexFaceCapacity,\n" -"																					int numConcavePairs\n" -"																					)\n" -"{\n" -"	int i = get_global_id(0);\n" -"	if (i>=numConcavePairs)\n" -"		return;\n" -"	concaveHasSeparatingNormals[i] = 0;\n" -"	int pairIdx = i;\n" -"	int bodyIndexA = concavePairs[i].x;\n" -"	int bodyIndexB = concavePairs[i].y;\n" -"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"	{\n" -"		concavePairs[pairIdx].w = -1;\n" -"		return;\n" -"	}\n" -"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -"	int numActualConcaveConvexTests = 0;\n" -"	\n" -"	int f = concavePairs[i].z;\n" -"	\n" -"	bool overlap = false;\n" -"	\n" -"	ConvexPolyhedronCL convexPolyhedronA;\n" -"	//add 3 vertices of the triangle\n" -"	convexPolyhedronA.m_numVertices = 3;\n" -"	convexPolyhedronA.m_vertexOffset = 0;\n" -"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -"	float4 triMinAabb, triMaxAabb;\n" -"	btAabbCL triAabb;\n" -"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -"	\n" -"	float4 verticesA[3];\n" -"	for (int i=0;i<3;i++)\n" -"	{\n" -"		int index = indices[face.m_indexOffset+i];\n" -"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -"		verticesA[i] = vert;\n" -"		localCenter += vert;\n" -"			\n" -"		
triAabb.m_min = min(triAabb.m_min,vert);		\n" -"		triAabb.m_max = max(triAabb.m_max,vert);		\n" -"	}\n" -"	overlap = true;\n" -"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -"		\n" -"	if (overlap)\n" -"	{\n" -"		float dmin = FLT_MAX;\n" -"		int hasSeparatingAxis=5;\n" -"		float4 sepAxis=make_float4(1,2,3,4);\n" -"		int localCC=0;\n" -"		numActualConcaveConvexTests++;\n" -"		//a triangle has 3 unique edges\n" -"		convexPolyhedronA.m_numUniqueEdges = 3;\n" -"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -"		float4 uniqueEdgesA[3];\n" -"		\n" -"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -"		convexPolyhedronA.m_faceOffset = 0;\n" -"                                  \n" -"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -"                             \n" -"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -"		int indicesA[3+3+2+2+2];\n" -"		int curUsedIndices=0;\n" -"		int fidx=0;\n" -"		//front size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[0] = 0;\n" -"			indicesA[1] = 1;\n" -"			indicesA[2] = 2;\n" -"			curUsedIndices+=3;\n" -"			float c = face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = normal.x;\n" -"			facesA[fidx].m_plane.y = normal.y;\n" -"			facesA[fidx].m_plane.z = normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		//back size of triangle\n" -"		{\n" -"			facesA[fidx].m_indexOffset=curUsedIndices;\n" -"			indicesA[3]=2;\n" -"			indicesA[4]=1;\n" -"			indicesA[5]=0;\n" -"	
		curUsedIndices+=3;\n" -"			float c = dot(normal,verticesA[0]);\n" -"			float c1 = -face.m_plane.w;\n" -"			facesA[fidx].m_plane.x = -normal.x;\n" -"			facesA[fidx].m_plane.y = -normal.y;\n" -"			facesA[fidx].m_plane.z = -normal.z;\n" -"			facesA[fidx].m_plane.w = c;\n" -"			facesA[fidx].m_numIndices=3;\n" -"		}\n" -"		fidx++;\n" -"		bool addEdgePlanes = true;\n" -"		if (addEdgePlanes)\n" -"		{\n" -"			int numVertices=3;\n" -"			int prevVertex = numVertices-1;\n" -"			for (int i=0;i<numVertices;i++)\n" -"			{\n" -"				float4 v0 = verticesA[i];\n" -"				float4 v1 = verticesA[prevVertex];\n" -"                                            \n" -"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -"				float c = -dot(edgeNormal,v0);\n" -"				facesA[fidx].m_numIndices = 2;\n" -"				facesA[fidx].m_indexOffset=curUsedIndices;\n" -"				indicesA[curUsedIndices++]=i;\n" -"				indicesA[curUsedIndices++]=prevVertex;\n" -"                                            \n" -"				facesA[fidx].m_plane.x = edgeNormal.x;\n" -"				facesA[fidx].m_plane.y = edgeNormal.y;\n" -"				facesA[fidx].m_plane.z = edgeNormal.z;\n" -"				facesA[fidx].m_plane.w = c;\n" -"				fidx++;\n" -"				prevVertex = i;\n" -"			}\n" -"		}\n" -"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -"		posA.w = 0.f;\n" -"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -"		posB.w = 0.f;\n" -"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -"		\n" -"		///////////////////\n" -"		///compound shape support\n" -"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -"		{\n" -"			int compoundChild = concavePairs[pairIdx].w;\n" -"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -"	
		float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -"			float4 newOrnB = qtMul(ornB,childOrnB);\n" -"			posB = newPosB;\n" -"			ornB = newOrnB;\n" -"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -"		}\n" -"		//////////////////\n" -"		float4 c0local = convexPolyhedronA.m_localCenter;\n" -"		float4 c0 = transform(&c0local, &posA, &ornA);\n" -"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -"		float4 c1 = transform(&c1local,&posB,&ornB);\n" -"		const float4 DeltaC2 = c0 - c1;\n" -"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"												posA,ornA,\n" -"												posB,ornB,\n" -"												DeltaC2,\n" -"												verticesA,uniqueEdgesA,facesA,indicesA,\n" -"												vertices,uniqueEdges,faces,indices,\n" -"												&sepAxis,&dmin);\n" -"		hasSeparatingAxis = 4;\n" -"		if (!sepA)\n" -"		{\n" -"			hasSeparatingAxis = 0;\n" -"		} else\n" -"		{\n" -"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" -"												posB,ornB,\n" -"												posA,ornA,\n" -"												DeltaC2,\n" -"												vertices,uniqueEdges,faces,indices,\n" -"												verticesA,uniqueEdgesA,facesA,indicesA,\n" -"												&sepAxis,&dmin);\n" -"			if (!sepB)\n" -"			{\n" -"				hasSeparatingAxis = 0;\n" -"			} else\n" -"			{\n" -"				bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" -"															posA,ornA,\n" -"															posB,ornB,\n" -"															DeltaC2,\n" -"															verticesA,uniqueEdgesA,facesA,indicesA,\n" -"															vertices,uniqueEdges,faces,indices,\n" -"															&sepAxis,&dmin);\n" -"	\n" -"				if (!sepEE)\n" -"				{\n" -"					hasSeparatingAxis = 0;\n" -"				} else\n" -"				{\n" -"					hasSeparatingAxis = 1;\n" -"				
}\n" -"			}\n" -"		}	\n" -"		\n" -"		if (hasSeparatingAxis)\n" -"		{\n" -"			sepAxis.w = dmin;\n" -"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -"			concaveHasSeparatingNormals[i]=1;\n" -"			float minDist = -1e30f;\n" -"			float maxDist = 0.02f;\n" -"		\n" -"			findClippingFaces(sepAxis,\n" -"                     &convexPolyhedronA,\n" -"					 &convexShapes[shapeIndexB],\n" -"					 posA,ornA,\n" -"					 posB,ornB,\n" -"                      worldVertsA1GPU,\n" -"                      worldNormalsAGPU,\n" -"                      worldVertsB1GPU,\n" -"					  vertexFaceCapacity,\n" -"                      minDist, maxDist,\n" -"                      verticesA,\n" -"                      facesA,\n" -"                      indicesA,\n" -" 					  vertices,\n" -"                      faces,\n" -"                      indices,\n" -"                      clippingFacesOut, pairIdx);\n" -"		} else\n" -"		{	\n" -"			//mark this pair as in-active\n" -"			concavePairs[pairIdx].w = -1;\n" -"		}\n" -"	}\n" -"	else\n" -"	{	\n" -"		//mark this pair as in-active\n" -"		concavePairs[pairIdx].w = -1;\n" -"	}\n" -"	\n" -"	concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" -"}\n" -; +static const char* satKernelsCL = +	"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +	"//written by Erwin Coumans\n" +	"#define SHAPE_CONVEX_HULL 3\n" +	"#define SHAPE_CONCAVE_TRIMESH 5\n" +	"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +	"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +	"#define B3_MAX_STACK_DEPTH 256\n" +	"typedef unsigned int u32;\n" +	"///keep this in sync with btCollidable.h\n" +	"typedef struct\n" +	"{\n" +	"	union {\n" +	"		int m_numChildShapes;\n" +	"		int m_bvhIndex;\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float m_radius;\n" +	"		int	m_compoundBvhIndex;\n" +	"	};\n" +	"	\n" +	"	int m_shapeType;\n" +	"	int m_shapeIndex;\n" +	"	\n" +	"} btCollidableGpu;\n" +	"#define MAX_NUM_PARTS_IN_BITS 10\n" +	"///b3QuantizedBvhNode is a compressed 
aabb node, 16 bytes.\n" +	"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes\n" +	"	int	m_escapeIndexOrTriangleIndex;\n" +	"} b3QuantizedBvhNode;\n" +	"typedef struct\n" +	"{\n" +	"	float4		m_aabbMin;\n" +	"	float4		m_aabbMax;\n" +	"	float4		m_quantization;\n" +	"	int			m_numNodes;\n" +	"	int			m_numSubTrees;\n" +	"	int			m_nodeOffset;\n" +	"	int			m_subTreeOffset;\n" +	"} b3BvhInfo;\n" +	"int	getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int	getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	unsigned int x=0;\n" +	"	unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +	"	// Get only the lower bits where the triangle index is stored\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +	"}\n" +	"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +	"}\n" +	"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	//skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +	"	return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +	"}\n" +	"	\n" +	"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +	"{\n" +	"	return -rootNode->m_escapeIndexOrTriangleIndex;\n" +	"}\n" +	"typedef struct\n" +	"{\n" +	"	//12 bytes\n" +	"	unsigned short int	m_quantizedAabbMin[3];\n" +	"	unsigned short int	m_quantizedAabbMax[3];\n" +	"	//4 bytes, points to the root of the subtree\n" +	"	int			m_rootNodeIndex;\n" +	"	//4 bytes\n" +	"	int			m_subtreeSize;\n" +	"	int			m_padding[3];\n" +	"} b3BvhSubtreeInfo;\n" +	"typedef struct\n" +	"{\n" +	"	float4	m_childPosition;\n" +	"	float4	m_childOrientation;\n" +	"	int m_shapeIndex;\n" +	"	int m_unused0;\n" +	"	int m_unused1;\n" +	"	int m_unused2;\n" +	"} btGpuChildShape;\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_pos;\n" +	"	float4 m_quat;\n" +	"	float4 m_linVel;\n" +	"	float4 m_angVel;\n" +	"	u32 m_collidableIdx;\n" +	"	float m_invMass;\n" +	"	float m_restituitionCoeff;\n" +	"	float m_frictionCoeff;\n" +	"} BodyData;\n" +	"typedef struct  \n" +	"{\n" +	"	float4		m_localCenter;\n" +	"	float4		m_extents;\n" +	"	float4		mC;\n" +	"	float4		mE;\n" +	"	\n" +	"	float			m_radius;\n" +	"	int	m_faceOffset;\n" +	"	int m_numFaces;\n" +	"	int	m_numVertices;\n" +	"	int m_vertexOffset;\n" +	"	int	m_uniqueEdgesOffset;\n" +	"	int	m_numUniqueEdges;\n" +	"	int m_unused;\n" +	"} ConvexPolyhedronCL;\n" +	"typedef struct \n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float4	m_min;\n" +	"		float   m_minElems[4];\n" +	"		int			m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float4	m_max;\n" +	"		float   m_maxElems[4];\n" +	"		int			m_maxIndices[4];\n" +	"	};\n" +	"} btAabbCL;\n" +	"#ifndef B3_AABB_H\n" +	"#define B3_AABB_H\n" +	"#ifndef B3_FLOAT4_H\n" +	"#define B3_FLOAT4_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#define B3_PLATFORM_DEFINITIONS_H\n" +	"struct MyTest\n" +	"{\n" +	"	int bla;\n" +	"};\n" +	"#ifdef 
__cplusplus\n" +	"#else\n" +	"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +	"#define B3_LARGE_FLOAT 1e18f\n" +	"#define B3_INFINITY 1e18f\n" +	"#define b3Assert(a)\n" +	"#define b3ConstArray(a) __global const a*\n" +	"#define b3AtomicInc atomic_inc\n" +	"#define b3AtomicAdd atomic_add\n" +	"#define b3Fabs fabs\n" +	"#define b3Sqrt native_sqrt\n" +	"#define b3Sin native_sin\n" +	"#define b3Cos native_cos\n" +	"#define B3_STATIC\n" +	"#endif\n" +	"#endif\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Float4;\n" +	"	#define b3Float4ConstArg const b3Float4\n" +	"	#define b3MakeFloat4 (float4)\n" +	"	float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return dot(a1, b1);\n" +	"	}\n" +	"	b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +	"	{\n" +	"		float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +	"		float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +	"		return cross(a1, b1);\n" +	"	}\n" +	"	#define b3MinFloat4 min\n" +	"	#define b3MaxFloat4 max\n" +	"	#define b3Normalized(a) normalize(a)\n" +	"#endif \n" +	"		\n" +	"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +	"{\n" +	"	if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6)	\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"inline int    b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +	"{\n" +	"    float maxDot = -B3_INFINITY;\n" +	"    int i = 0;\n" +	"    int ptIndex = -1;\n" +	"    for( i = 0; i < vecLen; i++ )\n" +	"    {\n" +	"        float dot = b3Dot3F4(vecArray[i],vec);\n" +	"            \n" +	"        if( dot > maxDot )\n" +	"        {\n" +	"            maxDot = dot;\n" +	"            ptIndex = i;\n" +	"        }\n" +	"    }\n" +	"	b3Assert(ptIndex>=0);\n" +	"    if (ptIndex<0)\n" +	"	{\n" +	"		ptIndex = 0;\n" +	"	}\n" +	"    *dotOut = maxDot;\n" +	"    return ptIndex;\n" +	"}\n" +	"#endif //B3_FLOAT4_H\n" +	
"#ifndef B3_MAT3x3_H\n" +	"#define B3_MAT3x3_H\n" +	"#ifndef B3_QUAT_H\n" +	"#define B3_QUAT_H\n" +	"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif\n" +	"#endif\n" +	"#ifndef B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#endif \n" +	"#endif //B3_FLOAT4_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"	typedef float4	b3Quat;\n" +	"	#define b3QuatConstArg const b3Quat\n" +	"	\n" +	"	\n" +	"inline float4 b3FastNormalize4(float4 v)\n" +	"{\n" +	"	v = (float4)(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"	\n" +	"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +	"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +	"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +	"{\n" +	"	b3Quat ans;\n" +	"	ans = b3Cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +	"{\n" +	"	b3Quat q;\n" +	"	q=in;\n" +	"	//return b3FastNormalize4(in);\n" +	"	float len = native_sqrt(dot(q, q));\n" +	"	if(len > 0.f)\n" +	"	{\n" +	"		q *= 1.f / len;\n" +	"	}\n" +	"	else\n" +	"	{\n" +	"		q.x = q.y = q.z = 0.f;\n" +	"		q.w = 1.f;\n" +	"	}\n" +	"	return q;\n" +	"}\n" +	"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +	"{\n" +	"	b3Quat qInv = b3QuatInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +	"{\n" +	"	return (b3Quat)(-q.xyz, q.w);\n" +	"}\n" +	"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" 
+	"{\n" +	"	return b3QuatRotate( b3QuatInvert( q ), vec );\n" +	"}\n" +	"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg  orientation)\n" +	"{\n" +	"	return b3QuatRotate( orientation, point ) + (translation);\n" +	"}\n" +	"	\n" +	"#endif \n" +	"#endif //B3_QUAT_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"typedef struct\n" +	"{\n" +	"	b3Float4 m_row[3];\n" +	"}b3Mat3x3;\n" +	"#define b3Mat3x3ConstArg const b3Mat3x3\n" +	"#define b3GetRow(m,row) (m.m_row[row])\n" +	"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +	"{\n" +	"	b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +	"	out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +	"	out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +	"	out.m_row[0].w = 0.f;\n" +	"	out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +	"	out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +	"	out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +	"	out.m_row[1].w = 0.f;\n" +	"	out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +	"	out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +	"	out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +	"	out.m_row[2].w = 0.f;\n" +	"	return out;\n" +	"}\n" +	"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = fabs(matIn.m_row[0]);\n" +	"	out.m_row[1] = fabs(matIn.m_row[1]);\n" +	"	out.m_row[2] = fabs(matIn.m_row[2]);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtZero();\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity();\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +	"__inline\n" +	"b3Mat3x3 mtZero()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(0.f);\n" +	"	m.m_row[1] = (b3Float4)(0.f);\n" +	"	
m.m_row[2] = (b3Float4)(0.f);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtIdentity()\n" +	"{\n" +	"	b3Mat3x3 m;\n" +	"	m.m_row[0] = (b3Float4)(1,0,0,0);\n" +	"	m.m_row[1] = (b3Float4)(0,1,0,0);\n" +	"	m.m_row[2] = (b3Float4)(0,0,1,0);\n" +	"	return m;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +	"{\n" +	"	b3Mat3x3 out;\n" +	"	out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +	"	out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +	"	out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Mat3x3 transB;\n" +	"	transB = mtTranspose( b );\n" +	"	b3Mat3x3 ans;\n" +	"	//	why this doesn't run when 0ing in the for{}\n" +	"	a.m_row[0].w = 0.f;\n" +	"	a.m_row[1].w = 0.f;\n" +	"	a.m_row[2].w = 0.f;\n" +	"	for(int i=0; i<3; i++)\n" +	"	{\n" +	"//	a.m_row[i].w = 0.f;\n" +	"		ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +	"		ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +	"		ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +	"		ans.m_row[i].w = 0.f;\n" +	"	}\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +	"{\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a.m_row[0], b );\n" +	"	ans.y = b3Dot3F4( a.m_row[1], b );\n" +	"	ans.z = b3Dot3F4( a.m_row[2], b );\n" +	"	ans.w = 0.f;\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +	"{\n" +	"	b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +	"	b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +	"	b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +	"	b3Float4 ans;\n" +	"	ans.x = b3Dot3F4( a, colx );\n" +	"	ans.y = b3Dot3F4( a, coly );\n" +	"	ans.z = b3Dot3F4( a, colz );\n" +	"	return ans;\n" +	"}\n" +	"#endif\n" +	"#endif 
//B3_MAT3x3_H\n" +	"typedef struct b3Aabb b3Aabb_t;\n" +	"struct b3Aabb\n" +	"{\n" +	"	union\n" +	"	{\n" +	"		float m_min[4];\n" +	"		b3Float4 m_minVec;\n" +	"		int m_minIndices[4];\n" +	"	};\n" +	"	union\n" +	"	{\n" +	"		float	m_max[4];\n" +	"		b3Float4 m_maxVec;\n" +	"		int m_signedMaxIndices[4];\n" +	"	};\n" +	"};\n" +	"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +	"						b3Float4ConstArg pos,\n" +	"						b3QuatConstArg orn,\n" +	"						b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +	"{\n" +	"		b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +	"		localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +	"		b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +	"		b3Mat3x3 m;\n" +	"		m = b3QuatGetRotationMatrix(orn);\n" +	"		b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +	"		b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +	"		\n" +	"		b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +	"										 b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +	"										 0.f);\n" +	"		*aabbMinOut = center-extent;\n" +	"		*aabbMaxOut = center+extent;\n" +	"}\n" +	"/// conservative test for overlap between two aabbs\n" +	"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +	"								b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +	"{\n" +	"	bool overlap = true;\n" +	"	overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +	"	overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +	"	overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? 
false : overlap;\n" +	"	return overlap;\n" +	"}\n" +	"#endif //B3_AABB_H\n" +	"/*\n" +	"Bullet Continuous Collision Detection and Physics Library\n" +	"Copyright (c) 2003-2013 Erwin Coumans  http://bulletphysics.org\n" +	"This software is provided 'as-is', without any express or implied warranty.\n" +	"In no event will the authors be held liable for any damages arising from the use of this software.\n" +	"Permission is granted to anyone to use this software for any purpose,\n" +	"including commercial applications, and to alter it and redistribute it freely,\n" +	"subject to the following restrictions:\n" +	"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +	"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +	"3. This notice may not be removed or altered from any source distribution.\n" +	"*/\n" +	"#ifndef B3_INT2_H\n" +	"#define B3_INT2_H\n" +	"#ifdef __cplusplus\n" +	"#else\n" +	"#define b3UnsignedInt2 uint2\n" +	"#define b3Int2 int2\n" +	"#define b3MakeInt2 (int2)\n" +	"#endif //__cplusplus\n" +	"#endif\n" +	"typedef struct\n" +	"{\n" +	"	float4 m_plane;\n" +	"	int m_indexOffset;\n" +	"	int m_numIndices;\n" +	"} btGpuFace;\n" +	"#define make_float4 (float4)\n" +	"__inline\n" +	"float4 cross3(float4 a, float4 b)\n" +	"{\n" +	"	return cross(a,b);\n" +	"	\n" +	"//	float4 a1 = make_float4(a.xyz,0.f);\n" +	"//	float4 b1 = make_float4(b.xyz,0.f);\n" +	"//	return cross(a1,b1);\n" +	"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//	float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +	"	\n" +	"	//return c;\n" +	"}\n" +	"__inline\n" +	"float dot3F4(float4 a, float4 b)\n" +	"{\n" +	"	float4 a1 = make_float4(a.xyz,0.f);\n" +	"	
float4 b1 = make_float4(b.xyz,0.f);\n" +	"	return dot(a1, b1);\n" +	"}\n" +	"__inline\n" +	"float4 fastNormalize4(float4 v)\n" +	"{\n" +	"	v = make_float4(v.xyz,0.f);\n" +	"	return fast_normalize(v);\n" +	"}\n" +	"///////////////////////////////////////\n" +	"//	Quaternion\n" +	"///////////////////////////////////////\n" +	"typedef float4 Quaternion;\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b);\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in);\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec);\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q);\n" +	"__inline\n" +	"Quaternion qtMul(Quaternion a, Quaternion b)\n" +	"{\n" +	"	Quaternion ans;\n" +	"	ans = cross3( a, b );\n" +	"	ans += a.w*b+b.w*a;\n" +	"//	ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +	"	ans.w = a.w*b.w - dot3F4(a, b);\n" +	"	return ans;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtNormalize(Quaternion in)\n" +	"{\n" +	"	return fastNormalize4(in);\n" +	"//	in /= length( in );\n" +	"//	return in;\n" +	"}\n" +	"__inline\n" +	"float4 qtRotate(Quaternion q, float4 vec)\n" +	"{\n" +	"	Quaternion qInv = qtInvert( q );\n" +	"	float4 vcpy = vec;\n" +	"	vcpy.w = 0.f;\n" +	"	float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +	"	return out;\n" +	"}\n" +	"__inline\n" +	"Quaternion qtInvert(Quaternion q)\n" +	"{\n" +	"	return (Quaternion)(-q.xyz, q.w);\n" +	"}\n" +	"__inline\n" +	"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +	"{\n" +	"	return qtRotate( qtInvert( q ), vec );\n" +	"}\n" +	"__inline\n" +	"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +	"{\n" +	"	return qtRotate( *orientation, *p ) + (*translation);\n" +	"}\n" +	"__inline\n" +	"float4 normalize3(const float4 a)\n" +	"{\n" +	"	float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +	"	return fastNormalize4( n );\n" +	"}\n" +	"inline void projectLocal(const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, const float4* 
vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline void project(__global const ConvexPolyhedronCL* hull,  const float4 pos, const float4 orn, \n" +	"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +	"{\n" +	"	min[0] = FLT_MAX;\n" +	"	max[0] = -FLT_MAX;\n" +	"	int numVerts = hull->m_numVertices;\n" +	"	const float4 localDir = qtInvRotate(orn,*dir);\n" +	"	float offset = dot(pos,*dir);\n" +	"	for(int i=0;i<numVerts;i++)\n" +	"	{\n" +	"		float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +	"		if(dp < min[0])	\n" +	"			min[0] = dp;\n" +	"		if(dp > max[0])	\n" +	"			max[0] = dp;\n" +	"	}\n" +	"	if(min[0]>max[0])\n" +	"	{\n" +	"		float tmp = min[0];\n" +	"		min[0] = max[0];\n" +	"		max[0] = tmp;\n" +	"	}\n" +	"	min[0] += offset;\n" +	"	max[0] += offset;\n" +	"}\n" +	"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" 
+	"	*depth = d0<d1 ? d0:d1;\n" +	"	return true;\n" +	"}\n" +	"inline bool IsAlmostZero(const float4 v)\n" +	"{\n" +	"	if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +	"		return false;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisLocalB(	__global const ConvexPolyhedronCL* hullA,  const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* verticesA, \n" +	"	
__global const float4* uniqueEdgesA, \n" +	"	__global const btGpuFace* facesA,\n" +	"	__global const int*  indicesA,\n" +	"	const float4* verticesB,\n" +	"	const float4* uniqueEdgesB, \n" +	"	const btGpuFace* facesB,\n" +	"	const int*  indicesB,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS *= -1.f;\n" +	"			curPlaneTests++;\n" +	"			float d;\n" +	"			if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +	"				return false;\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdgeLocalA(	const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	const float4* verticesA, \n" +	"	const float4* uniqueEdgesA, \n" +	"	const btGpuFace* facesA,\n" +	"	const int*  indicesA,\n" +	"	__global const float4* verticesB, \n" +	"	__global const float4* uniqueEdgesB, \n" +	"	__global const btGpuFace* facesB,\n" +	"	__global const int*  indicesB,\n" +	"		float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	
{\n" +	"		const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje *= -1.f;\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						result = false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA,const float4 ornA,\n" +	"	const float4 posB,const float4 ornB,\n" +	"	float4* sep_axis, __global const float4* vertices,float* depth)\n" +	"{\n" +	"	float Min0,Max0;\n" +	"	float Min1,Max1;\n" +	"	project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" +	"	project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" +	"	if(Max0<Min1 || Max1<Min0)\n" +	"		return false;\n" +	"	float d0 = Max0 - Min1;\n" +	"	float d1 = Max1 - Min0;\n" +	"	*depth = d0<d1 ? 
d0:d1;\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxis(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices, \n" +	"	__global const float4* uniqueEdges, \n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int*  indices,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	\n" +	"	int curPlaneTests=0;\n" +	"	{\n" +	"		int numFacesA = hullA->m_numFaces;\n" +	"		// Test normals from hullA\n" +	"		for(int i=0;i<numFacesA;i++)\n" +	"		{\n" +	"			const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" +	"			float4 faceANormalWS = qtRotate(ornA,normal);\n" +	"	\n" +	"			if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +	"				faceANormalWS*=-1.f;\n" +	"				\n" +	"			curPlaneTests++;\n" +	"	\n" +	"			float d;\n" +	"			if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" +	"				return false;\n" +	"	\n" +	"			if(d<*dmin)\n" +	"			{\n" +	"				*dmin = d;\n" +	"				*sep = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"		if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"		{\n" +	"			*sep = -(*sep);\n" +	"		}\n" +	"	\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisUnitSphere(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices,\n" +	"	__global const float4* unitSphereDirections,\n" +	"	int numUnitSphereDirections,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 
0;\n" +	"	// Test unit sphere directions\n" +	"	for (int i=0;i<numUnitSphereDirections;i++)\n" +	"	{\n" +	"		float4 crossje;\n" +	"		crossje = unitSphereDirections[i];	\n" +	"		if (dot3F4(DeltaC2,crossje)>0)\n" +	"			crossje *= -1.f;\n" +	"		{\n" +	"			float dist;\n" +	"			bool result = true;\n" +	"			float Min0,Max0;\n" +	"			float Min1,Max1;\n" +	"			project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"			project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"		\n" +	"			if(Max0<Min1 || Max1<Min0)\n" +	"				return false;\n" +	"		\n" +	"			float d0 = Max0 - Min1;\n" +	"			float d1 = Max1 - Min0;\n" +	"			dist = d0<d1 ? d0:d1;\n" +	"			result = true;\n" +	"	\n" +	"			if(dist<*dmin)\n" +	"			{\n" +	"				*dmin = dist;\n" +	"				*sep = crossje;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"bool findSeparatingAxisEdgeEdge(	__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +	"	const float4 posA1,\n" +	"	const float4 ornA,\n" +	"	const float4 posB1,\n" +	"	const float4 ornB,\n" +	"	const float4 DeltaC2,\n" +	"	__global const float4* vertices, \n" +	"	__global const float4* uniqueEdges, \n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int*  indices,\n" +	"	float4* sep,\n" +	"	float* dmin)\n" +	"{\n" +	"	\n" +	"	float4 posA = posA1;\n" +	"	posA.w = 0.f;\n" +	"	float4 posB = posB1;\n" +	"	posB.w = 0.f;\n" +	"	int curPlaneTests=0;\n" +	"	int curEdgeEdge = 0;\n" +	"	// Test edges\n" +	"	for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +	"	{\n" +	"		const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" +	"		float4 edge0World = qtRotate(ornA,edge0);\n" +	"		for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +	"		{\n" +	"			const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" +	"			float4 edge1World = qtRotate(ornB,edge1);\n" +	"			float4 crossje = 
cross3(edge0World,edge1World);\n" +	"			curEdgeEdge++;\n" +	"			if(!IsAlmostZero(crossje))\n" +	"			{\n" +	"				crossje = normalize3(crossje);\n" +	"				if (dot3F4(DeltaC2,crossje)<0)\n" +	"					crossje*=-1.f;\n" +	"					\n" +	"				float dist;\n" +	"				bool result = true;\n" +	"				{\n" +	"					float Min0,Max0;\n" +	"					float Min1,Max1;\n" +	"					project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +	"					project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +	"				\n" +	"					if(Max0<Min1 || Max1<Min0)\n" +	"						return false;\n" +	"				\n" +	"					float d0 = Max0 - Min1;\n" +	"					float d1 = Max1 - Min0;\n" +	"					dist = d0<d1 ? d0:d1;\n" +	"					result = true;\n" +	"				}\n" +	"				\n" +	"				if(dist<*dmin)\n" +	"				{\n" +	"					*dmin = dist;\n" +	"					*sep = crossje;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"	\n" +	"	if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +	"	{\n" +	"		*sep = -(*sep);\n" +	"	}\n" +	"	return true;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global const btGpuChildShape* gpuChildShapes,\n" +	"																					__global volatile float4* gpuCompoundSepNormalsOut,\n" +	"																					__global volatile int* gpuHasCompoundSepNormalsOut,\n" +	"																					int numCompoundPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numCompoundPairs)\n" +	"	{\n" +	"		int 
bodyIndexA = gpuCompoundPairs[i].x;\n" +	"		int bodyIndexB = gpuCompoundPairs[i].y;\n" +	"		int childShapeIndexA = gpuCompoundPairs[i].z;\n" +	"		int childShapeIndexB = gpuCompoundPairs[i].w;\n" +	"		\n" +	"		int collidableIndexA = -1;\n" +	"		int collidableIndexB = -1;\n" +	"		\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		\n" +	"		float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							\n" +	"		if (childShapeIndexA >= 0)\n" +	"		{\n" +	"			collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"			float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"			float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"			float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"			float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"			posA = newPosA;\n" +	"			ornA = newOrnA;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		}\n" +	"		\n" +	"		if (childShapeIndexB>=0)\n" +	"		{\n" +	"			collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"		} else\n" +	"		{\n" +	"			collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;	\n" +	"		}\n" +	"	\n" +	"		gpuHasCompoundSepNormalsOut[i] = 0;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +	"		int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +	"	\n" +	"		if ((shapeTypeA != 
SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		int hasSeparatingAxis = 5;\n" +	"							\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		posA.w = 0.f;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal = make_float4(1,0,0,0);\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else//(!sepB)\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +	"				if (sepEE)\n" +	"				{\n" +	"						gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" +	"						gpuHasCompoundSepNormalsOut[i] = 1;\n" +	"				}//sepEE\n" +	"			}//(!sepB)\n" +	"		}//(!sepA)\n" +	"		\n" +	"		\n" +	"	}\n" +	"		\n" +	"}\n" +	"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +	"{\n" +	"		b3Float4 vecOut;\n" +	"		vecOut = b3MakeFloat4(\n" +	"			(float)(vecIn[0]) / (quantization.x),\n" +	"			(float)(vecIn[1]) / (quantization.y),\n" +	"			(float)(vecIn[2]) / (quantization.z),\n" +	"			0.f);\n" +	"		vecOut += bvhAabbMin;\n" +	"		
return vecOut;\n" +	"}\n" +	"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +	"{\n" +	"		b3Float4 vecOut;\n" +	"		vecOut = b3MakeFloat4(\n" +	"			(float)(vecIn[0]) / (quantization.x),\n" +	"			(float)(vecIn[1]) / (quantization.y),\n" +	"			(float)(vecIn[2]) / (quantization.z),\n" +	"			0.f);\n" +	"		vecOut += bvhAabbMin;\n" +	"		return vecOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findCompoundPairsKernel( __global const int4* pairs, \n" +	"	__global const BodyData* rigidBodies, \n" +	"	__global const btCollidableGpu* collidables,\n" +	"	__global const ConvexPolyhedronCL* convexShapes, \n" +	"	__global const float4* vertices,\n" +	"	__global const float4* uniqueEdges,\n" +	"	__global const btGpuFace* faces,\n" +	"	__global const int* indices,\n" +	"	__global b3Aabb_t* aabbLocalSpace,\n" +	"	__global const btGpuChildShape* gpuChildShapes,\n" +	"	__global volatile int4* gpuCompoundPairsOut,\n" +	"	__global volatile int* numCompoundPairsOut,\n" +	"	__global const b3BvhSubtreeInfo* subtrees,\n" +	"	__global const b3QuantizedBvhNode* quantizedNodes,\n" +	"	__global const b3BvhInfo* bvhInfos,\n" +	"	int numPairs,\n" +	"	int maxNumCompoundPairsCapacity\n" +	"	)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) 
&&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"		{\n" +	"			int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" +	"			int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" +	"			int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" +	"			int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" +	"			int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" +	"			int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" +	"			\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			\n" +	"			for (int p=0;p<numSubTreesA;p++)\n" +	"			{\n" +	"				b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" +	"				//bvhInfos[bvhA].m_quantization\n" +	"				b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"				b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"				b3Float4 aabbAMinOut,aabbAMaxOut;\n" +	"				float margin=0.f;\n" +	"				b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +	"				\n" +	"				for (int q=0;q<numSubTreesB;q++)\n" +	"				{\n" +	"					b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" +	"					b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"					b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"					b3Float4 aabbBMinOut,aabbBMaxOut;\n" +	"					float margin=0.f;\n" +	"					b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +	"					\n" +	"					\n" +	"					bool aabbOverlap = 
b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +	"					if (aabbOverlap)\n" +	"					{\n" +	"						\n" +	"						int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" +	"						int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" +	"						int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" +	"						int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" +	"						b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" +	"						b3Int2 node0;\n" +	"						node0.x = startNodeIndexA;\n" +	"						node0.y = startNodeIndexB;\n" +	"						int maxStackDepth = B3_MAX_STACK_DEPTH;\n" +	"						int depth=0;\n" +	"						nodeStack[depth++]=node0;\n" +	"						do\n" +	"						{\n" +	"							b3Int2 node = nodeStack[--depth];\n" +	"							b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"							b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +	"							b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"							b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +	"							float margin=0.f;\n" +	"							b3Float4 aabbAMinOut,aabbAMaxOut;\n" +	"							b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +	"							b3Float4 aabbBMinOut,aabbBMaxOut;\n" +	"							b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +	"							\n" +	"							bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +	"							if (nodeOverlap)\n" +	"							{\n" +	"								bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" +	"								bool isLeafB = 
isLeafNodeGlobal(&quantizedNodes[node.y]);\n" +	"								bool isInternalA = !isLeafA;\n" +	"								bool isInternalB = !isLeafB;\n" +	"								//fail, even though it might hit two leaf nodes\n" +	"								if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" +	"								{\n" +	"									//printf(\"Error: traversal exceeded maxStackDepth\");\n" +	"									continue;\n" +	"								}\n" +	"								if(isInternalA)\n" +	"								{\n" +	"									int nodeAleftChild = node.x+1;\n" +	"									bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" +	"									int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" +	"									if(isInternalB)\n" +	"									{					\n" +	"										int nodeBleftChild = node.y+1;\n" +	"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +	"										int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" +	"									}\n" +	"									else\n" +	"									{\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" +	"										nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" +	"									}\n" +	"								}\n" +	"								else\n" +	"								{\n" +	"									if(isInternalB)\n" +	"									{\n" +	"										int nodeBleftChild = node.y+1;\n" +	"										bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +	"										int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +	"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" +	"										nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" +	"									}\n" +	"									else\n" +	"									{\n" +	"										int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"										if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"										{\n" +	"											int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" +	"											int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" +	"											gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +	"										}\n" +	"									}\n" +	"								}\n" +	"							}\n" +	"						} while (depth);\n" +	"					}\n" +	"				}\n" +	"			}\n" +	"			\n" +	"			return;\n" +	"		}\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"		{\n" +	"			if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" +	"			{\n" +	"				int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" +	"				for (int c=0;c<numChildrenA;c++)\n" +	"				{\n" +	"					int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" +	"					int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +	"					float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"					float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"					float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +	"					float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +	"					float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +	"					float4 newOrnA = qtMul(ornA,childOrnA);\n" +	"					int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" +	"					b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" +	"					float 
margin = 0.f;\n" +	"					\n" +	"					b3Float4 aabbAMinWS;\n" +	"					b3Float4 aabbAMaxWS;\n" +	"					\n" +	"					b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" +	"						newPosA,\n" +	"						newOrnA,\n" +	"						&aabbAMinWS,&aabbAMaxWS);\n" +	"						\n" +	"					\n" +	"					if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"					{\n" +	"						int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"						for (int b=0;b<numChildrenB;b++)\n" +	"						{\n" +	"							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"							int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"							float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"							float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"							float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"							int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"							b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" +	"							\n" +	"							b3Float4 aabbBMinWS;\n" +	"							b3Float4 aabbBMaxWS;\n" +	"							\n" +	"							b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" +	"								newPosB,\n" +	"								newOrnB,\n" +	"								&aabbBMinWS,&aabbBMaxWS);\n" +	"								\n" +	"								\n" +	"							\n" +	"							bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" +	"							if (aabbOverlap)\n" +	"							{\n" +	"								int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"								float dmin = FLT_MAX;\n" +	"								float4 posA = newPosA;\n" +	"								posA.w = 0.f;\n" +	"								float4 posB = newPosB;\n" +	"								posB.w = 0.f;\n" +	"								float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"								
float4 ornA = newOrnA;\n" +	"								float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"								float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"								float4 ornB =newOrnB;\n" +	"								float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"								const float4 DeltaC2 = c0 - c1;\n" +	"								{//\n" +	"									int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"									if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"									{\n" +	"										gpuCompoundPairsOut[compoundPairIdx]  = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +	"									}\n" +	"								}//\n" +	"							}//fi(1)\n" +	"						} //for (int b=0\n" +	"					}//if (collidables[collidableIndexB].\n" +	"					else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"					{\n" +	"						if (1)\n" +	"						{\n" +	"							int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"							float dmin = FLT_MAX;\n" +	"							float4 posA = newPosA;\n" +	"							posA.w = 0.f;\n" +	"							float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"							posB.w = 0.f;\n" +	"							float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"							float4 ornA = newOrnA;\n" +	"							float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"							float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"							float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"							float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"							const float4 DeltaC2 = c0 - c1;\n" +	"							{\n" +	"								int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"								if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"								{\n" +	"									gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" +	"								}//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"							}//\n" +	"						}//fi (1)\n" +	"					}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"				}//for 
(int b=0;b<numChildrenB;b++)	\n" +	"				return;\n" +	"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"			if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" +	"				&& (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"			{\n" +	"				int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +	"				for (int b=0;b<numChildrenB;b++)\n" +	"				{\n" +	"					int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +	"					int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"					float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +	"					float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"					float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"					float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"					float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" +	"					float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"					int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"					//////////////////////////////////////\n" +	"					if (1)\n" +	"					{\n" +	"						int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"						float dmin = FLT_MAX;\n" +	"						float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"						posA.w = 0.f;\n" +	"						float4 posB = newPosB;\n" +	"						posB.w = 0.f;\n" +	"						float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"						float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"						float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"						float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"						float4 ornB =newOrnB;\n" +	"						float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"						const float4 DeltaC2 = c0 - c1;\n" +	"						{//\n" +	"							int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +	"							if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"							{\n" +	"								gpuCompoundPairsOut[compoundPairIdx] = 
(int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" +	"							}//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +	"						}//\n" +	"					}//fi (1)	\n" +	"				}//for (int b=0;b<numChildrenB;b++)\n" +	"				return;\n" +	"			}//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"			return;\n" +	"		}//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +	"	}//i<numPairs\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findSeparatingAxisKernel( __global const int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global volatile float4* separatingNormals,\n" +	"																					__global volatile int* hasSeparatingAxis,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"	\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"		\n" +	"		\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"	
	{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"			\n" +	"		if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"			return;\n" +	"		}\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal;\n" +	"		\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																								posB,ornB,\n" +	"																								DeltaC2,\n" +	"																								vertices,uniqueEdges,faces,\n" +	"																								indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis[i] = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +	"																									posA,ornA,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis[i] = 0;\n" +	"			} else\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdge(	
&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis[i] = 1;\n" +	"					separatingNormals[i] = sepNormal;\n" +	"				}\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"}\n" +	"__kernel void   findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global volatile float4* separatingNormals,\n" +	"																					__global volatile int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"	\n" +	"		int bodyIndexA = pairs[i].x;\n" +	"		int bodyIndexB = pairs[i].y;\n" +	"		int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"		int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	\n" +	"		int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"		int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	\n" +	"		hasSeparatingAxis[i] = 0;	\n" +	"		\n" +	"		//once the broadphase avoids static-static pairs, we can remove this test\n" +	"		if 
((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"		\n" +	"		if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +	"		{\n" +	"			return;\n" +	"		}\n" +	"			\n" +	"		int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"		float dmin = FLT_MAX;\n" +	"		dmins[i] = dmin;\n" +	"		\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		float4 sepNormal;\n" +	"		\n" +	"		bool sepA = findSeparatingAxis(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																								posB,ornB,\n" +	"																								DeltaC2,\n" +	"																								vertices,uniqueEdges,faces,\n" +	"																								indices,&sepNormal,&dmin);\n" +	"		hasSeparatingAxis[i] = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis[i] = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxis(	&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +	"																									posA,ornA,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"			if (sepB)\n" +	"			{\n" +	"				dmins[i] = dmin;\n" +	"				hasSeparatingAxis[i] = 1;\n" +	"				separatingNormals[i] = sepNormal;\n" +	"			}\n" +	"		}\n" +	"		\n" +	"	}\n" +	"}\n" +	"__kernel void   findSeparatingAxisEdgeEdgeKernel( __global const 
int4* pairs, \n" +	"																					__global const BodyData* rigidBodies, \n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global  float4* separatingNormals,\n" +	"																					__global  int* hasSeparatingAxis,\n" +	"																					__global  float* dmins,\n" +	"																					__global const float4* unitSphereDirections,\n" +	"																					int numUnitSphereDirections,\n" +	"																					int numPairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	\n" +	"	if (i<numPairs)\n" +	"	{\n" +	"		if (hasSeparatingAxis[i])\n" +	"		{\n" +	"	\n" +	"			int bodyIndexA = pairs[i].x;\n" +	"			int bodyIndexB = pairs[i].y;\n" +	"	\n" +	"			int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"			int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"		\n" +	"			int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"			int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"			\n" +	"			\n" +	"			int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	\n" +	"			float dmin = dmins[i];\n" +	"	\n" +	"			float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"			posA.w = 0.f;\n" +	"			float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"			posB.w = 0.f;\n" +	"			float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +	"			float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"			float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"			float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"			float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		
	float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"			const float4 DeltaC2 = c0 - c1;\n" +	"			float4 sepNormal = separatingNormals[i];\n" +	"			\n" +	"			\n" +	"			\n" +	"			bool sepEE = false;\n" +	"			int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +	"			if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" +	"			{\n" +	"				sepEE = findSeparatingAxisEdgeEdge(	&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,uniqueEdges,faces,\n" +	"																									indices,&sepNormal,&dmin);\n" +	"																									\n" +	"					if (!sepEE)\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 0;\n" +	"					} else\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 1;\n" +	"						separatingNormals[i] = sepNormal;\n" +	"					}\n" +	"			}\n" +	"			/*\n" +	"			///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" +	"			else\n" +	"			{\n" +	"				sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +	"																									posB,ornB,\n" +	"																									DeltaC2,\n" +	"																									vertices,unitSphereDirections,numUnitSphereDirections,\n" +	"																									&sepNormal,&dmin);\n" +	"					if (!sepEE)\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 0;\n" +	"					} else\n" +	"					{\n" +	"						hasSeparatingAxis[i] = 1;\n" +	"						separatingNormals[i] = sepNormal;\n" +	"					}\n" +	"			}\n" +	"			*/\n" +	"		}		//if (hasSeparatingAxis[i])\n" +	"	}//(i<numPairs)\n" +	"}\n" +	"inline int	findClippingFaces(const float4 separatingNormal,\n" +	"                      const ConvexPolyhedronCL* hullA, \n" +	"					  __global const ConvexPolyhedronCL* hullB,\n" +	"                      const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +	"       
                __global float4* worldVertsA1,\n" +	"                      __global float4* worldNormalsA1,\n" +	"                      __global float4* worldVertsB1,\n" +	"                      int capacityWorldVerts,\n" +	"                      const float minDist, float maxDist,\n" +	"					  const float4* verticesA,\n" +	"                      const btGpuFace* facesA,\n" +	"                      const int* indicesA,\n" +	"					  __global const float4* verticesB,\n" +	"                      __global const btGpuFace* facesB,\n" +	"                      __global const int* indicesB,\n" +	"                      __global int4* clippingFaces, int pairIndex)\n" +	"{\n" +	"	int numContactsOut = 0;\n" +	"	int numWorldVertsB1= 0;\n" +	"    \n" +	"    \n" +	"	int closestFaceB=0;\n" +	"	float dmax = -FLT_MAX;\n" +	"    \n" +	"	{\n" +	"		for(int face=0;face<hullB->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +	"                                              facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +	"			const float4 WorldNormal = qtRotate(ornB, Normal);\n" +	"			float d = dot3F4(WorldNormal,separatingNormal);\n" +	"			if (d > dmax)\n" +	"			{\n" +	"				dmax = d;\n" +	"				closestFaceB = face;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"	{\n" +	"		const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +	"		int numVertices = polyB.m_numIndices;\n" +	"        if (numVertices>capacityWorldVerts)\n" +	"            numVertices = capacityWorldVerts;\n" +	"        \n" +	"		for(int e0=0;e0<numVertices;e0++)\n" +	"		{\n" +	"            if (e0<capacityWorldVerts)\n" +	"            {\n" +	"                const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +	"                worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +	"            }\n" +	"		}\n" +	"	}\n" +	"   
 \n" +	"    int closestFaceA=0;\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		for(int face=0;face<hullA->m_numFaces;face++)\n" +	"		{\n" +	"			const float4 Normal = make_float4(\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.x,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.y,\n" +	"                                              facesA[hullA->m_faceOffset+face].m_plane.z,\n" +	"                                              0.f);\n" +	"			const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +	"            \n" +	"			float d = dot3F4(faceANormalWS,separatingNormal);\n" +	"			if (d < dmin)\n" +	"			{\n" +	"				dmin = d;\n" +	"				closestFaceA = face;\n" +	"                worldNormalsA1[pairIndex] = faceANormalWS;\n" +	"			}\n" +	"		}\n" +	"	}\n" +	"    \n" +	"    int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +	"    if (numVerticesA>capacityWorldVerts)\n" +	"       numVerticesA = capacityWorldVerts;\n" +	"    \n" +	"	for(int e0=0;e0<numVerticesA;e0++)\n" +	"	{\n" +	"        if (e0<capacityWorldVerts)\n" +	"        {\n" +	"            const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +	"            worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +	"        }\n" +	"    }\n" +	"    \n" +	"    clippingFaces[pairIndex].x = closestFaceA;\n" +	"    clippingFaces[pairIndex].y = closestFaceB;\n" +	"    clippingFaces[pairIndex].z = numVerticesA;\n" +	"    clippingFaces[pairIndex].w = numWorldVertsB1;\n" +	"    \n" +	"    \n" +	"	return numContactsOut;\n" +	"}\n" +	"// work-in-progress\n" +	"__kernel void   findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" +	"																					__global const BodyData* rigidBodies,\n" +	"																					__global const btCollidableGpu* collidables,\n" +	"																					__global const 
ConvexPolyhedronCL* convexShapes, \n" +	"																					__global const float4* vertices,\n" +	"																					__global const float4* uniqueEdges,\n" +	"																					__global const btGpuFace* faces,\n" +	"																					__global const int* indices,\n" +	"																					__global const btGpuChildShape* gpuChildShapes,\n" +	"																					__global btAabbCL* aabbs,\n" +	"																					__global float4* concaveSeparatingNormalsOut,\n" +	"																					__global int* concaveHasSeparatingNormals,\n" +	"																					__global int4* clippingFacesOut,\n" +	"																					__global float4* worldVertsA1GPU,\n" +	"																					__global float4*  worldNormalsAGPU,\n" +	"																					__global float4* worldVertsB1GPU,\n" +	"																					int vertexFaceCapacity,\n" +	"																					int numConcavePairs\n" +	"																					)\n" +	"{\n" +	"	int i = get_global_id(0);\n" +	"	if (i>=numConcavePairs)\n" +	"		return;\n" +	"	concaveHasSeparatingNormals[i] = 0;\n" +	"	int pairIdx = i;\n" +	"	int bodyIndexA = concavePairs[i].x;\n" +	"	int bodyIndexB = concavePairs[i].y;\n" +	"	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +	"	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +	"	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +	"	int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +	"	if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +	"		collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"	{\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"		return;\n" +	"	}\n" +	"	int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +	"	int numActualConcaveConvexTests = 0;\n" +	"	\n" +	"	int f = concavePairs[i].z;\n" +	"	\n" +	"	bool overlap = false;\n" +	"	\n" +	"	ConvexPolyhedronCL convexPolyhedronA;\n" +	"	//add 3 vertices of the triangle\n" +	"	convexPolyhedronA.m_numVertices = 3;\n" +	"	
convexPolyhedronA.m_vertexOffset = 0;\n" +	"	float4	localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +	"	btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +	"	float4 triMinAabb, triMaxAabb;\n" +	"	btAabbCL triAabb;\n" +	"	triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +	"	triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +	"	\n" +	"	float4 verticesA[3];\n" +	"	for (int i=0;i<3;i++)\n" +	"	{\n" +	"		int index = indices[face.m_indexOffset+i];\n" +	"		float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +	"		verticesA[i] = vert;\n" +	"		localCenter += vert;\n" +	"			\n" +	"		triAabb.m_min = min(triAabb.m_min,vert);		\n" +	"		triAabb.m_max = max(triAabb.m_max,vert);		\n" +	"	}\n" +	"	overlap = true;\n" +	"	overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +	"	overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" +	"		\n" +	"	if (overlap)\n" +	"	{\n" +	"		float dmin = FLT_MAX;\n" +	"		int hasSeparatingAxis=5;\n" +	"		float4 sepAxis=make_float4(1,2,3,4);\n" +	"		int localCC=0;\n" +	"		numActualConcaveConvexTests++;\n" +	"		//a triangle has 3 unique edges\n" +	"		convexPolyhedronA.m_numUniqueEdges = 3;\n" +	"		convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +	"		float4 uniqueEdgesA[3];\n" +	"		\n" +	"		uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +	"		uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +	"		uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +	"		convexPolyhedronA.m_faceOffset = 0;\n" +	"                                  \n" +	"		float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +	"                             \n" +	"		btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +	"		int indicesA[3+3+2+2+2];\n" +	"		int curUsedIndices=0;\n" +	"		int fidx=0;\n" +	"		//front size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[0] = 0;\n" +	"			indicesA[1] = 1;\n" +	"			indicesA[2] = 2;\n" +	"			curUsedIndices+=3;\n" +	"			float c = face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = normal.x;\n" +	"			facesA[fidx].m_plane.y = normal.y;\n" +	"			facesA[fidx].m_plane.z = normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		//back size of triangle\n" +	"		{\n" +	"			facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"			indicesA[3]=2;\n" +	"			indicesA[4]=1;\n" +	"			indicesA[5]=0;\n" +	"			curUsedIndices+=3;\n" +	"			float c = dot(normal,verticesA[0]);\n" +	"			float c1 = -face.m_plane.w;\n" +	"			facesA[fidx].m_plane.x = -normal.x;\n" +	"			facesA[fidx].m_plane.y = -normal.y;\n" +	"			facesA[fidx].m_plane.z = -normal.z;\n" +	"			facesA[fidx].m_plane.w = c;\n" +	"			facesA[fidx].m_numIndices=3;\n" +	"		}\n" +	"		fidx++;\n" +	"		bool addEdgePlanes = true;\n" +	"		if (addEdgePlanes)\n" +	"		{\n" +	"			int numVertices=3;\n" +	
"			int prevVertex = numVertices-1;\n" +	"			for (int i=0;i<numVertices;i++)\n" +	"			{\n" +	"				float4 v0 = verticesA[i];\n" +	"				float4 v1 = verticesA[prevVertex];\n" +	"                                            \n" +	"				float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +	"				float c = -dot(edgeNormal,v0);\n" +	"				facesA[fidx].m_numIndices = 2;\n" +	"				facesA[fidx].m_indexOffset=curUsedIndices;\n" +	"				indicesA[curUsedIndices++]=i;\n" +	"				indicesA[curUsedIndices++]=prevVertex;\n" +	"                                            \n" +	"				facesA[fidx].m_plane.x = edgeNormal.x;\n" +	"				facesA[fidx].m_plane.y = edgeNormal.y;\n" +	"				facesA[fidx].m_plane.z = edgeNormal.z;\n" +	"				facesA[fidx].m_plane.w = c;\n" +	"				fidx++;\n" +	"				prevVertex = i;\n" +	"			}\n" +	"		}\n" +	"		convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +	"		convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +	"		float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +	"		posA.w = 0.f;\n" +	"		float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +	"		posB.w = 0.f;\n" +	"		float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +	"		float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +	"		\n" +	"		///////////////////\n" +	"		///compound shape support\n" +	"		if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +	"		{\n" +	"			int compoundChild = concavePairs[pairIdx].w;\n" +	"			int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +	"			int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +	"			float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +	"			float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +	"			float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +	"			float4 newOrnB = qtMul(ornB,childOrnB);\n" +	"			posB = newPosB;\n" +	"			ornB = newOrnB;\n" +	"			shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +	"		}\n" +	
"		//////////////////\n" +	"		float4 c0local = convexPolyhedronA.m_localCenter;\n" +	"		float4 c0 = transform(&c0local, &posA, &ornA);\n" +	"		float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +	"		float4 c1 = transform(&c1local,&posB,&ornB);\n" +	"		const float4 DeltaC2 = c0 - c1;\n" +	"		bool sepA = findSeparatingAxisLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"												posA,ornA,\n" +	"												posB,ornB,\n" +	"												DeltaC2,\n" +	"												verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"												vertices,uniqueEdges,faces,indices,\n" +	"												&sepAxis,&dmin);\n" +	"		hasSeparatingAxis = 4;\n" +	"		if (!sepA)\n" +	"		{\n" +	"			hasSeparatingAxis = 0;\n" +	"		} else\n" +	"		{\n" +	"			bool sepB = findSeparatingAxisLocalB(	&convexShapes[shapeIndexB],&convexPolyhedronA,\n" +	"												posB,ornB,\n" +	"												posA,ornA,\n" +	"												DeltaC2,\n" +	"												vertices,uniqueEdges,faces,indices,\n" +	"												verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"												&sepAxis,&dmin);\n" +	"			if (!sepB)\n" +	"			{\n" +	"				hasSeparatingAxis = 0;\n" +	"			} else\n" +	"			{\n" +	"				bool sepEE = findSeparatingAxisEdgeEdgeLocalA(	&convexPolyhedronA, &convexShapes[shapeIndexB],\n" +	"															posA,ornA,\n" +	"															posB,ornB,\n" +	"															DeltaC2,\n" +	"															verticesA,uniqueEdgesA,facesA,indicesA,\n" +	"															vertices,uniqueEdges,faces,indices,\n" +	"															&sepAxis,&dmin);\n" +	"	\n" +	"				if (!sepEE)\n" +	"				{\n" +	"					hasSeparatingAxis = 0;\n" +	"				} else\n" +	"				{\n" +	"					hasSeparatingAxis = 1;\n" +	"				}\n" +	"			}\n" +	"		}	\n" +	"		\n" +	"		if (hasSeparatingAxis)\n" +	"		{\n" +	"			sepAxis.w = dmin;\n" +	"			concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +	"			concaveHasSeparatingNormals[i]=1;\n" +	"			float minDist = -1e30f;\n" +	"			float maxDist = 0.02f;\n" +	"		\n" +	"			findClippingFaces(sepAxis,\n" +	"                     
&convexPolyhedronA,\n" +	"					 &convexShapes[shapeIndexB],\n" +	"					 posA,ornA,\n" +	"					 posB,ornB,\n" +	"                      worldVertsA1GPU,\n" +	"                      worldNormalsAGPU,\n" +	"                      worldVertsB1GPU,\n" +	"					  vertexFaceCapacity,\n" +	"                      minDist, maxDist,\n" +	"                      verticesA,\n" +	"                      facesA,\n" +	"                      indicesA,\n" +	" 					  vertices,\n" +	"                      faces,\n" +	"                      indices,\n" +	"                      clippingFacesOut, pairIdx);\n" +	"		} else\n" +	"		{	\n" +	"			//mark this pair as in-active\n" +	"			concavePairs[pairIdx].w = -1;\n" +	"		}\n" +	"	}\n" +	"	else\n" +	"	{	\n" +	"		//mark this pair as in-active\n" +	"		concavePairs[pairIdx].w = -1;\n" +	"	}\n" +	"	\n" +	"	concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" +	"}\n";  |