diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision')
34 files changed, 25885 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h
new file mode 100644
index 0000000000..872f039506
--- /dev/null
+++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h
@@ -0,0 +1,18 @@
+#ifndef B3_BVH_INFO_H
+#define B3_BVH_INFO_H
+
+#include "Bullet3Common/b3Vector3.h"
+
+struct b3BvhInfo // Plain data record: three b3Vector3 values followed by four ints, no member functions. NOTE(review): field order is presumably mirrored by device-side code - confirm before reordering.
+{
+	b3Vector3 m_aabbMin; // presumably the minimum corner of the tree's bounding box - TODO confirm against the code that fills it
+	b3Vector3 m_aabbMax; // presumably the maximum corner of the same bounding box - TODO confirm
+	b3Vector3 m_quantization; // presumably the scale used to quantize node bounds - TODO confirm
+	int m_numNodes; // presumably the number of BVH nodes belonging to this tree - TODO confirm
+	int m_numSubTrees; // presumably the number of subtree headers belonging to this tree - TODO confirm
+	int m_nodeOffset; // presumably the index of this tree's first node in a shared node array - TODO confirm
+	int m_subTreeOffset; // presumably the index of this tree's first subtree header in a shared array - TODO confirm
+
+};
+
+#endif //B3_BVH_INFO_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp new file mode 100644 index 0000000000..cb30ee939b --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp @@ -0,0 +1,258 @@ + +#if 0 +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + + +#include "b3ContactCache.h" +#include "Bullet3Common/b3Transform.h" + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" + +b3Scalar gContactBreakingThreshold = b3Scalar(0.02); + +///gContactCalcArea3Points will approximate the convex hull area using 3 points +///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower +bool gContactCalcArea3Points = true; + + + + +static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) +{ + // It calculates possible 3 area constructed from random 4 points and returns the biggest one. 
+ + b3Vector3 a[3],b[3]; + a[0] = p0 - p1; + a[1] = p0 - p2; + a[2] = p0 - p3; + b[0] = p2 - p3; + b[1] = p1 - p3; + b[2] = p1 - p2; + + //todo: Following 3 cross production can be easily optimized by SIMD. + b3Vector3 tmp0 = a[0].cross(b[0]); + b3Vector3 tmp1 = a[1].cross(b[1]); + b3Vector3 tmp2 = a[2].cross(b[2]); + + return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); +} +#if 0 + +//using localPointA for all points +int b3ContactCache::sortCachedPoints(const b3Vector3& pt) +{ + //calculate 4 possible cases areas, and take biggest area + //also need to keep 'deepest' + + int maxPenetrationIndex = -1; +#define KEEP_DEEPEST_POINT 1 +#ifdef KEEP_DEEPEST_POINT + b3Scalar maxPenetration = pt.getDistance(); + for (int i=0;i<4;i++) + { + if (m_pointCache[i].getDistance() < maxPenetration) + { + maxPenetrationIndex = i; + maxPenetration = m_pointCache[i].getDistance(); + } + } +#endif //KEEP_DEEPEST_POINT + + b3Scalar res0(b3Scalar(0.)),res1(b3Scalar(0.)),res2(b3Scalar(0.)),res3(b3Scalar(0.)); + + if (gContactCalcArea3Points) + { + if (maxPenetrationIndex != 0) + { + b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; + b3Vector3 cross = a0.cross(b0); + res0 = cross.length2(); + } + if (maxPenetrationIndex != 1) + { + b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; + b3Vector3 cross = a1.cross(b1); + res1 = cross.length2(); + } + + if (maxPenetrationIndex != 2) + { + b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 cross = a2.cross(b2); + res2 = cross.length2(); + } + + if (maxPenetrationIndex != 3) + { + b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; + b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; + b3Vector3 cross = 
a3.cross(b3); + res3 = cross.length2(); + } + } + else + { + if(maxPenetrationIndex != 0) { + res0 = calcArea4Points(pt.m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); + } + + if(maxPenetrationIndex != 1) { + res1 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); + } + + if(maxPenetrationIndex != 2) { + res2 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[3].m_localPointA); + } + + if(maxPenetrationIndex != 3) { + res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA); + } + } + b3Vector4 maxvec(res0,res1,res2,res3); + int biggestarea = maxvec.closestAxis4(); + return biggestarea; + +} + + +int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const +{ + b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold(); + int size = getNumContacts(); + int nearestPoint = -1; + for( int i = 0; i < size; i++ ) + { + const b3Vector3 &mp = m_pointCache[i]; + + b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA; + const b3Scalar distToManiPoint = diffA.dot(diffA); + if( distToManiPoint < shortestDist ) + { + shortestDist = distToManiPoint; + nearestPoint = i; + } + } + return nearestPoint; +} + +int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint) +{ + b3Assert(validContactDistance(newPoint)); + + int insertIndex = getNumContacts(); + if (insertIndex == MANIFOLD_CACHE_SIZE) + { +#if MANIFOLD_CACHE_SIZE >= 4 + //sort cache so best points come first, based on area + insertIndex = sortCachedPoints(newPoint); +#else + insertIndex = 0; +#endif + clearUserCache(m_pointCache[insertIndex]); + + } else + { + m_cachedPoints++; + + + } + if (insertIndex<0) + insertIndex=0; + + //b3Assert(m_pointCache[insertIndex].m_userPersistentData==0); + m_pointCache[insertIndex] = 
newPoint; + return insertIndex; +} + +#endif + +bool b3ContactCache::validContactDistance(const b3Vector3& pt) +{ + return pt.w <= gContactBreakingThreshold; +} + +void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i) +{ + int numContacts = b3Contact4Data_getNumPoints(&newContactCache); + if (i!=(numContacts-1)) + { + b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]); + b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]); + b3Swap(newContactCache.m_worldPosB[i],newContactCache.m_worldPosB[numContacts-1]); + } + b3Contact4Data_setNumPoints(&newContactCache,numContacts-1); + +} + + +void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts) +{ + + int numContacts = b3Contact4Data_getNumPoints(&contacts); + + + int i; + /// first refresh worldspace positions and distance + for (i=numContacts-1;i>=0;i--) + { + b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); + b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); + contacts.m_worldPosB[i] = worldPosB; + float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormalOnB); + contacts.m_worldPosB[i].w = distance; + } + + /// then + b3Scalar distance2d; + b3Vector3 projectedDifference,projectedPoint; + for (i=numContacts-1;i>=0;i--) + { + b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); + b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); + b3Vector3&pt = contacts.m_worldPosB[i]; + //contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction) + if (!validContactDistance(pt)) + { + removeContactPoint(contacts,i); + } else + { + //contact also becomes invalid when relative movement orthogonal to normal exceeds margin + projectedPoint = worldPosA - contacts.m_worldNormalOnB * contacts.m_worldPosB[i].w; + projectedDifference = contacts.m_worldPosB[i] - projectedPoint; + distance2d = 
projectedDifference.dot(projectedDifference); + if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold ) + { + removeContactPoint(contacts,i); + } else + { + ////contact point processed callback + //if (gContactProcessedCallback) + // (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); + } + } + } + + +} + + + + + +#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h new file mode 100644 index 0000000000..d6c9b0a07e --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h @@ -0,0 +1,80 @@ + +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifndef B3_CONTACT_CACHE_H +#define B3_CONTACT_CACHE_H + + +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3Common/b3Transform.h" +#include "Bullet3Common/b3AlignedAllocator.h" + + +///maximum contact breaking and merging threshold +extern b3Scalar gContactBreakingThreshold; + + + +#define MANIFOLD_CACHE_SIZE 4 + +///b3ContactCache is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase. +///Those contact points are created by the collision narrow phase. +///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time. +///updates/refreshes old contact points, and throws them away if necessary (distance becomes too large) +///reduces the cache to 4 points, when more than 4 points are added, using the following rules: +///the contact point with deepest penetration is always kept, and it tries to maximize the area covered by the points +///note that some pairs of objects might have more than one contact manifold. 
+B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache +{ /* NOTE(review): this class declares member functions only, no data members. In this same change every definition in b3ContactCache.cpp is wrapped in '#if 0' (sortCachedPoints and addManifoldPoint sit under a second, nested '#if 0'), so that file contributes no compiled definitions for the members below. */ + + + + + /// sort cached points so most isolated points come first + int sortCachedPoints(const b3Vector3& pt); + + + +public: + + B3_DECLARE_ALIGNED_ALLOCATOR(); + + + + int addManifoldPoint( const b3Vector3& newPoint); + + /*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex) + { + b3Assert(validContactDistance(newPoint)); + m_pointCache[insertIndex] = newPoint; + } + */ + + + + static bool validContactDistance(const b3Vector3& pt); /* per the '#if 0'-ed definition in b3ContactCache.cpp: true when pt.w <= gContactBreakingThreshold */ + + /// calculates new worldspace coordinates and depth, and rejects points that exceed the collision margin + static void refreshContactPoints( const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache); + + static void removeContactPoint(struct b3Contact4Data& newContactCache,int i); /* per the '#if 0'-ed definition in b3ContactCache.cpp: unless i is already the last point, swaps entry i with the last entry of m_localPosA, m_localPosB and m_worldPosB, then lowers the stored point count by one */ + + +}; + + + +#endif //B3_CONTACT_CACHE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp new file mode 100644 index 0000000000..fb435aa7fd --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -0,0 +1,4733 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +bool findSeparatingAxisOnGpu = true; +bool splitSearchSepAxisConcave = false; +bool splitSearchSepAxisConvex = true; +bool useMprGpu = true;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... +bool bvhTraversalKernelGPU = true; +bool findConcaveSeparatingAxisKernelGPU = true; +bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; +bool clipConvexFacesAndFindContactsCPU = false;//false;//true; +bool reduceConcaveContactsOnGPU = true;//false; +bool reduceConvexContactsOnGPU = true;//false; +bool findConvexClippingFacesGPU = true; +bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false +bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false + + +static int myframecount=0;///for testing + +///This file was written by Erwin Coumans +///Separating axis rest based on work from Pierre Terdiman, see +///And contact clipping based on work from Simon Hobbs + +//#define B3_DEBUG_SAT_FACE + +//#define CHECK_ON_HOST + +#ifdef CHECK_ON_HOST +//#define PERSISTENT_CONTACTS_HOST +#endif + +int b3g_actualSATPairTests=0; + +#include "b3ConvexHullContact.h" +#include <string.h>//memcpy +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" + +#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" +#include "Bullet3Geometry/b3AabbUtil.h" + +typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; + + +#include <float.h> //for FLT_MAX +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +//#include "AdlQuaternion.h" + +#include "kernels/satKernels.h" +#include "kernels/mprKernels.h" 
+ +#include "kernels/satConcaveKernels.h" + +#include "kernels/satClipHullContacts.h" +#include "kernels/bvhTraversal.h" +#include "kernels/primitiveContacts.h" + + +#include "Bullet3Geometry/b3AabbUtil.h" + +#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" +#define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" + +#define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" + + +#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" +#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" +#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" + + +#ifndef __global +#define __global +#endif + +#ifndef __kernel +#define __kernel +#endif + + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" + + + +#define dot3F4 b3Dot + +GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) +:m_context(ctx), +m_device(device), +m_queue(q), + +m_findSeparatingAxisKernel(0), +m_findSeparatingAxisVertexFaceKernel(0), +m_findSeparatingAxisEdgeEdgeKernel(0), +m_unitSphereDirections(m_context,m_queue), + +m_totalContactsOut(m_context, m_queue), +m_sepNormals(m_context, m_queue), +m_dmins(m_context,m_queue), + +m_hasSeparatingNormals(m_context, m_queue), +m_concaveSepNormals(m_context, m_queue), +m_concaveHasSeparatingNormals(m_context,m_queue), +m_numConcavePairsOut(m_context, m_queue), + + +m_gpuCompoundPairs(m_context, m_queue), + + +m_gpuCompoundSepNormals(m_context, m_queue), +m_gpuHasCompoundSepNormals(m_context, m_queue), 
+ +m_numCompoundPairsOut(m_context, m_queue) +{ + m_totalContactsOut.push_back(0); + + cl_int errNum=0; + + if (1) + { + const char* mprSrc = mprKernelsCL; + + const char* srcConcave = satConcaveKernelsCL; + char flags[1024]={0}; +//#ifdef CL_PLATFORM_INTEL +// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); +//#endif + m_mprPenetrationKernel = 0; + m_findSeparatingAxisUnitSphereKernel = 0; + + if (useMprGpu) + { + cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); + b3Assert(m_mprPenetrationKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); + b3Assert(m_findSeparatingAxisUnitSphereKernel); + b3Assert(errNum==CL_SUCCESS); + + + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + m_unitSphereDirections.resize(numDirections); + m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); + + + } + + + cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); + b3Assert(errNum==CL_SUCCESS); + + cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisKernel); + b3Assert(errNum==CL_SUCCESS); + + + m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, 
m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisVertexFaceKernel); + + m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisVertexFaceKernel); + + + m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); + b3Assert(m_findConcaveSeparatingAxisKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); + b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); + b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); + b3Assert(errNum==CL_SUCCESS); + + + + + m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); + b3Assert(m_findCompoundPairsKernel); + b3Assert(errNum==CL_SUCCESS); + m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); + b3Assert(m_processCompoundPairsKernel); + b3Assert(errNum==CL_SUCCESS); + } + + if (1) + { + const char* srcClip = satClipKernelsCL; + + char flags[1024]={0}; +//#ifdef CL_PLATFORM_INTEL +// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); +//#endif + + cl_program satClipContactsProg = 
b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + + m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + +// m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); + // b3Assert(errNum==CL_SUCCESS); + + m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, + "newContactReductionKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + } + else + { + m_clipHullHullKernel=0; + m_clipCompoundsHullHullKernel = 0; + m_findClippingFacesKernel = 0; + m_newContactReductionKernel=0; + m_clipFacesAndFindContacts = 0; + m_clipHullHullConcaveConvexKernel = 0; +// m_extractManifoldAndAddContactKernel = 0; + } + + if (1) + { + const char* srcBvh = bvhTraversalKernelCL; + cl_program bvhTraversalProg = 
b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); + b3Assert(errNum==CL_SUCCESS); + + } + + { + const char* primitiveContactsSrc = primitiveContactsKernelsCL; + cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); + b3Assert(errNum==CL_SUCCESS); + b3Assert(m_findConcaveSphereContactsKernel); + + m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); + b3Assert(errNum==CL_SUCCESS); + b3Assert(m_processCompoundPairsPrimitivesKernel); + + } + + +} + +GpuSatCollision::~GpuSatCollision() +{ + + if (m_findSeparatingAxisVertexFaceKernel) + clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); + + if (m_findSeparatingAxisEdgeEdgeKernel) + clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); + + if (m_findSeparatingAxisUnitSphereKernel) + clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); + + if (m_mprPenetrationKernel) + clReleaseKernel(m_mprPenetrationKernel); + + + if (m_findSeparatingAxisKernel) + clReleaseKernel(m_findSeparatingAxisKernel); + + if (m_findConcaveSeparatingAxisVertexFaceKernel) + 
clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); + + + if (m_findConcaveSeparatingAxisEdgeEdgeKernel) + clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); + + if (m_findConcaveSeparatingAxisKernel) + clReleaseKernel(m_findConcaveSeparatingAxisKernel); + + if (m_findCompoundPairsKernel) + clReleaseKernel(m_findCompoundPairsKernel); + + if (m_processCompoundPairsKernel) + clReleaseKernel(m_processCompoundPairsKernel); + + if (m_findClippingFacesKernel) + clReleaseKernel(m_findClippingFacesKernel); + + if (m_clipFacesAndFindContacts) + clReleaseKernel(m_clipFacesAndFindContacts); + if (m_newContactReductionKernel) + clReleaseKernel(m_newContactReductionKernel); + if (m_primitiveContactsKernel) + clReleaseKernel(m_primitiveContactsKernel); + + if (m_findConcaveSphereContactsKernel) + clReleaseKernel(m_findConcaveSphereContactsKernel); + + if (m_processCompoundPairsPrimitivesKernel) + clReleaseKernel(m_processCompoundPairsPrimitivesKernel); + + if (m_clipHullHullKernel) + clReleaseKernel(m_clipHullHullKernel); + if (m_clipCompoundsHullHullKernel) + clReleaseKernel(m_clipCompoundsHullHullKernel); + + if (m_clipHullHullConcaveConvexKernel) + clReleaseKernel(m_clipHullHullConcaveConvexKernel); +// if (m_extractManifoldAndAddContactKernel) + // clReleaseKernel(m_extractManifoldAndAddContactKernel); + + if (m_bvhTraversalKernel) + clReleaseKernel(m_bvhTraversalKernel); + +} + +struct MyTriangleCallback : public b3NodeOverlapCallback +{ + int m_bodyIndexA; + int m_bodyIndexB; + + virtual void processNode(int subPart, int triangleIndex) + { + printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); + printf("triangleIndex %d\n", triangleIndex); + } +}; + + +#define float4 b3Vector3 +#define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) + +float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) +{ + float4 n = planeEqn; + n[3] = 0.f; + float dist = dot3F4(n, point) + planeEqn[3]; + 
*closestPointOnFace = point - dist * n; + return dist; +} + + + +#define cross3(a,b) (a.cross(b)) +b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) +{ + b3Transform tr; + tr.setIdentity(); + tr.setOrigin(*pos); + tr.setRotation(*orn); + b3Vector3 res = tr(*v); + return res; +} + + +inline bool IsPointInPolygon(const float4& p, + const b3GpuFace* face, + const float4* baseVertex, + const int* convexIndices, + float4* out) +{ + float4 a; + float4 b; + float4 ab; + float4 ap; + float4 v; + + float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); + + if (face->m_numIndices<2) + return false; + + + float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; + b = v0; + + for(unsigned i=0; i != face->m_numIndices; ++i) + { + a = b; + float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; + b = vi; + ab = b-a; + ap = p-a; + v = cross3(ab,plane); + + if (b3Dot(ap, v) > 0.f) + { + float ab_m2 = b3Dot(ab, ab); + float rt = ab_m2 != 0.f ? 
b3Dot(ab, ap) / ab_m2 : 0.f; + if (rt <= 0.f) + { + *out = a; + } + else if (rt >= 1.f) + { + *out = b; + } + else + { + float s = 1.f - rt; + out[0].x = s * a.x + rt * b.x; + out[0].y = s * a.y + rt * b.y; + out[0].z = s * a.z + rt * b.z; + } + return false; + } + } + return true; +} + +#define normalize3(a) (a.normalize()) + + +int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = b3MakeVector3(0,0,0,0); + { + + for (int i=0;i<nPoints;i++) + center += p[i]; + center /= (float)nPoints; + } + + + + // sample 4 directions + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + + //keep point with deepest penetration + float minW= FLT_MAX; + + int minIndex=-1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for(int ie = 0; ie<nPoints; ie++ ) + { + if (p[ie].w<minW) + { + minW = p[ie].w; + minIndex=ie; + } + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + if (f<maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4( -u, r ); + if (f<maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + + f = dot3F4( v, r ); + if (f<maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4( -v, r ); + if (f<maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; + +} + + + +#define MAX_VERTS 1024 + + +inline void project(const b3ConvexPolyhedronData& hull, const 
float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) +{ + min = FLT_MAX; + max = -FLT_MAX; + int numVerts = hull.m_numVertices; + + const float4 localDir = b3QuatRotate(orn.inverse(),dir); + + b3Scalar offset = dot3F4(pos,dir); + + for(int i=0;i<numVerts;i++) + { + //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; + //b3Scalar dp = pt.dot(dir); + //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; + b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset+i],localDir); + //b3Assert(dp==dpL); + if(dp < min) min = dp; + if(dp > max) max = dp; + } + if(min>max) + { + b3Scalar tmp = min; + min = max; + max = tmp; + } + min += offset; + max += offset; +} + + +static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA,const b3Quaternion& ornA, + const float4& posB,const b3Quaternion& ornB, + const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth) +{ + b3Scalar Min0,Max0; + b3Scalar Min1,Max1; + project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); + project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + b3Scalar d0 = Max0 - Min1; + assert(d0>=0.0f); + b3Scalar d1 = Max1 - Min0; + assert(d1>=0.0f); + depth = d0<d1 ? 
d0:d1; + return true; +} + +inline bool IsAlmostZero(const b3Vector3& v) +{ + if(fabsf(v.x)>1e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6) return false; + return true; +} + + +static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA1, + const b3Quaternion& ornA, + const float4& posB1, + const b3Quaternion& ornB, + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, + const b3AlignedObjectArray<b3GpuFace>& facesA, + const b3AlignedObjectArray<int>& indicesA, + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, + const b3AlignedObjectArray<b3GpuFace>& facesB, + const b3AlignedObjectArray<int>& indicesB, + + b3Vector3& sep) +{ + B3_PROFILE("findSeparatingAxis"); + + b3g_actualSATPairTests++; + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; +//#ifdef TEST_INTERNAL_OBJECTS + float4 c0local = (float4&)hullA.m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = (float4&)hullB.m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 deltaC2 = c0 - c1; +//#endif + + b3Scalar dmin = FLT_MAX; + int curPlaneTests=0; + + int numFacesA = hullA.m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4& normal = (float4&)facesA[hullA.m_faceOffset+i].m_plane; + float4 faceANormalWS = b3QuatRotate(ornA,normal); + + if (dot3F4(deltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + + curPlaneTests++; +#ifdef TEST_INTERNAL_OBJECTS + gExpectedNbTests++; + if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) + continue; + gActualNbTests++; +#endif + + + b3Scalar d; + if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d)) + return false; + + if(d<dmin) + { + dmin = d; + sep = (b3Vector3&)faceANormalWS; + } + } + + int 
numFacesB = hullB.m_numFaces; + // Test normals from hullB + for(int i=0;i<numFacesB;i++) + { + float4 normal = (float4&)facesB[hullB.m_faceOffset+i].m_plane; + float4 WorldNormal = b3QuatRotate(ornB, normal); + + if (dot3F4(deltaC2,WorldNormal)<0) + { + WorldNormal*=-1.f; + } + curPlaneTests++; +#ifdef TEST_INTERNAL_OBJECTS + gExpectedNbTests++; + if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin)) + continue; + gActualNbTests++; +#endif + + b3Scalar d; + if(!TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d)) + return false; + + if(d<dmin) + { + dmin = d; + sep = (b3Vector3&)WorldNormal; + } + } + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0<hullA.m_numUniqueEdges;e0++) + { + const float4& edge0 = (float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0]; + float4 edge0World = b3QuatRotate(ornA,(float4&)edge0); + + for(int e1=0;e1<hullB.m_numUniqueEdges;e1++) + { + const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1]; + float4 edge1World = b3QuatRotate(ornB,(float4&)edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero((b3Vector3&)crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(deltaC2,crossje)<0) + crossje*=-1.f; + + +#ifdef TEST_INTERNAL_OBJECTS + gExpectedNbTests++; + if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin)) + continue; + gActualNbTests++; +#endif + + b3Scalar dist; + if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist)) + return false; + + if(dist<dmin) + { + dmin = dist; + sep = (b3Vector3&)crossje; + } + } + } + + } + + + if((dot3F4(-deltaC2,(float4&)sep))>0.0f) + sep = -sep; + + return true; +} + + +bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, + const b3Float4& posA1, + const b3Quat& ornA, + const b3Float4& posB1, + const b3Quat& 
ornB, + const b3Float4& DeltaC2, + __global const b3AlignedObjectArray<float4>& vertices, + __global const b3AlignedObjectArray<float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + float4* sep, + float* dmin) +{ +// int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + //int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = b3QuatRotate(ornA,edge0); + + for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = b3QuatRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje*=-1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); + project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); + + if(Max0<Min1 || Max1<Min0) + result = false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? 
d0:d1; + result = true; + + } + + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + +__inline float4 lerp3(const float4& a,const float4& b, float t) +{ + return b3MakeVector3( a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); +} + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + +int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, + const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, + //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, + float4* contactsOut, + int 
contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = b3MakeVector3( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = b3QuatRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face +// int numContacts = numWorldVertsB1; + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; + const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + 
float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;i<numVertsIn;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + if (numContactsOut<contactCapacity) + { + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + //resultOut.addContactPoint(separatingNormal,point,depth); + contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + //printf("depth=%f\n",depth); + } + } else + { + b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut,contactCapacity); + } + } + } + + return numContactsOut; +} + + + +static int clipHullAgainstHull(const float4& separatingNormal, + const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA, const b3Quaternion& ornA,const float4& posB, const b3Quaternion& ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, + const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, + + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + B3_PROFILE("clipHullAgainstHull"); + +// float curMaxDist=maxDist; + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + //B3_PROFILE("closestFaceB"); + if (hullB.m_numFaces!=1) + { + //printf("wtf\n"); + } + static bool once = true; + //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); + + for(int face=0;face<hullB.m_numFaces;face++) + { +#ifdef BT_DEBUG_SAT_FACE + if (once) 
+ printf("face %d\n",face); + const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face]; + if (once) + { + for (int i=0;i<faceB->m_numIndices;i++) + { + float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; + printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); + } + } +#endif //BT_DEBUG_SAT_FACE + //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) + { + const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, + facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = b3QuatRotate(ornB, Normal); +#ifdef BT_DEBUG_SAT_FACE + if (once) + printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); +#endif + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + once = false; + } + + + b3Assert(closestFaceB>=0); + { + //B3_PROFILE("worldVertsB1"); + const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + //B3_PROFILE("clipFaceAgainstHull"); + numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, + verticesA, facesA, indicesA, + contactsOut,contactCapacity); + } + + return numContactsOut; +} + + + + + + +#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; +#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} +#define REDUCE_MAX(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? 
v[i]: v[i+offset]; } + +int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = make_float4(0,0,0,0); + { + + for (int i=0;i<nPoints;i++) + center += p[i]; + center /= (float)nPoints; + } + + + + // sample 4 directions + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + + //keep point with deepest penetration + float minW= FLT_MAX; + + int minIndex=-1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for(int ie = 0; ie<nPoints; ie++ ) + { + if (p[ie].w<minW) + { + minW = p[ie].w; + minIndex=ie; + } + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + if (f<maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4( -u, r ); + if (f<maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + + f = dot3F4( v, r ); + if (f<maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4( -v, r ); + if (f<maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; + +} + + + + +int clipHullHullSingle( + int bodyIndexA, int bodyIndexB, + const float4& posA, + const b3Quaternion& ornA, + const float4& posB, + const b3Quaternion& ornB, + + int collidableIndexA, int collidableIndexB, + + const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, + b3AlignedObjectArray<b3Contact4>* globalContactOut, + int& nContacts, + + const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, + const 
b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, + + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, + const b3AlignedObjectArray<b3GpuFace>& facesA, + const b3AlignedObjectArray<int>& indicesA, + + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, + const b3AlignedObjectArray<b3GpuFace>& facesB, + const b3AlignedObjectArray<int>& indicesB, + + const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, + const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, + const b3Vector3& sepNormalWorldSpace, + int maxContactCapacity ) +{ + int contactIndex = -1; + b3ConvexPolyhedronData hullA, hullB; + + b3Collidable colA = hostCollidablesA[collidableIndexA]; + hullA = hostConvexDataA[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); + + + b3Collidable colB = hostCollidablesB[collidableIndexB]; + hullB = hostConvexDataB[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); + + + float4 contactsOut[MAX_VERTS]; + int localContactCapacity = MAX_VERTS; + +#ifdef _WIN32 + b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); + b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); +#endif + + + { + + float4 worldVertsB1[MAX_VERTS]; + float4 worldVertsB2[MAX_VERTS]; + int capacityWorldVerts = MAX_VERTS; + + float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); + int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; + int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; + + b3Scalar minDist = -1; + b3Scalar maxDist = 0.; + + + + b3Transform trA,trB; + { + //B3_PROFILE("transform computation"); + //trA.setIdentity(); + trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); + trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); + + //trB.setIdentity(); + trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); + 
trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); + } + + b3Quaternion trAorn = trA.getRotation(); + b3Quaternion trBorn = trB.getRotation(); + + int numContactsOut = clipHullAgainstHull(hostNormal, + hostConvexDataA.at(shapeA), + hostConvexDataB.at(shapeB), + (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, + (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + verticesA, facesA,indicesA, + verticesB, facesB,indicesB, + + contactsOut,localContactCapacity); + + if (numContactsOut>0) + { + B3_PROFILE("overlap"); + + float4 normalOnSurfaceB = (float4&)hostNormal; + + b3Int4 contactIdx; + contactIdx.x = 0; + contactIdx.y = 1; + contactIdx.z = 2; + contactIdx.w = 3; + + int numPoints = 0; + + { + // B3_PROFILE("extractManifold"); + numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); + } + + b3Assert(numPoints); + + if (nContacts<maxContactCapacity) + { + contactIndex = nContacts; + globalContactOut->expand(); + b3Contact4& contact = globalContactOut->at(nContacts); + contact.m_batchIdx = 0;//i; + contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; + contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? 
-bodyIndexB:bodyIndexB; + + contact.m_frictionCoeffCmp = 45874; + contact.m_restituitionCoeffCmp = 0; + + // float distance = 0.f; + for (int p=0;p<numPoints;p++) + { + contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B + contact.m_worldNormalOnB = normalOnSurfaceB; + } + //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); + contact.m_worldNormalOnB.w = (b3Scalar)numPoints; + nContacts++; + } else + { + b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity); + } + } + } + return contactIndex; +} + + + + + +void computeContactPlaneConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) +{ + + int shapeIndex = collidables[collidableIndexB].m_shapeIndex; + const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; + + b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; + b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; + b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; + b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; + +// int numContactsOut = 0; +// int numWorldVertsB1= 0; + + b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; + b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); + b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); + float planeConstant = planeEq.w; + b3Transform convexWorldTransform; + convexWorldTransform.setIdentity(); + convexWorldTransform.setOrigin(posB); + convexWorldTransform.setRotation(ornB); + b3Transform planeTransform; + planeTransform.setIdentity(); + 
planeTransform.setOrigin(posA); + planeTransform.setRotation(ornA); + + b3Transform planeInConvex; + planeInConvex= convexWorldTransform.inverse() * planeTransform; + b3Transform convexInPlane; + convexInPlane = planeTransform.inverse() * convexWorldTransform; + + b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + float maxDot = -1e30; + int hitVertex=-1; + b3Vector3 hitVtx; + +#define MAX_PLANE_CONVEX_POINTS 64 + + b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; + int numPoints = 0; + + b3Int4 contactIdx; + contactIdx.s[0] = 0; + contactIdx.s[1] = 1; + contactIdx.s[2] = 2; + contactIdx.s[3] = 3; + + for (int i=0;i<hullB->m_numVertices;i++) + { + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + float curDot = vtx.dot(planeNormalInConvex); + + + if (curDot>maxDot) + { + hitVertex=i; + maxDot=curDot; + hitVtx = vtx; + //make sure the deepest points is always included + if (numPoints==MAX_PLANE_CONVEX_POINTS) + numPoints--; + } + + if (numPoints<MAX_PLANE_CONVEX_POINTS) + { + b3Vector3 vtxWorld = convexWorldTransform*vtx; + b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; + float dist = planeNormal.dot(vtxInPlane)-planeConstant; + if (dist<0.f) + { + vtxWorld.w = dist; + contactPoints[numPoints] = vtxWorld; + numPoints++; + } + } + + } + + int numReducedPoints = 0; + + numReducedPoints = numPoints; + + if (numPoints>4) + { + numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + } + int dstIdx; +// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (numReducedPoints>0) + { + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -planeNormalWorld; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = 
rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + for (int i=0;i<numReducedPoints;i++) + { + b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; + c->m_worldPosB[i] = pOnB1; + } + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; + }//if (dstIdx < numPairs) + } + + + +// printf("computeContactPlaneConvex\n"); +} + + + +B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) + { + b3Vector3 vecOut; + vecOut.setValue( + (b3Scalar)(vecIn[0]) / (quantization.x), + (b3Scalar)(vecIn[1]) / (quantization.y), + (b3Scalar)(vecIn[2]) / (quantization.z)); + vecOut += bvhAabbMin; + return vecOut; + } + +void traverseTreeTree() +{ + +} + +#include "Bullet3Common/shared/b3Mat3x3.h" + +int numAabbChecks = 0; +int maxNumAabbChecks = 0; +int maxDepth = 0; + +// work-in-progress +__kernel void findCompoundPairsKernel( + int pairIndex, + int bodyIndexA, + int bodyIndexB, + int collidableIndexA, + int collidableIndexB, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray<b3Float4>& vertices, + __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, + __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace, + __global const b3GpuChildShape* gpuChildShapes, + __global b3Int4* gpuCompoundPairsOut, + __global int* numCompoundPairsOut, + int maxNumCompoundPairsCapacity, + b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, + b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, + b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU + ) +{ + numAabbChecks=0; + maxNumAabbChecks=0; +// int i = pairIndex; + { + + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + //once the broadphase avoids static-static 
pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } + + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; + int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; + int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; + int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; + int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; + + + int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + + b3Transform transA; + transA.setIdentity(); + transA.setOrigin(posA); + transA.setRotation(ornA); + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + b3Transform transB; + transB.setIdentity(); + transB.setOrigin(posB); + transB.setRotation(ornB); + + + + for (int p=0;p<numSubTreesA;p++) + { + b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p]; + //bvhInfoCPU[bvhA].m_quantization + b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + + b3Vector3 aabbAMinOut,aabbAMaxOut; + float margin=0.f; + b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + + for (int q=0;q<numSubTreesB;q++) + { + b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q]; + + b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 treeBmaxLocal = 
MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + + b3Vector3 aabbBMinOut,aabbBMaxOut; + float margin=0.f; + b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + + + numAabbChecks=0; + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (aabbOverlap) + { + + int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfoCPU[bvhA].m_nodeOffset; + // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; + + int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfoCPU[bvhB].m_nodeOffset; + // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; + + b3AlignedObjectArray<b3Int2> nodeStack; + b3Int2 node0; + node0.x = startNodeIndexA; + node0.y = startNodeIndexB; + + int maxStackDepth = 1024; + nodeStack.resize(maxStackDepth); + int depth=0; + nodeStack[depth++]=node0; + + do + { + if (depth > maxDepth) + { + maxDepth=depth; + printf("maxDepth=%d\n",maxDepth); + } + b3Int2 node = nodeStack[--depth]; + + b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + + b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + + float margin=0.f; + b3Vector3 aabbAMinOut,aabbAMaxOut; + b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + + b3Vector3 aabbBMinOut,aabbBMaxOut; + b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + + numAabbChecks++; + bool nodeOverlap 
= b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (nodeOverlap) + { + bool isLeafA = treeNodesCPU[node.x].isLeafNode(); + bool isLeafB = treeNodesCPU[node.y].isLeafNode(); + bool isInternalA = !isLeafA; + bool isInternalB = !isLeafB; + + //fail, even though it might hit two leaf nodes + if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) + { + b3Error("Error: traversal exceeded maxStackDepth\n"); + continue; + } + + if(isInternalA) + { + int nodeAleftChild = node.x+1; + bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); + int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); + + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); + + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); + } + else + { + nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); + nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); + } + } + else + { + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); + } + else + { + int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex(); + int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + } + } + } + } + } while (depth); + maxNumAabbChecks = b3Max(numAabbChecks,maxNumAabbChecks); + } + } + } + + return; + } + + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + + if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + + int numChildrenA = collidables[collidableIndexA].m_numChildShapes; + for (int c=0;c<numChildrenA;c++) + { + int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; + int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + + + + b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; + + + b3Transform transA; + transA.setIdentity(); + transA.setOrigin(newPosA); + transA.setRotation(newOrnA); + b3Scalar margin=0.0f; + + b3Vector3 aabbAMinOut,aabbAMaxOut; + + b3TransformAabb2((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + + if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + int 
numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int b=0;b<numChildrenB;b++) + { + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + + + + b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; + + b3Transform transB; + transB.setIdentity(); + transB.setOrigin(newPosB); + transB.setRotation(newOrnB); + + b3Vector3 aabbBMinOut,aabbBMaxOut; + b3TransformAabb2((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + + numAabbChecks++; + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (aabbOverlap) + { + /* + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + float4 posA = newPosA; + posA.w = 0.f; + float4 posB = newPosB; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + b3Quat ornA = newOrnA; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + b3Quat ornB =newOrnB; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + */ + {// + int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + } + }// + }//fi(1) + } //for (int b=0 + }//if (collidables[collidableIndexB]. 
+ else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + if (1) + { + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + // float dmin = FLT_MAX; + float4 posA = newPosA; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + b3Quat ornA = newOrnA; + float4 c0; + c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 c1; + c1 = transform(&c1local,&posB,&ornB); + // const float4 DeltaC2 = c0 - c1; + + { + int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,-1); + }//if (compoundPairIdx<maxNumCompoundPairsCapacity) + }// + }//fi (1) + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + }//for (int b=0;b<numChildrenB;b++) + return; + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) + && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int b=0;b<numChildrenB;b++) + { + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + + int shapeIndexB = collidables[childColIndexB].m_shapeIndex; + + + ////////////////////////////////////// + + if 
(1) + { + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + // float dmin = FLT_MAX; + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = newPosB; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0; + c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + b3Quat ornB =newOrnB; + float4 c1; + c1 = transform(&c1local,&posB,&ornB); + // const float4 DeltaC2 = c0 - c1; + {// + int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,-1,childShapeIndexB); + }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) + }// + }//fi (1) + }//for (int b=0;b<numChildrenB;b++) + return; + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + return; + }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + }//i<numPairs +} + + + +__kernel void processCompoundPairsKernel( __global const b3Int4* gpuCompoundPairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray<b3Float4>& vertices, + __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + __global b3Aabb* aabbs, + __global const b3GpuChildShape* gpuChildShapes, + __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, + __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, + int numCompoundPairs, + int i + ) +{ + +// int i = get_global_id(0); + if (i<numCompoundPairs) + { + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = 
gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + gpuHasCompoundSepNormalsOut[i] = 0; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int shapeTypeA = collidables[collidableIndexA].m_shapeType; + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + + + if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) + { + return; + } + + int hasSeparatingAxis = 5; + + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + posA.w = 0.f; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = 
convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = make_float4(1,0,0,0); +// bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); + + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxis( convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); + + if (!sepB) + { + hasSeparatingAxis = 0; + } else//(!sepB) + { + bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + if (sepEE) + { + gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); + gpuHasCompoundSepNormalsOut[i] = 1; + }//sepEE + }//(!sepB) + }//(!sepA) + + + } + +} + + +__kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray<b3Float4>& vertices, + __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + __global const b3GpuChildShape* gpuChildShapes, + __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, + __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, + __global struct b3Contact4Data* globalContactsOut, + int* nGlobalContactsOut, + int 
numCompoundPairs, int maxContactCapacity, int i) +{ + +// int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.0f; + + if (i<numCompoundPairs) + { + + if (gpuHasCompoundSepNormalsOut[i]) + { + + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], + convexShapes[shapeIndexA], 
convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -gpuCompoundSepNormalsOut[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + b3Int4 contactIdx;// = {-1,-1,-1,-1}; + + contactIdx.s[0] = 0; + contactIdx.s[1] = 1; + contactIdx.s[2] = 2; + contactIdx.s[3] = 3; + + int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); + + int dstIdx; + dstIdx = b3AtomicInc( nGlobalContactsOut); + if ((dstIdx+nReducedContacts) < maxContactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = gpuCompoundPairs[pairIndex].x; + int bodyB = gpuCompoundPairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;i<nReducedContacts;i++) + { + c->m_worldPosB[i] = pointsIn[contactIdx.s[i]]; + } + b3Contact4Data_setNumPoints(c,nReducedContacts); + } + + }// if (numContactsOut>0) + }// if (gpuHasCompoundSepNormalsOut[i]) + }// if (i<numCompoundPairs) + +} + + +void computeContactCompoundCompound(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3GpuChildShape* cpuChildShapes, + const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, + const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, + + const b3AlignedObjectArray<b3Vector3>& convexVertices, + const 
b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, + const b3AlignedObjectArray<int>& convexIndices, + const b3AlignedObjectArray<b3GpuFace>& faces, + + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, + b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, + b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU + ) +{ + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); + + b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; + int numCompoundPairsOut=0; + int maxNumCompoundPairsCapacity = 8192;//1024; + cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); + + // work-in-progress + findCompoundPairsKernel( + pairIndex, + bodyIndexA,bodyIndexB, + collidableIndexA,collidableIndexB, + rigidBodies, + collidables, + convexShapes, + convexVertices, + hostAabbsWorldSpace, + hostAabbsLocalSpace, + cpuChildShapes, + &cpuCompoundPairsOut[0], + &numCompoundPairsOut, + maxNumCompoundPairsCapacity , + treeNodesCPU, + subTreesCPU, + bvhInfoCPU + ); + + printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); + if (numCompoundPairsOut>maxNumCompoundPairsCapacity) + { + b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); + numCompoundPairsOut=maxNumCompoundPairsCapacity; + } + b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut; + b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut; + cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); + cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); + + for (int i=0;i<numCompoundPairsOut;i++) + { + + processCompoundPairsKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,0,cpuChildShapes, + cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,numCompoundPairsOut,i); + } + + for (int i=0;i<numCompoundPairsOut;i++) + { + 
clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,cpuChildShapes, + cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,globalContactsOut,&nGlobalContactsOut,numCompoundPairsOut,maxContactCapacity,i); + } + /* + int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + + int shapeIndexA = collidables[childColIndexA].m_shapeIndex; + + + bool foundSepAxis = findSeparatingAxis(hullA,hullB, + posA, + ornA, + posB, + ornB, + + convexVertices,uniqueEdges,faces,convexIndices, + convexVertices,uniqueEdges,faces,convexIndices, + + sepNormalWorldSpace + ); + */ + + + /* + if (foundSepAxis) + { + + + contactIndex = clipHullHullSingle( + bodyIndexA, bodyIndexB, + posA,ornA, + posB,ornB, + collidableIndexA, collidableIndexB, + &rigidBodies, + &globalContactsOut, + nGlobalContactsOut, + + convexShapes, + convexShapes, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + collidables, + collidables, + sepNormalWorldSpace, + maxContactCapacity); + + } + */ + +// return contactIndex; + + /* + + int numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int c=0;c<numChildrenB;c++) + { + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; + int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; + + float4 rootPosB = rigidBodies[bodyIndexB].m_pos; + b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; + b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; + b3Quaternion childOrnB = 
cpuChildShapes[childShapeIndexB].m_childOrientation; + float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; + b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB); + + int shapeIndexB = collidables[childColIndexB].m_shapeIndex; + + const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; + + } + */ + +} + +void computeContactPlaneCompound(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3GpuChildShape* cpuChildShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) +{ + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); + + + int numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int c=0;c<numChildrenB;c++) + { + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; + int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; + + float4 rootPosB = rigidBodies[bodyIndexB].m_pos; + b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; + b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; + b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; + float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; + b3Quaternion ornB = rootOrnB*childOrnB;//b3QuatMul(ornB,childOrnB); + + int shapeIndexB = collidables[childColIndexB].m_shapeIndex; + + const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; + + + b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; + b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; + + // int numContactsOut = 0; + // int numWorldVertsB1= 0; + + b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; + b3Vector3 
planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); + b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); + float planeConstant = planeEq.w; + b3Transform convexWorldTransform; + convexWorldTransform.setIdentity(); + convexWorldTransform.setOrigin(posB); + convexWorldTransform.setRotation(ornB); + b3Transform planeTransform; + planeTransform.setIdentity(); + planeTransform.setOrigin(posA); + planeTransform.setRotation(ornA); + + b3Transform planeInConvex; + planeInConvex= convexWorldTransform.inverse() * planeTransform; + b3Transform convexInPlane; + convexInPlane = planeTransform.inverse() * convexWorldTransform; + + b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + float maxDot = -1e30; + int hitVertex=-1; + b3Vector3 hitVtx; + + #define MAX_PLANE_CONVEX_POINTS 64 + + b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; + int numPoints = 0; + + b3Int4 contactIdx; + contactIdx.s[0] = 0; + contactIdx.s[1] = 1; + contactIdx.s[2] = 2; + contactIdx.s[3] = 3; + + for (int i=0;i<hullB->m_numVertices;i++) + { + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + float curDot = vtx.dot(planeNormalInConvex); + + + if (curDot>maxDot) + { + hitVertex=i; + maxDot=curDot; + hitVtx = vtx; + //make sure the deepest points is always included + if (numPoints==MAX_PLANE_CONVEX_POINTS) + numPoints--; + } + + if (numPoints<MAX_PLANE_CONVEX_POINTS) + { + b3Vector3 vtxWorld = convexWorldTransform*vtx; + b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; + float dist = planeNormal.dot(vtxInPlane)-planeConstant; + if (dist<0.f) + { + vtxWorld.w = dist; + contactPoints[numPoints] = vtxWorld; + numPoints++; + } + } + + } + + int numReducedPoints = 0; + + numReducedPoints = numPoints; + + if (numPoints>4) + { + numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + } + int dstIdx; + // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if 
(numReducedPoints>0) + { + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -planeNormalWorld; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + for (int i=0;i<numReducedPoints;i++) + { + b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; + c->m_worldPosB[i] = pOnB1; + } + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; + }//if (dstIdx < numPairs) + } + + } + + +} + + + + + +void computeContactSphereConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) +{ + + float radius = collidables[collidableIndexA].m_radius; + float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; + b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; + + + + float4 pos = rigidBodies[bodyIndexB].m_pos; + + + b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; + + b3Transform tr; + tr.setIdentity(); + tr.setOrigin(pos); + tr.setRotation(quat); + b3Transform trInv = tr.inverse(); + + float4 spherePos = trInv(spherePos1); + + int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; + int shapeIndex = collidables[collidableIndex].m_shapeIndex; + int numFaces = convexShapes[shapeIndex].m_numFaces; + float4 closestPnt = b3MakeVector3(0, 0, 0, 0); +// float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); + float minDist = -1000000.f; // TODO: What is the largest/smallest float? 
+ bool bCollide = true; + int region = -1; + float4 localHitNormal; + for ( int f = 0; f < numFaces; f++ ) + { + b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; + float4 planeEqn; + float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); + planeEqn = n1; + planeEqn[3] = face.m_plane.w; + + float4 pntReturn; + float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); + + if ( dist > radius) + { + bCollide = false; + break; + } + + if ( dist > 0 ) + { + //might hit an edge or vertex + b3Vector3 out; + + bool isInPoly = IsPointInPolygon(spherePos, + &face, + &convexVertices[convexShapes[shapeIndex].m_vertexOffset], + convexIndices, + &out); + if (isInPoly) + { + if (dist>minDist) + { + minDist = dist; + closestPnt = pntReturn; + localHitNormal = planeEqn; + region=1; + } + } else + { + b3Vector3 tmp = spherePos-out; + b3Scalar l2 = tmp.length2(); + if (l2<radius*radius) + { + dist = b3Sqrt(l2); + if (dist>minDist) + { + minDist = dist; + closestPnt = out; + localHitNormal = tmp/dist; + region=2; + } + + } else + { + bCollide = false; + break; + } + } + } + else + { + if ( dist > minDist ) + { + minDist = dist; + closestPnt = pntReturn; + localHitNormal = planeEqn; + region=3; + } + } + } + static int numChecks = 0; + numChecks++; + + if (bCollide && minDist > -10000) + { + + float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; + float4 pOnB1 = tr(closestPnt); + //printf("dist ,%f,",minDist); + float actualDepth = minDist-radius; + if (actualDepth<0) + { + //printf("actualDepth = ,%f,", actualDepth); + //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); + //printf("region=,%d,\n", region); + pOnB1[3] = actualDepth; + + int dstIdx; +// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (nGlobalContactsOut < maxContactCapacity) + { 
+ dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = normalOnSurfaceB1; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_worldPosB[0] = pOnB1; + int numPoints = 1; + c->m_worldNormalOnB.w = (b3Scalar)numPoints; + }//if (dstIdx < numPairs) + } + }//if (hasCollision) + +} + + + + +int computeContactConvexConvex2( + int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, + const b3AlignedObjectArray<b3Collidable>& collidables, + const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, + const b3AlignedObjectArray<b3Vector3>& convexVertices, + const b3AlignedObjectArray<b3Vector3>& uniqueEdges, + const b3AlignedObjectArray<int>& convexIndices, + const b3AlignedObjectArray<b3GpuFace>& faces, + b3AlignedObjectArray<b3Contact4>& globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + const b3AlignedObjectArray<b3Contact4>& oldContacts + ) +{ + int contactIndex = -1; + b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; + b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; + b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; + b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; + + + b3ConvexPolyhedronData hullA, hullB; + + b3Vector3 sepNormalWorldSpace; + + + + b3Collidable colA = collidables[collidableIndexA]; + hullA = convexShapes[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); + + + b3Collidable colB = collidables[collidableIndexB]; + hullB = convexShapes[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); + +// int contactCapacity = MAX_VERTS; + //int numContactsOut=0; + + +#ifdef _WIN32 + 
b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); + b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); +#endif + + bool foundSepAxis = findSeparatingAxis(hullA,hullB, + posA, + ornA, + posB, + ornB, + + convexVertices,uniqueEdges,faces,convexIndices, + convexVertices,uniqueEdges,faces,convexIndices, + + sepNormalWorldSpace + ); + + + if (foundSepAxis) + { + + + contactIndex = clipHullHullSingle( + bodyIndexA, bodyIndexB, + posA,ornA, + posB,ornB, + collidableIndexA, collidableIndexB, + &rigidBodies, + &globalContactsOut, + nGlobalContactsOut, + + convexShapes, + convexShapes, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + collidables, + collidables, + sepNormalWorldSpace, + maxContactCapacity); + + } + + return contactIndex; +} + + + + + + + +void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, + const b3OpenCLArray<b3RigidBodyData>* bodyBuf, + b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, + const b3OpenCLArray<b3Contact4>* oldContacts, + int maxContactCapacity, + int compoundPairCapacity, + const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, + const b3OpenCLArray<b3Vector3>& gpuVertices, + const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, + const b3OpenCLArray<b3GpuFace>& gpuFaces, + const b3OpenCLArray<int>& gpuIndices, + const b3OpenCLArray<b3Collidable>& gpuCollidables, + const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, + + const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, + const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, + + b3OpenCLArray<b3Vector3>& worldVertsB1GPU, + b3OpenCLArray<b3Int4>& clippingFacesOutGPU, + b3OpenCLArray<b3Vector3>& worldNormalsAGPU, + b3OpenCLArray<b3Vector3>& worldVertsA1GPU, + b3OpenCLArray<b3Vector3>& worldVertsB2GPU, + b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, + b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, + b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, + 
b3OpenCLArray<b3BvhInfo>* bvhInfo, + + int numObjects, + int maxTriConvexPairCapacity, + b3OpenCLArray<b3Int4>& triangleConvexPairsOut, + int& numTriConvexPairsOut + ) +{ + myframecount++; + + if (!nPairs) + return; + +#ifdef CHECK_ON_HOST + + + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray<b3Vector3> hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs; + + b3AlignedObjectArray<b3Contact4> hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + + b3AlignedObjectArray<b3Contact4> oldHostContacts; + + if (oldContacts->size()) + { + oldContacts->copyToHost(oldHostContacts); + } + + + hostContacts.resize(maxContactCapacity); + + for (int i=0;i<nPairs;i++) + { + int bodyIndexA = hostPairs[i].x; + int 
bodyIndexB = hostPairs[i].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) + { + computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) + { + computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); + //printf("convex-sphere\n"); + + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) + { + computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +// printf("convex-plane\n"); + + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) + { + computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +// printf("plane-convex\n"); + + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + 
computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], + nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU); +// printf("convex-plane\n"); + + } + + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) + { + computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +// printf("convex-plane\n"); + + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], + &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); +// printf("plane-convex\n"); + + } + + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && + hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) + { + //printf("hostPairs[i].z=%d\n",hostPairs[i].z); + int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + //int contactIndex = 
computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + + + if (contactIndex>=0) + { +// printf("convex convex contactIndex = %d\n",contactIndex); + hostPairs[i].z = contactIndex; + } +// printf("plane-convex\n"); + + } + + + } + + if (hostPairs.size()) + { + pairs->copyFromHost(hostPairs); + } + + hostContacts.resize(nContacts); + if (nContacts) + { + + contactOut->copyFromHost(hostContacts); + } else + { + contactOut->resize(0); + } + + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + //printf("(HOST) nContacts = %d\n",nContacts); + +#else + + { + if (nPairs) + { + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + + B3_PROFILE("primitiveContactsKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst(maxContactCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } + } + + +#endif//CHECK_ON_HOST + + B3_PROFILE("computeConvexConvexContactsGPUSAT"); + // printf("nContacts = %d\n",nContacts); + + + m_sepNormals.resize(nPairs); + 
m_hasSeparatingNormals.resize(nPairs); + + int concaveCapacity=maxTriConvexPairCapacity; + m_concaveSepNormals.resize(concaveCapacity); + m_concaveHasSeparatingNormals.resize(concaveCapacity); + m_numConcavePairsOut.resize(0); + m_numConcavePairsOut.push_back(0); + + + m_gpuCompoundPairs.resize(compoundPairCapacity); + + m_gpuCompoundSepNormals.resize(compoundPairCapacity); + + + m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); + + m_numCompoundPairsOut.resize(0); + m_numCompoundPairsOut.push_back(0); + + int numCompoundPairs = 0; + + int numConcavePairs =0; + + { + clFinish(m_queue); + if (findSeparatingAxisOnGpu) + { + m_dmins.resize(nPairs); + if (splitSearchSepAxisConvex) + { + + + if (useMprGpu) + { + nContacts = m_totalContactsOut.at(0); + { + B3_PROFILE("mprPenetrationKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + + launcher.setConst(maxContactCapacity); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + /* + b3AlignedObjectArray<int>hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArray<b3Vector3>hostSepAxis; + m_sepNormals.copyToHost(hostSepAxis); + */ + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts 
exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + + } + } + + if (1) + { + + if (1) + { + { + B3_PROFILE("findSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + + { + B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()), + b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) + + }; + + b3LauncherCL launcher(m_queue, 
m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numDirections); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + } + if (useMprGpu) + { + B3_PROFILE("findSeparatingAxisUnitSphereKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + launcher.setConst( numDirections); + + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + } + + + } else + { + B3_PROFILE("findSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); + 
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + } + else + { + + B3_PROFILE("findSeparatingAxisKernel CPU"); + + + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; + convexData.copyToHost(hostConvexShapeData); + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray<int> hostHasSepAxis; + hostHasSepAxis.resize(nPairs); + b3AlignedObjectArray<b3Vector3> hostSepAxis; + hostSepAxis.resize(nPairs); + + b3AlignedObjectArray<b3Vector3> hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + + b3AlignedObjectArray<b3Contact4> hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + hostContacts.resize(maxContactCapacity); + int nGlobalContactsOut = nContacts; + + + for (int i=0;i<nPairs;i++) + { + + int bodyIndexA = hostPairs[i].x; + int bodyIndexB = hostPairs[i].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + + int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; + + hostHasSepAxis[i] = 0; + + //once the broadphase avoids static-static pairs, we can remove this test + if ((hostBodyBuf[bodyIndexA].m_invMass==0) 
&&(hostBodyBuf[bodyIndexB].m_invMass==0)) + { + continue; + } + + + if ((hostCollidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(hostCollidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) + { + continue; + } + + float dmin = FLT_MAX; + + b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; + b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; + b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; + b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; + b3Quaternion ornA =hostBodyBuf[bodyIndexA].m_quat; + b3Quaternion ornB =hostBodyBuf[bodyIndexB].m_quat; + + + if (useGjk) + { + + //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR + { + b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; + b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); + b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; + b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); + b3Vector3 DeltaC2 = c0 - c1; + + b3Vector3 sepAxis; + + bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + + if (hasSepAxisA) + { + bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + if (hasSepAxisB) + { + bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin,false); + + if (hasEdgeEdge) + { + 
hostHasSepAxis[i] = 1; + hostSepAxis[i] = sepAxis; + hostSepAxis[i].w = dmin; + } + } + } + } + + if (hostHasSepAxis[i]) + { + int pairIndex = i; + + bool useMpr = true; + if (useMpr) + { + int res=0; + float depth = 0.f; + b3Vector3 sepAxis2 = b3MakeVector3(1,0,0); + b3Vector3 resultPointOnBWorld = b3MakeVector3(0,0,0); + + float depthOut; + b3Vector3 dirOut; + b3Vector3 posOut; + + + //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); + res = b3MprPenetration(pairIndex,bodyIndexA,bodyIndexB,&hostBodyBuf[0],&hostConvexShapeData[0],&hostCollidables[0],&hostVertices[0],&hostSepAxis[0],&hostHasSepAxis[0],&depthOut,&dirOut,&posOut); + depth = depthOut; + sepAxis2 = b3MakeVector3(-dirOut.x,-dirOut.y,-dirOut.z); + resultPointOnBWorld = posOut; + //hostHasSepAxis[i] = 0; + + + if (res==0) + { + //add point? + //printf("depth = %f\n",depth); + //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); + //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); + + + + float dist=0.f; + + const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; + const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; + + if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) + { + if (depth > dist) + { + float diff = depth - dist; + + static float maxdiff = 0.f; + if (maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n",maxdiff); + } + } + } + if (depth > dmin) + { + b3Vector3 oldAxis = hostSepAxis[i]; + depth = dmin; + sepAxis2 = oldAxis; + } + + + + if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) + { + if (depth > dist) + { + float diff = depth - dist; + //printf("?diff = %f\n",diff ); + static float maxdiff = 0.f; + if 
(maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n",maxdiff); + } + } + //this is used for SAT + //hostHasSepAxis[i] = 1; + //hostSepAxis[i] = sepAxis2; + + //add contact point + + //int contactIndex = nGlobalContactsOut; + b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); + nGlobalContactsOut++; + newContact.m_batchIdx = 0;//i; + newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; + newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; + + newContact.m_frictionCoeffCmp = 45874; + newContact.m_restituitionCoeffCmp = 0; + + + static float maxDepth = 0.f; + + if (depth > maxDepth) + { + maxDepth = depth; + printf("MPR maxdepth = %f\n",maxDepth ); + + } + + + resultPointOnBWorld.w = -depth; + newContact.m_worldPosB[0] = resultPointOnBWorld; + //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; + newContact.m_worldNormalOnB = sepAxis2; + newContact.m_worldNormalOnB.w = (b3Scalar)1; + } else + { + printf("rejected\n"); + } + + + } + } else + { + + + + //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + b3AlignedObjectArray<b3Contact4> oldHostContacts; + int result; + result = computeContactConvexConvex2( //hostPairs, + pairIndex, + bodyIndexA, bodyIndexB, + collidableIndexA, collidableIndexB, + hostBodyBuf, + hostCollidables, + hostConvexShapeData, + hostVertices, + hostUniqueEdges, + hostIndices, + hostFaces, + hostContacts, + nGlobalContactsOut, + maxContactCapacity, + oldHostContacts + //hostHasSepAxis, + //hostSepAxis + + ); + }//mpr + }//hostHasSepAxis[i] = 1; + + } else + { + + b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; + b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); + b3Vector3 
c1local = hostConvexShapeData[shapeIndexB].m_localCenter; + b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); + b3Vector3 DeltaC2 = c0 - c1; + + b3Vector3 sepAxis; + + bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + + if (hasSepAxisA) + { + bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + if (hasSepAxisB) + { + bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin,true); + + if (hasEdgeEdge) + { + hostHasSepAxis[i] = 1; + hostSepAxis[i] = sepAxis; + } + } + } + } + } + + if (useGjkContacts)//nGlobalContactsOut>0) + { + //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); + nContacts = nGlobalContactsOut; + contactOut->copyFromHost(hostContacts); + + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + } + + m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); + m_sepNormals.copyFromHost(hostSepAxis); + + /* + //double-check results from GPU (comment-out the 'else' so both paths are executed + b3AlignedObjectArray<int> checkHasSepAxis; + m_hasSeparatingNormals.copyToHost(checkHasSepAxis); + static int frameCount = 0; + frameCount++; + for (int i=0;i<nPairs;i++) + { + if (hostHasSepAxis[i] != checkHasSepAxis[i]) + { + printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n", + 
frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]); + } + } + //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); + // m_sepNormals.copyFromHost(hostSepAxis); + */ + } + + + numCompoundPairs = m_numCompoundPairsOut.at(0); + bool useGpuFindCompoundPairs=true; + if (useGpuFindCompoundPairs) + { + B3_PROFILE("findCompoundPairsKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), + b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), + b3BufferInfoCL(subTreesGPU->getBufferCL()), + b3BufferInfoCL(treeNodesGPU->getBufferCL()), + b3BufferInfoCL(bvhInfo->getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst( compoundPairCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + numCompoundPairs = m_numCompoundPairsOut.at(0); + //printf("numCompoundPairs =%d\n",numCompoundPairs ); + if (numCompoundPairs) + { + //printf("numCompoundPairs=%d\n",numCompoundPairs); + } + + + } else + { + + + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + 
clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; + cpuCompoundPairsOut.resize(compoundPairCapacity); + + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + + + + + for (int pairIndex=0;pairIndex<nPairs;pairIndex++) + { + int bodyIndexA = hostPairs[pairIndex].x; + int bodyIndexB = hostPairs[pairIndex].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + if (cpuChildShapes.size()) + { + findCompoundPairsKernel( + pairIndex, + bodyIndexA, + bodyIndexB, + collidableIndexA, + collidableIndexB, + &hostBodyBuf[0], + &hostCollidables[0], + &hostConvexData[0], + hostVertices, + hostAabbsWorldSpace, + hostAabbsLocalSpace, + &cpuChildShapes[0], + &cpuCompoundPairsOut[0], + &numCompoundPairs, + compoundPairCapacity, + treeNodesCPU, + subTreesCPU, + bvhInfoCPU + ); + } + } + + + m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs,1,0,true); + if (numCompoundPairs) + { + b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; + m_gpuCompoundPairs.copyFromHostPointer(ptr,numCompoundPairs,0,true); + } + //cpuCompoundPairsOut + + } + if (numCompoundPairs) + { + printf("numCompoundPairs=%d\n",numCompoundPairs); + } + + if (numCompoundPairs > compoundPairCapacity) + { + b3Error("Exceeded compound 
pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); + numCompoundPairs = compoundPairCapacity; + } + + + + m_gpuCompoundPairs.resize(numCompoundPairs); + m_gpuHasCompoundSepNormals.resize(numCompoundPairs); + m_gpuCompoundSepNormals.resize(numCompoundPairs); + + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsPrimitivesKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numCompoundPairs ); + launcher.setConst(maxContactCapacity); + + int num = numCompoundPairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + } + + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + 
b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), + b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numCompoundPairs ); + + int num = numCompoundPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + + + //printf("numConcave = %d\n",numConcave); + + + +// printf("hostNormals.size()=%d\n",hostNormals.size()); + //int numPairs = pairCount.at(0); + + + + } + int vertexFaceCapacity = 64; + + + + { + //now perform the tree query on GPU + + + + + if (treeNodesGPU->size() && treeNodesGPU->size()) + { + if (bvhTraversalKernelGPU) + { + + B3_PROFILE("m_bvhTraversalKernel"); + + + numConcavePairs = m_numConcavePairsOut.at(0); + + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer( subTreesGPU->getBufferCL()); + launcher.setBuffer( treeNodesGPU->getBufferCL()); + launcher.setBuffer( bvhInfo->getBufferCL()); + + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + numConcavePairs = m_numConcavePairsOut.at(0); + } else + { + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + 
b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + //int maxTriConvexPairCapacity, + b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + + //int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut + + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... + + volatile int hostNumConcavePairsOut=0; + + // + for (int i=0;i<nPairs;i++) + { + b3BvhTraversal( &hostPairs.at(0), + &hostBodyBuf.at(0), + &hostCollidables.at(0), + &hostAabbsWorldSpace.at(0), + &triangleConvexPairsOutHost.at(0), + &hostNumConcavePairsOut, + &subTreesCPU.at(0), + &treeNodesCPU.at(0), + &bvhInfoCPU.at(0), + nPairs, + maxTriConvexPairCapacity, + i); + } + numConcavePairs = hostNumConcavePairsOut; + + if (hostNumConcavePairsOut) + { + triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); + triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); + } + // + + m_numConcavePairsOut.resize(0); + m_numConcavePairsOut.push_back(numConcavePairs); + } + + //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); + + if (numConcavePairs > maxTriConvexPairCapacity) + { + static int exceeded_maxTriConvexPairCapacity_count = 0; + b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", + numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); + numConcavePairs = maxTriConvexPairCapacity; + } + 
triangleConvexPairsOut.resize(numConcavePairs); + + if (numConcavePairs) + { + + + + + clippingFacesOutGPU.resize(numConcavePairs); + worldNormalsAGPU.resize(numConcavePairs); + worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); + worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); + + + if (findConcaveSeparatingAxisKernelGPU) + { + + /* + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + */ + + //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) + if (splitSearchSepAxisConcave) + { + //printf("numConcavePairs = %d\n",numConcavePairs); + m_dmins.resize(numConcavePairs); + { + B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); + 
launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + + + } +// numConcavePairs = 0; + if (1) + { + B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + // numConcavePairs = 0; + + + + + + + } else + { + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( 
gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + } else + { + + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + + b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray<b3Vector3> hostUniqueEdges; + 
gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + + b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); + + b3GpuChildShape* childShapePointerCPU = 0; + if (cpuChildShapes.size()) + childShapePointerCPU = &cpuChildShapes.at(0); + + clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + worldNormalsACPU.resize(worldNormalsAGPU.size()); + worldVertsB1CPU.resize(worldVertsB1GPU.size()); + + for (int i=0;i<numConcavePairs;i++) + { + b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), + &hostBodyBuf.at(0), + &hostCollidables.at(0), + &hostConvexData.at(0), &hostVertices.at(0),&hostUniqueEdges.at(0), + &hostFaces.at(0),&hostIndices.at(0),childShapePointerCPU, + &hostAabbsWorldSpace.at(0), + &concaveSepNormalsHost.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + vertexFaceCapacity, + numConcavePairs,i); + }; + + m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + + + + } +// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; +// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); +// b3AlignedObjectArray<b3Int4> cpuConcavePairs; +// triangleConvexPairsOut.copyToHost(cpuConcavePairs); + + + } + } + + + } + + if 
(numConcavePairs) + { + if (numConcavePairs) + { + B3_PROFILE("findConcaveSphereContactsKernel"); + nContacts = m_totalContactsOut.at(0); +// printf("nContacts1 = %d\n",nContacts); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + + launcher.setConst( numConcavePairs ); + launcher.setConst(maxContactCapacity); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); + + //printf("nContacts2 = %d\n",nContacts); + + if (nContacts >= maxContactCapacity) + { + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + } + + } + + + +#ifdef __APPLE__ + bool contactClippingOnGpu = true; +#else + bool contactClippingOnGpu = true; +#endif + + if (contactClippingOnGpu) + { + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); +// printf("nContacts3 = %d\n",nContacts); + + + //B3_PROFILE("clipHullHullKernel"); + + bool breakupConcaveConvexKernel = true; + +#ifdef __APPLE__ + //actually, some Apple OpenCL platform/device combinations work fine... 
+ breakupConcaveConvexKernel = true; +#endif + //concave-convex contact clipping + if (numConcavePairs) + { + // printf("numConcavePairs = %d\n", numConcavePairs); + // nContacts = m_totalContactsOut.at(0); + // printf("nContacts before = %d\n", nContacts); + + if (breakupConcaveConvexKernel) + { + + worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); + + + //clipFacesAndFindContacts + + if (clipConcaveFacesAndFindContactsCPU) + { + + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + worldVertsA1GPU.copyToHost(worldVertsA1CPU); + worldNormalsAGPU.copyToHost(worldNormalsACPU); + worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + + + b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); + + b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + worldVertsB2CPU.resize(worldVertsB2GPU.size()); + + + for (int i=0;i<numConcavePairs;i++) + { + + clipFacesAndFindContactsKernel( &concaveSepNormalsHost.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &worldVertsB2CPU.at(0), + vertexFaceCapacity, + i); + } + + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsB2GPU.copyFromHost(worldVertsB2CPU); + + + } else + { + + if (1) + { + + + + B3_PROFILE("clipFacesAndFindContacts"); + //nContacts = m_totalContactsOut.at(0); + //int h = m_hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + 
b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + + launcher.setConst( numConcavePairs ); + int debugMode = 0; + launcher.setConst( debugMode); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + //int bla = m_totalContactsOut.at(0); + } + } + //contactReduction + { + int newContactCapacity=nContacts+numConcavePairs; + contactOut->reserve(newContactCapacity); + if (reduceConcaveContactsOnGPU) + { +// printf("newReservation = %d\n",newReservation); + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(newContactCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + }else + { + + volatile int nGlobalContactsOut = nContacts; + b3AlignedObjectArray<b3Int4> 
triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); + + b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + + + b3AlignedObjectArray<b3Contact4> hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + hostContacts.resize(newContactCapacity); + + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + worldVertsB2GPU.copyToHost(worldVertsB2CPU); + + + + for (int i=0;i<numConcavePairs;i++) + { + b3NewContactReductionKernel( &triangleConvexPairsOutHost.at(0), + &hostBodyBuf.at(0), + &concaveSepNormalsHost.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + &hostContacts.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsB2CPU.at(0), + &nGlobalContactsOut, + vertexFaceCapacity, + newContactCapacity, + numConcavePairs, + i + ); + + } + + + nContacts = nGlobalContactsOut; + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); +// nContacts = m_totalContactsOut.at(0); + //contactOut->resize(nContacts); + hostContacts.resize(nContacts); + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + contactOut->copyFromHost(hostContacts); + } + + } + //re-use? 
+ + + } else + { + B3_PROFILE("clipHullHullConcaveConvexKernel"); + nContacts = m_totalContactsOut.at(0); + int newContactCapacity = contactOut->capacity(); + + //printf("contactOut5 = %d\n",nContacts); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(newContactCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + //printf("contactOut6 = %d\n",nContacts); + b3AlignedObjectArray<b3Contact4> cpuContacts; + contactOut->copyToHost(cpuContacts); + } + // printf("nContacts after = %d\n", nContacts); + }//numConcavePairs + + + + //convex-convex contact clipping + + bool breakupKernel = false; + +#ifdef __APPLE__ + breakupKernel = true; +#endif + +#ifdef CHECK_ON_HOST + bool computeConvexConvex = false; +#else + bool computeConvexConvex = true; +#endif//CHECK_ON_HOST + if (computeConvexConvex) + { + B3_PROFILE("clipHullHullKernel"); + if (breakupKernel) + { + + + + + worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + clippingFacesOutGPU.resize(nPairs); + worldNormalsAGPU.resize(nPairs); + 
worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); + worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); + + if (findConvexClippingFacesGPU) + { + B3_PROFILE("findClippingFacesKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( vertexFaceCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } else + { + + float minDist = -1e30f; + float maxDist = 0.02f; + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray<int> hostHasSepNormals; + m_hasSeparatingNormals.copyToHost(hostHasSepNormals); + b3AlignedObjectArray<b3Vector3> cpuSepNormals; + m_sepNormals.copyToHost(cpuSepNormals); + + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + 
worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + worldNormalsACPU.resize(nPairs); + + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + + + for (int i=0;i<nPairs;i++) + { + + int bodyIndexA = hostPairs[i].x; + int bodyIndexB = hostPairs[i].y; + + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + + int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; + + + if (hostHasSepNormals[i]) + { + b3FindClippingFaces(cpuSepNormals[i], + &hostConvexData[shapeIndexA], + &hostConvexData[shapeIndexB], + hostBodyBuf[bodyIndexA].m_pos,hostBodyBuf[bodyIndexA].m_quat, + hostBodyBuf[bodyIndexB].m_pos,hostBodyBuf[bodyIndexB].m_quat, + &worldVertsA1CPU.at(0),&worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + vertexFaceCapacity,minDist,maxDist, + &hostVertices.at(0),&hostFaces.at(0), + &hostIndices.at(0), + &hostVertices.at(0),&hostFaces.at(0), + &hostIndices.at(0),&clippingFacesOutCPU.at(0),i); + } + } + + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + + } + + + + + + ///clip face B against face A, reduce contacts and append them to a global contact array + if (1) + { + if (clipConvexFacesAndFindContactsCPU) + { + + //b3AlignedObjectArray<b3Int4> hostPairs; + //pairs->copyToHost(hostPairs); + + 
b3AlignedObjectArray<b3Vector3> hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray<int> hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + + b3AlignedObjectArray<b3Int4> hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); + + b3AlignedObjectArray<b3Vector3>worldVertsA1CPU; + worldVertsA1GPU.copyToHost(worldVertsA1CPU); + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + worldNormalsAGPU.copyToHost(worldNormalsACPU); + + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + /* + __global const b3Float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global b3Int4* clippingFacesOut, + __global b3Float4* worldVertsA1, + __global b3Float4* worldNormalsA1, + __global b3Float4* worldVertsB1, + __global b3Float4* worldVertsB2, + int vertexFaceCapacity, + int pairIndex + */ + for (int i=0;i<nPairs;i++) + { + clipFacesAndFindContactsKernel( + &hostSepNormals.at(0), + &hostHasSepAxis.at(0), + &hostClippingFaces.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &worldVertsB2CPU.at(0), + + vertexFaceCapacity, + i); + } + + clippingFacesOutGPU.copyFromHost(hostClippingFaces); + worldVertsB2GPU.copyFromHost(worldVertsB2CPU); + + } else + { + B3_PROFILE("clipFacesAndFindContacts"); + //nContacts = m_totalContactsOut.at(0); + //int h = m_hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) + }; + + b3LauncherCL 
launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + + launcher.setConst( nPairs ); + int debugMode = 0; + launcher.setConst( debugMode); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + { + nContacts = m_totalContactsOut.at(0); + //printf("nContacts = %d\n",nContacts); + + int newContactCapacity = nContacts+nPairs; + contactOut->reserve(newContactCapacity); + + if (reduceConvexContactsOnGPU) + { + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(newContactCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } else + { + + volatile int nGlobalContactsOut = nContacts; + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Vector3> hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray<int> hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArray<b3Contact4> hostContactsOut; + contactOut->copyToHost(hostContactsOut); + 
hostContactsOut.resize(newContactCapacity); + + b3AlignedObjectArray<b3Int4> hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + worldVertsB2GPU.copyToHost(worldVertsB2CPU); + + for (int i=0;i<nPairs;i++) + { + b3NewContactReductionKernel(&hostPairs.at(0), + &hostBodyBuf.at(0), + &hostSepNormals.at(0), + &hostHasSepAxis.at(0), + &hostContactsOut.at(0), + &hostClippingFaces.at(0), + &worldVertsB2CPU.at(0), + &nGlobalContactsOut, + vertexFaceCapacity, + newContactCapacity, + nPairs, + i); + } + + nContacts = nGlobalContactsOut; + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + hostContactsOut.resize(nContacts); + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + contactOut->copyFromHost(hostContactsOut); + } + // b3Contact4 pt = contactOut->at(0); + // printf("nContacts = %d\n",nContacts); + } + } + } + else//breakupKernel + { + + if (nPairs) + { + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst(maxContactCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + if (nContacts >= maxContactCapacity) + { + 
b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); + nContacts = maxContactCapacity; + } + contactOut->resize(nContacts); + } + } + + + int nCompoundsPairs = m_gpuCompoundPairs.size(); + + if (nCompoundsPairs) + { + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), + b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nCompoundsPairs ); + launcher.setConst(maxContactCapacity); + + int num = nCompoundsPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + contactOut->resize(nContacts); + }//if nCompoundsPairs + } + }//contactClippingOnGpu + + //printf("nContacts end = %d\n",nContacts); + + //printf("frameCount = %d\n",frameCount++); +} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h new file mode 100644 index 0000000000..e24c1579c6 --- /dev/null +++ 
b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h @@ -0,0 +1,118 @@ + +#ifndef _CONVEX_HULL_CONTACT_H +#define _CONVEX_HULL_CONTACT_H + +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" +#include "Bullet3Common/b3AlignedObjectArray.h" + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" +#include "Bullet3Common/shared/b3Int2.h" +#include "Bullet3Common/shared/b3Int4.h" +#include "b3OptimizedBvh.h" +#include "b3BvhInfo.h" +#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" + +//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" + + + + +/* Holds the OpenCL handles, the kernel handles and the device-side work buffers declared below for the GPU SAT narrowphase. */ struct GpuSatCollision +{ + /* OpenCL context, device and command queue; presumably the constructor arguments - TODO confirm in the .cpp. */ cl_context m_context; + cl_device_id m_device; + cl_command_queue m_queue; + /* One cl_kernel handle per GPU stage; where they are created and released is not visible in this header. */ cl_kernel m_findSeparatingAxisKernel; + cl_kernel m_mprPenetrationKernel; + cl_kernel m_findSeparatingAxisUnitSphereKernel; + + + cl_kernel m_findSeparatingAxisVertexFaceKernel; + cl_kernel m_findSeparatingAxisEdgeEdgeKernel; + + cl_kernel m_findConcaveSeparatingAxisKernel; + cl_kernel m_findConcaveSeparatingAxisVertexFaceKernel; + cl_kernel m_findConcaveSeparatingAxisEdgeEdgeKernel; + + + + + cl_kernel m_findCompoundPairsKernel; + cl_kernel m_processCompoundPairsKernel; + + cl_kernel m_clipHullHullKernel; + cl_kernel m_clipCompoundsHullHullKernel; + + cl_kernel m_clipFacesAndFindContacts; + cl_kernel m_findClippingFacesKernel; + + cl_kernel m_clipHullHullConcaveConvexKernel; +// cl_kernel m_extractManifoldAndAddContactKernel; + cl_kernel m_newContactReductionKernel; + + cl_kernel m_bvhTraversalKernel; + cl_kernel m_primitiveContactsKernel; + cl_kernel m_findConcaveSphereContactsKernel; + + cl_kernel m_processCompoundPairsPrimitivesKernel; + + b3OpenCLArray<b3Vector3> m_unitSphereDirections; + + /* Contact counter buffer. NOTE(review): the contact-clipping code elsewhere in this diff reads element 0 as the running contact total - confirm that code belongs to this struct. */ b3OpenCLArray<int> m_totalContactsOut; + 
+ /* m_sepNormals and m_hasSeparatingNormals: separating normals and has-normal flags for convex pairs; presumably one entry per pair - TODO confirm against the .cpp. */ b3OpenCLArray<b3Vector3> m_sepNormals; + b3OpenCLArray<float> m_dmins; + + b3OpenCLArray<int> m_hasSeparatingNormals; + /* Concave (triangle mesh versus convex) counterparts of the two buffers above, plus a pair-count output buffer. */ b3OpenCLArray<b3Vector3> m_concaveSepNormals; + b3OpenCLArray<int> m_concaveHasSeparatingNormals; + b3OpenCLArray<int> m_numConcavePairsOut; + /* Compound-shape pair list with its own separating normals, flags and pair-count output buffer. */ b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs; + b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals; + b3OpenCLArray<int> m_gpuHasCompoundSepNormals; + b3OpenCLArray<int> m_numCompoundPairsOut; + + + /* Constructor and destructor are only declared here; the destructor is virtual. */ GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ); + virtual ~GpuSatCollision(); + + + /* Narrowphase entry point (definition not in this header). nContacts and numTriConvexPairsOut are non-const int references, so presumably in/out counters - TODO confirm. Note that hostConvexData is declared as a b3OpenCLArray despite its name. */ void computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, + const b3OpenCLArray<b3RigidBodyData>* bodyBuf, + b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, + const b3OpenCLArray<b3Contact4>* oldContacts, + int maxContactCapacity, + int compoundPairCapacity, + const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, + const b3OpenCLArray<b3Vector3>& vertices, + const b3OpenCLArray<b3Vector3>& uniqueEdges, + const b3OpenCLArray<b3GpuFace>& faces, + const b3OpenCLArray<int>& indices, + const b3OpenCLArray<b3Collidable>& gpuCollidables, + const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, + + const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, + const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, + + b3OpenCLArray<b3Vector3>& worldVertsB1GPU, + b3OpenCLArray<b3Int4>& clippingFacesOutGPU, + b3OpenCLArray<b3Vector3>& worldNormalsAGPU, + b3OpenCLArray<b3Vector3>& worldVertsA1GPU, + b3OpenCLArray<b3Vector3>& worldVertsB2GPU, + b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData, + b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, + b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, + b3OpenCLArray<b3BvhInfo>* bvhInfo, + int numObjects, + int maxTriConvexPairCapacity, + b3OpenCLArray<b3Int4>& triangleConvexPairs, + int& numTriConvexPairsOut + ); + + +}; + +#endif //_CONVEX_HULL_CONTACT_H diff --git 
a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h new file mode 100644 index 0000000000..337100fb1a --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h @@ -0,0 +1,9 @@ +#ifndef CONVEX_POLYHEDRON_CL +#define CONVEX_POLYHEDRON_CL + +#include "Bullet3Common/b3Transform.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" + + + +#endif //CONVEX_POLYHEDRON_CL diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp new file mode 100644 index 0000000000..d636f983c6 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp @@ -0,0 +1,1014 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the +use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not +claim that you wrote the original software. If you use this software in a +product, an acknowledgment in the product documentation would be appreciated +but is not required. +2. Altered source versions must be plainly marked as such, and must not be +misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +/* +GJK-EPA collision solver by Nathanael Presson, 2008 +*/ + +#include "b3GjkEpa.h" + +#include "b3SupportMappings.h" + +namespace gjkepa2_impl2 +{ + + // Config + + /* GJK */ +#define GJK_MAX_ITERATIONS 128 +#define GJK_ACCURACY ((b3Scalar)0.0001) +#define GJK_MIN_DISTANCE ((b3Scalar)0.0001) +#define GJK_DUPLICATED_EPS ((b3Scalar)0.0001) +#define GJK_SIMPLEX2_EPS ((b3Scalar)0.0) +#define GJK_SIMPLEX3_EPS ((b3Scalar)0.0) +#define GJK_SIMPLEX4_EPS ((b3Scalar)0.0) + + /* EPA */ +#define EPA_MAX_VERTICES 64 +#define EPA_MAX_FACES (EPA_MAX_VERTICES*2) +#define EPA_MAX_ITERATIONS 255 +#define EPA_ACCURACY ((b3Scalar)0.0001) +#define EPA_FALLBACK (10*EPA_ACCURACY) +#define EPA_PLANE_EPS ((b3Scalar)0.00001) +#define EPA_INSIDE_EPS ((b3Scalar)0.01) + + + // Shorthands + + + // MinkowskiDiff + /* Support-function view of a pair of convex polyhedra: shape 0 is looked up in verticesA, shape 1 in verticesB. */ struct b3MinkowskiDiff + { + + + const b3ConvexPolyhedronData* m_shapes[2]; + + + /* Support1 applies m_toshape1 to the query direction and m_toshape0 to the vertex it gets back. */ b3Matrix3x3 m_toshape1; + b3Transform m_toshape0; + + /* Selects the WithMargin support lookups (called with a margin of 0.f) over the WithoutMargin ones. NOTE(review): nothing in this struct initialises it; callers must call EnableMargin first. */ bool m_enableMargin; + + + void EnableMargin(bool enable) + { + m_enableMargin = enable; + } + /* Support vertex of shape 0 in direction d, looked up in verticesA. */ inline b3Vector3 Support0(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA) const + { + if (m_enableMargin) + { + return localGetSupportVertexWithMargin(d,m_shapes[0],verticesA,0.f); + } else + { + return localGetSupportVertexWithoutMargin(d,m_shapes[0],verticesA); + } + } + /* Support vertex of shape 1 in direction d, looked up in verticesB and mapped through m_toshape1 / m_toshape0. */ inline b3Vector3 Support1(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesB) const + { + if (m_enableMargin) + { + return m_toshape0*(localGetSupportVertexWithMargin(m_toshape1*d,m_shapes[1],verticesB,0.f)); + } else + { + return m_toshape0*(localGetSupportVertexWithoutMargin(m_toshape1*d,m_shapes[1],verticesB)); + } + } + + /* Support point of the difference: Support0(d) minus Support1(-d). */ inline b3Vector3 Support(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const + { + return(Support0(d,verticesA)-Support1(-d,verticesB)); + } + /* Support point of a single shape: index 0 selects shape 0, any non-zero index selects shape 1. */ b3Vector3 Support(const b3Vector3& d,unsigned int index,const b3AlignedObjectArray<b3Vector3>& verticesA, 
const b3AlignedObjectArray<b3Vector3>& verticesB) const + { + /* Each shape is paired with its own vertex array, matching Support0, Support1 and the two-shape overload (the arrays used to be swapped here). */ if(index) + return(Support1(d,verticesB)); + else + return(Support0(d,verticesA)); + } + }; + + typedef b3MinkowskiDiff tShape; + + + // GJK + struct b3GJK + { + /* Types */ + struct sSV + { + b3Vector3 d,w; + }; + struct sSimplex + { + sSV* c[4]; + b3Scalar p[4]; + unsigned int rank; + }; + struct eStatus { enum _ { + Valid, + Inside, + Failed };}; + /* Fields */ + tShape m_shape; + const b3AlignedObjectArray<b3Vector3>& m_verticesA; + const b3AlignedObjectArray<b3Vector3>& m_verticesB; + b3Vector3 m_ray; + b3Scalar m_distance; + sSimplex m_simplices[2]; + sSV m_store[4]; + sSV* m_free[4]; + unsigned int m_nfree; + unsigned int m_current; + sSimplex* m_simplex; + eStatus::_ m_status; + /* Methods */ + b3GJK(const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB) + :m_verticesA(verticesA),m_verticesB(verticesB) + { + Initialize(); + } + void Initialize() + { + m_ray = b3MakeVector3(0,0,0); + m_nfree = 0; + m_status = eStatus::Failed; + m_current = 0; + m_distance = 0; + } + eStatus::_ Evaluate(const tShape& shapearg,const b3Vector3& guess) + { + unsigned int iterations=0; + b3Scalar sqdist=0; + b3Scalar alpha=0; + b3Vector3 lastw[4]; + unsigned int clastw=0; + /* Initialize solver */ + m_free[0] = &m_store[0]; + m_free[1] = &m_store[1]; + m_free[2] = &m_store[2]; + m_free[3] = &m_store[3]; + m_nfree = 4; + m_current = 0; + m_status = eStatus::Valid; + m_shape = shapearg; + m_distance = 0; + /* Initialize simplex */ + m_simplices[0].rank = 0; + m_ray = guess; + const b3Scalar sqrl= m_ray.length2(); + appendvertice(m_simplices[0],sqrl>0?-m_ray:b3MakeVector3(1,0,0)); + m_simplices[0].p[0] = 1; + m_ray = m_simplices[0].c[0]->w; + sqdist = sqrl; + lastw[0] = + lastw[1] = + lastw[2] = + lastw[3] = m_ray; + /* Loop */ + do { + const unsigned int next=1-m_current; + sSimplex& cs=m_simplices[m_current]; + sSimplex& ns=m_simplices[next]; + /* Check zero */ + const 
b3Scalar rl=m_ray.length(); + if(rl<GJK_MIN_DISTANCE) + {/* Touching or inside */ + m_status=eStatus::Inside; + break; + } + /* Append new vertice in -'v' direction */ + appendvertice(cs,-m_ray); + const b3Vector3& w=cs.c[cs.rank-1]->w; + bool found=false; + for(unsigned int i=0;i<4;++i) + { + if((w-lastw[i]).length2()<GJK_DUPLICATED_EPS) + { found=true;break; } + } + if(found) + {/* Return old simplex */ + removevertice(m_simplices[m_current]); + break; + } + else + {/* Update lastw */ + lastw[clastw=(clastw+1)&3]=w; + } + /* Check for termination */ + const b3Scalar omega=b3Dot(m_ray,w)/rl; + alpha=b3Max(omega,alpha); + if(((rl-alpha)-(GJK_ACCURACY*rl))<=0) + {/* Return old simplex */ + removevertice(m_simplices[m_current]); + break; + } + /* Reduce simplex */ + b3Scalar weights[4]; + unsigned int mask=0; + switch(cs.rank) + { + case 2: sqdist=projectorigin( cs.c[0]->w, + cs.c[1]->w, + weights,mask);break; + case 3: sqdist=projectorigin( cs.c[0]->w, + cs.c[1]->w, + cs.c[2]->w, + weights,mask);break; + case 4: sqdist=projectorigin( cs.c[0]->w, + cs.c[1]->w, + cs.c[2]->w, + cs.c[3]->w, + weights,mask);break; + } + if(sqdist>=0) + {/* Valid */ + ns.rank = 0; + m_ray = b3MakeVector3(0,0,0); + m_current = next; + for(unsigned int i=0,ni=cs.rank;i<ni;++i) + { + if(mask&(1<<i)) + { + ns.c[ns.rank] = cs.c[i]; + ns.p[ns.rank++] = weights[i]; + m_ray += cs.c[i]->w*weights[i]; + } + else + { + m_free[m_nfree++] = cs.c[i]; + } + } + if(mask==15) m_status=eStatus::Inside; + } + else + {/* Return old simplex */ + removevertice(m_simplices[m_current]); + break; + } + m_status=((++iterations)<GJK_MAX_ITERATIONS)?m_status:eStatus::Failed; + } while(m_status==eStatus::Valid); + m_simplex=&m_simplices[m_current]; + switch(m_status) + { + case eStatus::Valid: m_distance=m_ray.length();break; + case eStatus::Inside: m_distance=0;break; + default: + { + } + } + return(m_status); + } + bool EncloseOrigin() + { + switch(m_simplex->rank) + { + case 1: + { + for(unsigned int i=0;i<3;++i) 
+ { + b3Vector3 axis=b3MakeVector3(0,0,0); + axis[i]=1; + appendvertice(*m_simplex, axis); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + appendvertice(*m_simplex,-axis); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + } + } + break; + case 2: + { + const b3Vector3 d=m_simplex->c[1]->w-m_simplex->c[0]->w; + for(unsigned int i=0;i<3;++i) + { + b3Vector3 axis=b3MakeVector3(0,0,0); + axis[i]=1; + const b3Vector3 p=b3Cross(d,axis); + if(p.length2()>0) + { + appendvertice(*m_simplex, p); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + appendvertice(*m_simplex,-p); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + } + } + } + break; + case 3: + { + const b3Vector3 n=b3Cross(m_simplex->c[1]->w-m_simplex->c[0]->w, + m_simplex->c[2]->w-m_simplex->c[0]->w); + if(n.length2()>0) + { + appendvertice(*m_simplex,n); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + appendvertice(*m_simplex,-n); + if(EncloseOrigin()) return(true); + removevertice(*m_simplex); + } + } + break; + case 4: + { + if(b3Fabs(det( m_simplex->c[0]->w-m_simplex->c[3]->w, + m_simplex->c[1]->w-m_simplex->c[3]->w, + m_simplex->c[2]->w-m_simplex->c[3]->w))>0) + return(true); + } + break; + } + return(false); + } + /* Internals */ + void getsupport(const b3Vector3& d,sSV& sv) const + { + sv.d = d/d.length(); + sv.w = m_shape.Support(sv.d,m_verticesA,m_verticesB); + } + void removevertice(sSimplex& simplex) + { + m_free[m_nfree++]=simplex.c[--simplex.rank]; + } + void appendvertice(sSimplex& simplex,const b3Vector3& v) + { + simplex.p[simplex.rank]=0; + simplex.c[simplex.rank]=m_free[--m_nfree]; + getsupport(v,*simplex.c[simplex.rank++]); + } + static b3Scalar det(const b3Vector3& a,const b3Vector3& b,const b3Vector3& c) + { + return( a.y*b.z*c.x+a.z*b.x*c.y- + a.x*b.z*c.y-a.y*b.x*c.z+ + a.x*b.y*c.z-a.z*b.y*c.x); + } + static b3Scalar projectorigin( const b3Vector3& a, + const b3Vector3& b, + b3Scalar* w,unsigned int& m) + 
{ + const b3Vector3 d=b-a; + const b3Scalar l=d.length2(); + if(l>GJK_SIMPLEX2_EPS) + { + const b3Scalar t(l>0?-b3Dot(a,d)/l:0); + if(t>=1) { w[0]=0;w[1]=1;m=2;return(b.length2()); } + else if(t<=0) { w[0]=1;w[1]=0;m=1;return(a.length2()); } + else { w[0]=1-(w[1]=t);m=3;return((a+d*t).length2()); } + } + return(-1); + } + static b3Scalar projectorigin( const b3Vector3& a, + const b3Vector3& b, + const b3Vector3& c, + b3Scalar* w,unsigned int& m) + { + static const unsigned int imd3[]={1,2,0}; + const b3Vector3* vt[]={&a,&b,&c}; + const b3Vector3 dl[]={a-b,b-c,c-a}; + const b3Vector3 n=b3Cross(dl[0],dl[1]); + const b3Scalar l=n.length2(); + if(l>GJK_SIMPLEX3_EPS) + { + b3Scalar mindist=-1; + b3Scalar subw[2]={0.f,0.f}; + unsigned int subm(0); + for(unsigned int i=0;i<3;++i) + { + if(b3Dot(*vt[i],b3Cross(dl[i],n))>0) + { + const unsigned int j=imd3[i]; + const b3Scalar subd(projectorigin(*vt[i],*vt[j],subw,subm)); + if((mindist<0)||(subd<mindist)) + { + mindist = subd; + m = static_cast<unsigned int>(((subm&1)?1<<i:0)+((subm&2)?1<<j:0)); + w[i] = subw[0]; + w[j] = subw[1]; + w[imd3[j]] = 0; + } + } + } + if(mindist<0) + { + const b3Scalar d=b3Dot(a,n); + const b3Scalar s=b3Sqrt(l); + const b3Vector3 p=n*(d/l); + mindist = p.length2(); + m = 7; + w[0] = (b3Cross(dl[1],b-p)).length()/s; + w[1] = (b3Cross(dl[2],c-p)).length()/s; + w[2] = 1-(w[0]+w[1]); + } + return(mindist); + } + return(-1); + } + static b3Scalar projectorigin( const b3Vector3& a, + const b3Vector3& b, + const b3Vector3& c, + const b3Vector3& d, + b3Scalar* w,unsigned int& m) + { + static const unsigned int imd3[]={1,2,0}; + const b3Vector3* vt[]={&a,&b,&c,&d}; + const b3Vector3 dl[]={a-d,b-d,c-d}; + const b3Scalar vl=det(dl[0],dl[1],dl[2]); + const bool ng=(vl*b3Dot(a,b3Cross(b-c,a-b)))<=0; + if(ng&&(b3Fabs(vl)>GJK_SIMPLEX4_EPS)) + { + b3Scalar mindist=-1; + b3Scalar subw[3]={0.f,0.f,0.f}; + unsigned int subm(0); + for(unsigned int i=0;i<3;++i) + { + const unsigned int j=imd3[i]; + const b3Scalar 
s=vl*b3Dot(d,b3Cross(dl[i],dl[j])); + if(s>0) + { + const b3Scalar subd=projectorigin(*vt[i],*vt[j],d,subw,subm); + if((mindist<0)||(subd<mindist)) + { + mindist = subd; + m = static_cast<unsigned int>((subm&1?1<<i:0)+ + (subm&2?1<<j:0)+ + (subm&4?8:0)); + w[i] = subw[0]; + w[j] = subw[1]; + w[imd3[j]] = 0; + w[3] = subw[2]; + } + } + } + if(mindist<0) + { + mindist = 0; + m = 15; + w[0] = det(c,b,d)/vl; + w[1] = det(a,c,d)/vl; + w[2] = det(b,a,d)/vl; + w[3] = 1-(w[0]+w[1]+w[2]); + } + return(mindist); + } + return(-1); + } + }; + + // EPA + struct b3EPA + { + /* Types */ + typedef b3GJK::sSV sSV; + struct sFace + { + b3Vector3 n; + b3Scalar d; + sSV* c[3]; + sFace* f[3]; + sFace* l[2]; + unsigned char e[3]; + unsigned char pass; + }; + struct sList + { + sFace* root; + unsigned int count; + sList() : root(0),count(0) {} + }; + struct sHorizon + { + sFace* cf; + sFace* ff; + unsigned int nf; + sHorizon() : cf(0),ff(0),nf(0) {} + }; + struct eStatus { enum _ { + Valid, + Touching, + Degenerated, + NonConvex, + InvalidHull, + OutOfFaces, + OutOfVertices, + AccuraryReached, + FallBack, + Failed };}; + /* Fields */ + eStatus::_ m_status; + b3GJK::sSimplex m_result; + b3Vector3 m_normal; + b3Scalar m_depth; + sSV m_sv_store[EPA_MAX_VERTICES]; + sFace m_fc_store[EPA_MAX_FACES]; + unsigned int m_nextsv; + sList m_hull; + sList m_stock; + /* Methods */ + b3EPA() + { + Initialize(); + } + + + static inline void bind(sFace* fa,unsigned int ea,sFace* fb,unsigned int eb) + { + fa->e[ea]=(unsigned char)eb;fa->f[ea]=fb; + fb->e[eb]=(unsigned char)ea;fb->f[eb]=fa; + } + static inline void append(sList& list,sFace* face) + { + face->l[0] = 0; + face->l[1] = list.root; + if(list.root) list.root->l[0]=face; + list.root = face; + ++list.count; + } + static inline void remove(sList& list,sFace* face) + { + if(face->l[1]) face->l[1]->l[0]=face->l[0]; + if(face->l[0]) face->l[0]->l[1]=face->l[1]; + if(face==list.root) list.root=face->l[1]; + --list.count; + } + + + void Initialize() + { 
+ m_status = eStatus::Failed; + m_normal = b3MakeVector3(0,0,0); + m_depth = 0; + m_nextsv = 0; + for(unsigned int i=0;i<EPA_MAX_FACES;++i) + { + append(m_stock,&m_fc_store[EPA_MAX_FACES-i-1]); + } + } + eStatus::_ Evaluate(b3GJK& gjk,const b3Vector3& guess) + { + b3GJK::sSimplex& simplex=*gjk.m_simplex; + if((simplex.rank>1)&&gjk.EncloseOrigin()) + { + + /* Clean up */ + while(m_hull.root) + { + sFace* f = m_hull.root; + remove(m_hull,f); + append(m_stock,f); + } + m_status = eStatus::Valid; + m_nextsv = 0; + /* Orient simplex */ + if(gjk.det( simplex.c[0]->w-simplex.c[3]->w, + simplex.c[1]->w-simplex.c[3]->w, + simplex.c[2]->w-simplex.c[3]->w)<0) + { + b3Swap(simplex.c[0],simplex.c[1]); + b3Swap(simplex.p[0],simplex.p[1]); + } + /* Build initial hull */ + sFace* tetra[]={newface(simplex.c[0],simplex.c[1],simplex.c[2],true), + newface(simplex.c[1],simplex.c[0],simplex.c[3],true), + newface(simplex.c[2],simplex.c[1],simplex.c[3],true), + newface(simplex.c[0],simplex.c[2],simplex.c[3],true)}; + if(m_hull.count==4) + { + sFace* best=findbest(); + sFace outer=*best; + unsigned int pass=0; + unsigned int iterations=0; + bind(tetra[0],0,tetra[1],0); + bind(tetra[0],1,tetra[2],0); + bind(tetra[0],2,tetra[3],0); + bind(tetra[1],1,tetra[3],2); + bind(tetra[1],2,tetra[2],1); + bind(tetra[2],2,tetra[3],1); + m_status=eStatus::Valid; + for(;iterations<EPA_MAX_ITERATIONS;++iterations) + { + if(m_nextsv<EPA_MAX_VERTICES) + { + sHorizon horizon; + sSV* w=&m_sv_store[m_nextsv++]; + bool valid=true; + best->pass = (unsigned char)(++pass); + gjk.getsupport(best->n,*w); + const b3Scalar wdist=b3Dot(best->n,w->w)-best->d; + if(wdist>EPA_ACCURACY) + { + for(unsigned int j=0;(j<3)&&valid;++j) + { + valid&=expand( pass,w, + best->f[j],best->e[j], + horizon); + } + if(valid&&(horizon.nf>=3)) + { + bind(horizon.cf,1,horizon.ff,2); + remove(m_hull,best); + append(m_stock,best); + best=findbest(); + outer=*best; + } else { + m_status=eStatus::Failed; + //m_status=eStatus::InvalidHull; + 
break; } + } else { m_status=eStatus::AccuraryReached;break; } + } else { m_status=eStatus::OutOfVertices;break; } + } + const b3Vector3 projection=outer.n*outer.d; + m_normal = outer.n; + m_depth = outer.d; + m_result.rank = 3; + m_result.c[0] = outer.c[0]; + m_result.c[1] = outer.c[1]; + m_result.c[2] = outer.c[2]; + m_result.p[0] = b3Cross( outer.c[1]->w-projection, + outer.c[2]->w-projection).length(); + m_result.p[1] = b3Cross( outer.c[2]->w-projection, + outer.c[0]->w-projection).length(); + m_result.p[2] = b3Cross( outer.c[0]->w-projection, + outer.c[1]->w-projection).length(); + const b3Scalar sum=m_result.p[0]+m_result.p[1]+m_result.p[2]; + m_result.p[0] /= sum; + m_result.p[1] /= sum; + m_result.p[2] /= sum; + return(m_status); + } + } + /* Fallback */ + m_status = eStatus::FallBack; + m_normal = -guess; + const b3Scalar nl=m_normal.length(); + if(nl>0) + m_normal = m_normal/nl; + else + m_normal = b3MakeVector3(1,0,0); + m_depth = 0; + m_result.rank=1; + m_result.c[0]=simplex.c[0]; + m_result.p[0]=1; + return(m_status); + } + bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) + { + const b3Vector3 ba = b->w - a->w; + const b3Vector3 n_ab = b3Cross(ba, face->n); // Outward facing edge normal direction, on triangle plane + const b3Scalar a_dot_nab = b3Dot(a->w, n_ab); // Only care about the sign to determine inside/outside, so not normalization required + + if(a_dot_nab < 0) + { + // Outside of edge a->b + + const b3Scalar ba_l2 = ba.length2(); + const b3Scalar a_dot_ba = b3Dot(a->w, ba); + const b3Scalar b_dot_ba = b3Dot(b->w, ba); + + if(a_dot_ba > 0) + { + // Pick distance vertex a + dist = a->w.length(); + } + else if(b_dot_ba < 0) + { + // Pick distance vertex b + dist = b->w.length(); + } + else + { + // Pick distance to edge a->b + const b3Scalar a_dot_b = b3Dot(a->w, b->w); + dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); + } + + return true; + } + + return false; + } + sFace* 
newface(sSV* a,sSV* b,sSV* c,bool forced) + { + if(m_stock.root) + { + sFace* face=m_stock.root; + remove(m_stock,face); + append(m_hull,face); + face->pass = 0; + face->c[0] = a; + face->c[1] = b; + face->c[2] = c; + face->n = b3Cross(b->w-a->w,c->w-a->w); + const b3Scalar l=face->n.length(); + const bool v=l>EPA_ACCURACY; + + if(v) + { + if(!(getedgedist(face, a, b, face->d) || + getedgedist(face, b, c, face->d) || + getedgedist(face, c, a, face->d))) + { + // Origin projects to the interior of the triangle + // Use distance to triangle plane + face->d = b3Dot(a->w, face->n) / l; + } + + face->n /= l; + if(forced || (face->d >= -EPA_PLANE_EPS)) + { + return face; + } + else + m_status=eStatus::NonConvex; + } + else + m_status=eStatus::Degenerated; + + remove(m_hull, face); + append(m_stock, face); + return 0; + + } + m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; + return 0; + } + sFace* findbest() + { + sFace* minf=m_hull.root; + b3Scalar mind=minf->d*minf->d; + for(sFace* f=minf->l[1];f;f=f->l[1]) + { + const b3Scalar sqd=f->d*f->d; + if(sqd<mind) + { + minf=f; + mind=sqd; + } + } + return(minf); + } + bool expand(unsigned int pass,sSV* w,sFace* f,unsigned int e,sHorizon& horizon) + { + static const unsigned int i1m3[]={1,2,0}; + static const unsigned int i2m3[]={2,0,1}; + if(f->pass!=pass) + { + const unsigned int e1=i1m3[e]; + if((b3Dot(f->n,w->w)-f->d)<-EPA_PLANE_EPS) + { + sFace* nf=newface(f->c[e1],f->c[e],w,false); + if(nf) + { + bind(nf,0,f,e); + if(horizon.cf) bind(horizon.cf,1,nf,2); else horizon.ff=nf; + horizon.cf=nf; + ++horizon.nf; + return(true); + } + } + else + { + const unsigned int e2=i2m3[e]; + f->pass = (unsigned char)pass; + if( expand(pass,w,f->f[e1],f->e[e1],horizon)&& + expand(pass,w,f->f[e2],f->e[e2],horizon)) + { + remove(m_hull,f); + append(m_stock,f); + return(true); + } + } + } + return(false); + } + + }; + + // + static void Initialize(const b3Transform& transA, const b3Transform& transB, + const 
b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& verticesB, + b3GjkEpaSolver2::sResults& results, + tShape& shape, + bool withmargins) + { + /* Results */ + results.witnesses[0] = + results.witnesses[1] = b3MakeVector3(0,0,0); + results.status = b3GjkEpaSolver2::sResults::Separated; + /* Shape */ + shape.m_shapes[0] = hullA; + shape.m_shapes[1] = hullB; + shape.m_toshape1 = transB.getBasis().transposeTimes(transA.getBasis()); + shape.m_toshape0 = transA.inverseTimes(transB); + shape.EnableMargin(withmargins); + } + +} + +// +// Api +// + +using namespace gjkepa2_impl2; + +// +int b3GjkEpaSolver2::StackSizeRequirement() +{ + return(sizeof(b3GJK)+sizeof(b3EPA)); +} + +// +bool b3GjkEpaSolver2::Distance( const b3Transform& transA, const b3Transform& transB, + const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3Vector3& guess, + sResults& results) +{ + tShape shape; + Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,false); + b3GJK gjk(verticesA,verticesB); + b3GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,guess); + if(gjk_status==b3GJK::eStatus::Valid) + { + b3Vector3 w0=b3MakeVector3(0,0,0); + b3Vector3 w1=b3MakeVector3(0,0,0); + for(unsigned int i=0;i<gjk.m_simplex->rank;++i) + { + const b3Scalar p=gjk.m_simplex->p[i]; + w0+=shape.Support( gjk.m_simplex->c[i]->d,0,verticesA,verticesB)*p; + w1+=shape.Support(-gjk.m_simplex->c[i]->d,1,verticesA,verticesB)*p; + } + results.witnesses[0] = transA*w0; + results.witnesses[1] = transA*w1; + results.normal = w0-w1; + results.distance = results.normal.length(); + results.normal /= results.distance>GJK_MIN_DISTANCE?results.distance:1; + return(true); + } + else + { + results.status = gjk_status==b3GJK::eStatus::Inside? 
+ sResults::Penetrating : + sResults::GJK_Failed ; + return(false); + } +} + +// +bool b3GjkEpaSolver2::Penetration( const b3Transform& transA, const b3Transform& transB, + const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3Vector3& guess, + sResults& results, + bool usemargins) +{ + + tShape shape; + Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,usemargins); + b3GJK gjk(verticesA,verticesB); + b3GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,guess); + switch(gjk_status) + { + case b3GJK::eStatus::Inside: + { + b3EPA epa; + b3EPA::eStatus::_ epa_status=epa.Evaluate(gjk,-guess); + if(epa_status!=b3EPA::eStatus::Failed) + { + b3Vector3 w0=b3MakeVector3(0,0,0); + for(unsigned int i=0;i<epa.m_result.rank;++i) + { + w0+=shape.Support(epa.m_result.c[i]->d,0,verticesA,verticesB)*epa.m_result.p[i]; + } + results.status = sResults::Penetrating; + results.witnesses[0] = transA*w0; + results.witnesses[1] = transA*(w0-epa.m_normal*epa.m_depth); + results.normal = -epa.m_normal; + results.distance = -epa.m_depth; + return(true); + } else results.status=sResults::EPA_Failed; + } + break; + case b3GJK::eStatus::Failed: + results.status=sResults::GJK_Failed; + break; + default: + { + } + } + return(false); +} + + +#if 0 +// +b3Scalar b3GjkEpaSolver2::SignedDistance(const b3Vector3& position, + b3Scalar margin, + const b3Transform& transA, + const b3ConvexPolyhedronData& hullA, + const b3AlignedObjectArray<b3Vector3>& verticesA, + sResults& results) +{ + tShape shape; + btSphereShape shape1(margin); + b3Transform wtrs1(b3Quaternion(0,0,0,1),position); + Initialize(shape0,wtrs0,&shape1,wtrs1,results,shape,false); + GJK gjk; + GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,b3Vector3(1,1,1)); + if(gjk_status==GJK::eStatus::Valid) + { + b3Vector3 w0=b3Vector3(0,0,0); + b3Vector3 w1=b3Vector3(0,0,0); + for(unsigned int 
i=0;i<gjk.m_simplex->rank;++i) + { + const b3Scalar p=gjk.m_simplex->p[i]; + w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p; + w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p; + } + results.witnesses[0] = wtrs0*w0; + results.witnesses[1] = wtrs0*w1; + const b3Vector3 delta= results.witnesses[1]- + results.witnesses[0]; + const b3Scalar margin= shape0->getMarginNonVirtual()+ + shape1.getMarginNonVirtual(); + const b3Scalar length= delta.length(); + results.normal = delta/length; + results.witnesses[0] += results.normal*margin; + return(length-margin); + } + else + { + if(gjk_status==GJK::eStatus::Inside) + { + if(Penetration(shape0,wtrs0,&shape1,wtrs1,gjk.m_ray,results)) + { + const b3Vector3 delta= results.witnesses[0]- + results.witnesses[1]; + const b3Scalar length= delta.length(); + if (length >= B3_EPSILON) + results.normal = delta/length; + return(-length); + } + } + } + return(B3_INFINITY); +} + +// +bool b3GjkEpaSolver2::SignedDistance(const btConvexShape* shape0, + const b3Transform& wtrs0, + const btConvexShape* shape1, + const b3Transform& wtrs1, + const b3Vector3& guess, + sResults& results) +{ + if(!Distance(shape0,wtrs0,shape1,wtrs1,guess,results)) + return(Penetration(shape0,wtrs0,shape1,wtrs1,guess,results,false)); + else + return(true); +} +#endif + + +/* Symbols cleanup */ + +#undef GJK_MAX_ITERATIONS +#undef GJK_ACCURACY +#undef GJK_MIN_DISTANCE +#undef GJK_DUPLICATED_EPS +#undef GJK_SIMPLEX2_EPS +#undef GJK_SIMPLEX3_EPS +#undef GJK_SIMPLEX4_EPS + +#undef EPA_MAX_VERTICES +#undef EPA_MAX_FACES +#undef EPA_MAX_ITERATIONS +#undef EPA_ACCURACY +#undef EPA_FALLBACK +#undef EPA_PLANE_EPS +#undef EPA_INSIDE_EPS diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h new file mode 100644 index 0000000000..976238a04c --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h @@ -0,0 +1,82 @@ +/* +Bullet Continuous Collision Detection and Physics 
Library
Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the
use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software in a
product, an acknowledgment in the product documentation would be appreciated
but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

/*
GJK-EPA collision solver by Nathanael Presson, 2008
*/
#ifndef B3_GJK_EPA2_H
#define B3_GJK_EPA2_H

#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Common/b3Transform.h"
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h"


///btGjkEpaSolver contributed under zlib by Nathanael Presson
///Static entry points for GJK distance and EPA penetration queries between two
///convex polyhedra (hullA/hullB) whose vertices live in verticesA/verticesB.
struct b3GjkEpaSolver2
{
///Output of a Distance() or Penetration() query.
struct sResults
	{
	enum eStatus
		{
		Separated, /* Shapes do not penetrate */
		Penetrating, /* Shapes are penetrating */
		GJK_Failed, /* GJK phase failed, no big issue, shapes are probably just 'touching' */
		EPA_Failed /* EPA phase failed, bigger problem, need to save parameters, and debug */
		} status;
	b3Vector3 witnesses[2]; ///< witness point on each shape, after applying transA
	b3Vector3 normal;
	b3Scalar distance; ///< set to the negated EPA depth by Penetration()
	};

///Combined size in bytes of the internal GJK and EPA solver objects.
static int StackSizeRequirement();

///GJK distance query. Returns true and fills witnesses, normal and distance when
///the GJK phase ends in its Valid state; otherwise returns false and sets
///results.status to Penetrating or GJK_Failed.
///@param guess initial search direction.
static bool Distance( const b3Transform& transA, const b3Transform& transB,
	const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,
	const b3AlignedObjectArray<b3Vector3>& verticesA,
	const b3AlignedObjectArray<b3Vector3>& verticesB,
	const b3Vector3& guess,
	sResults& results);

///GJK + EPA penetration query. Returns true with results.status == Penetrating
///when GJK reports an overlap and EPA does not fail; otherwise returns false.
///@param guess initial search direction.
///@param usemargins selects the with-margin support mapping.
static bool Penetration( const b3Transform& transA, const b3Transform& transB,
	const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB,
	const b3AlignedObjectArray<b3Vector3>& verticesA,
	const b3AlignedObjectArray<b3Vector3>& verticesB,
	const b3Vector3& guess,
	sResults& results,
	bool usemargins=true);
// Disabled declarations below still use bt* types.
#if 0
static b3Scalar SignedDistance( const b3Vector3& position,
	b3Scalar margin,
	const btConvexShape* shape,
	const btTransform& wtrs,
	sResults& results);

static bool SignedDistance( const btConvexShape* shape0,const btTransform& wtrs0,
	const btConvexShape* shape1,const btTransform& wtrs1,
	const b3Vector3& guess,
	sResults& results);
#endif

};

#endif //B3_GJK_EPA2_H

diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp
new file mode 100644
index 0000000000..e9e51d5a36
--- /dev/null
+++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp
@@ -0,0 +1,390 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3.
This notice may not be removed or altered from any source distribution. +*/ + + +#include "b3OptimizedBvh.h" +#include "b3StridingMeshInterface.h" +#include "Bullet3Geometry/b3AabbUtil.h" + + +b3OptimizedBvh::b3OptimizedBvh() +{ +} + +b3OptimizedBvh::~b3OptimizedBvh() +{ +} + + +void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) +{ + m_useQuantization = useQuantizedAabbCompression; + + + // NodeArray triangleNodes; + + struct NodeTriangleCallback : public b3InternalTriangleIndexCallback + { + + NodeArray& m_triangleNodes; + + NodeTriangleCallback& operator=(NodeTriangleCallback& other) + { + m_triangleNodes.copyFromArray(other.m_triangleNodes); + return *this; + } + + NodeTriangleCallback(NodeArray& triangleNodes) + :m_triangleNodes(triangleNodes) + { + } + + virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) + { + b3OptimizedBvhNode node; + b3Vector3 aabbMin,aabbMax; + aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + aabbMin.setMin(triangle[0]); + aabbMax.setMax(triangle[0]); + aabbMin.setMin(triangle[1]); + aabbMax.setMax(triangle[1]); + aabbMin.setMin(triangle[2]); + aabbMax.setMax(triangle[2]); + + //with quantization? 
+ node.m_aabbMinOrg = aabbMin; + node.m_aabbMaxOrg = aabbMax; + + node.m_escapeIndex = -1; + + //for child nodes + node.m_subPart = partId; + node.m_triangleIndex = triangleIndex; + m_triangleNodes.push_back(node); + } + }; + struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback + { + QuantizedNodeArray& m_triangleNodes; + const b3QuantizedBvh* m_optimizedTree; // for quantization + + QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other) + { + m_triangleNodes.copyFromArray(other.m_triangleNodes); + m_optimizedTree = other.m_optimizedTree; + return *this; + } + + QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes,const b3QuantizedBvh* tree) + :m_triangleNodes(triangleNodes),m_optimizedTree(tree) + { + } + + virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) + { + // The partId and triangle index must fit in the same (positive) integer + b3Assert(partId < (1<<MAX_NUM_PARTS_IN_BITS)); + b3Assert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS))); + //negative indices are reserved for escapeIndex + b3Assert(triangleIndex>=0); + + b3QuantizedBvhNode node; + b3Vector3 aabbMin,aabbMax; + aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + aabbMin.setMin(triangle[0]); + aabbMax.setMax(triangle[0]); + aabbMin.setMin(triangle[1]); + aabbMax.setMax(triangle[1]); + aabbMin.setMin(triangle[2]); + aabbMax.setMax(triangle[2]); + + //PCK: add these checks for zero dimensions of aabb + const b3Scalar MIN_AABB_DIMENSION = b3Scalar(0.002); + const b3Scalar MIN_AABB_HALF_DIMENSION = b3Scalar(0.001); + if (aabbMax.getX() - aabbMin.getX() < MIN_AABB_DIMENSION) + { + aabbMax.setX(aabbMax.getX() + MIN_AABB_HALF_DIMENSION); + aabbMin.setX(aabbMin.getX() - MIN_AABB_HALF_DIMENSION); + } + if (aabbMax.getY() - aabbMin.getY() < MIN_AABB_DIMENSION) + { + 
aabbMax.setY(aabbMax.getY() + MIN_AABB_HALF_DIMENSION); + aabbMin.setY(aabbMin.getY() - MIN_AABB_HALF_DIMENSION); + } + if (aabbMax.getZ() - aabbMin.getZ() < MIN_AABB_DIMENSION) + { + aabbMax.setZ(aabbMax.getZ() + MIN_AABB_HALF_DIMENSION); + aabbMin.setZ(aabbMin.getZ() - MIN_AABB_HALF_DIMENSION); + } + + m_optimizedTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0); + m_optimizedTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1); + + node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex; + + m_triangleNodes.push_back(node); + } + }; + + + + int numLeafNodes = 0; + + + if (m_useQuantization) + { + + //initialize quantization values + setQuantizationValues(bvhAabbMin,bvhAabbMax); + + QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes,this); + + + triangles->InternalProcessAllTriangles(&callback,m_bvhAabbMin,m_bvhAabbMax); + + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_quantizedLeafNodes.size(); + + + m_quantizedContiguousNodes.resize(2*numLeafNodes); + + + } else + { + NodeTriangleCallback callback(m_leafNodes); + + b3Vector3 aabbMin=b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + b3Vector3 aabbMax=b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + + triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax); + + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_leafNodes.size(); + + m_contiguousNodes.resize(2*numLeafNodes); + } + + m_curNodeIndex = 0; + + buildTree(0,numLeafNodes); + + ///if the entire tree is small then subtree size, we need to create a header info for the tree + if(m_useQuantization && !m_SubtreeHeaders.size()) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); + subtree.m_rootNodeIndex = 0; + subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 
1 : m_quantizedContiguousNodes[0].getEscapeIndex(); + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + + //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary + m_quantizedLeafNodes.clear(); + m_leafNodes.clear(); +} + + + + +void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) +{ + if (m_useQuantization) + { + + setQuantizationValues(aabbMin,aabbMax); + + updateBvhNodes(meshInterface,0,m_curNodeIndex,0); + + ///now update all subtree headers + + int i; + for (i=0;i<m_SubtreeHeaders.size();i++) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); + } + + } else + { + + } +} + + + + +void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) +{ + //incrementally initialize quantization values + b3Assert(m_useQuantization); + + b3Assert(aabbMin.getX() > m_bvhAabbMin.getX()); + b3Assert(aabbMin.getY() > m_bvhAabbMin.getY()); + b3Assert(aabbMin.getZ() > m_bvhAabbMin.getZ()); + + b3Assert(aabbMax.getX() < m_bvhAabbMax.getX()); + b3Assert(aabbMax.getY() < m_bvhAabbMax.getY()); + b3Assert(aabbMax.getZ() < m_bvhAabbMax.getZ()); + + ///we should update all quantization values, using updateBvhNodes(meshInterface); + ///but we only update chunks that overlap the given aabb + + unsigned short quantizedQueryAabbMin[3]; + unsigned short quantizedQueryAabbMax[3]; + + quantize(&quantizedQueryAabbMin[0],aabbMin,0); + quantize(&quantizedQueryAabbMax[0],aabbMax,1); + + int i; + for (i=0;i<this->m_SubtreeHeaders.size();i++) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; + + //PCK: unsigned instead of bool + unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap != 0) + { + 
updateBvhNodes(meshInterface,subtree.m_rootNodeIndex,subtree.m_rootNodeIndex+subtree.m_subtreeSize,i); + + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); + } + } + +} + +void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index) +{ + (void)index; + + b3Assert(m_useQuantization); + + int curNodeSubPart=-1; + + //get access info to trianglemesh data + const unsigned char *vertexbase = 0; + int numverts = 0; + PHY_ScalarType type = PHY_INTEGER; + int stride = 0; + const unsigned char *indexbase = 0; + int indexstride = 0; + int numfaces = 0; + PHY_ScalarType indicestype = PHY_INTEGER; + + b3Vector3 triangleVerts[3]; + b3Vector3 aabbMin,aabbMax; + const b3Vector3& meshScaling = meshInterface->getScaling(); + + int i; + for (i=endNode-1;i>=firstNode;i--) + { + + + b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; + if (curNode.isLeafNode()) + { + //recalc aabb from triangle data + int nodeSubPart = curNode.getPartId(); + int nodeTriangleIndex = curNode.getTriangleIndex(); + if (nodeSubPart != curNodeSubPart) + { + if (curNodeSubPart >= 0) + meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); + meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts, type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart); + + curNodeSubPart = nodeSubPart; + b3Assert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT); + } + //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, + + unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride); + + + for (int j=2;j>=0;j--) + { + + int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j]; + if (type == PHY_FLOAT) + { + float* graphicsbase = (float*)(vertexbase+graphicsindex*stride); + triangleVerts[j] = b3MakeVector3( + graphicsbase[0]*meshScaling.getX(), + graphicsbase[1]*meshScaling.getY(), + graphicsbase[2]*meshScaling.getZ()); + } + else + { + 
double* graphicsbase = (double*)(vertexbase+graphicsindex*stride); + triangleVerts[j] = b3MakeVector3( b3Scalar(graphicsbase[0]*meshScaling.getX()), b3Scalar(graphicsbase[1]*meshScaling.getY()), b3Scalar(graphicsbase[2]*meshScaling.getZ())); + } + } + + + + aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + aabbMin.setMin(triangleVerts[0]); + aabbMax.setMax(triangleVerts[0]); + aabbMin.setMin(triangleVerts[1]); + aabbMax.setMax(triangleVerts[1]); + aabbMin.setMin(triangleVerts[2]); + aabbMax.setMax(triangleVerts[2]); + + quantize(&curNode.m_quantizedAabbMin[0],aabbMin,0); + quantize(&curNode.m_quantizedAabbMax[0],aabbMax,1); + + } else + { + //combine aabb from both children + + b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1]; + + b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] : + &m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()]; + + + { + for (int i=0;i<3;i++) + { + curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; + if (curNode.m_quantizedAabbMin[i]>rightChildNode->m_quantizedAabbMin[i]) + curNode.m_quantizedAabbMin[i]=rightChildNode->m_quantizedAabbMin[i]; + + curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; + if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) + curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; + } + } + } + + } + + if (curNodeSubPart >= 0) + meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); + + +} + +///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' +b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +{ + b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian); + 
+ //we don't add additional data so just do a static upcast + return static_cast<b3OptimizedBvh*>(bvh); +} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h new file mode 100644 index 0000000000..0272ef83bf --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h @@ -0,0 +1,65 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +///Contains contributions from Disney Studio's + +#ifndef B3_OPTIMIZED_BVH_H +#define B3_OPTIMIZED_BVH_H + +#include "b3QuantizedBvh.h" + +class b3StridingMeshInterface; + + +///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface. 
+B3_ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh +{ + +public: + B3_DECLARE_ALIGNED_ALLOCATOR(); + +protected: + +public: + + b3OptimizedBvh(); + + virtual ~b3OptimizedBvh(); + + void build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); + + void refit(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin,const b3Vector3& aabbMax); + + void refitPartial(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin, const b3Vector3& aabbMax); + + void updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index); + + /// Data buffer MUST be 16 byte aligned + virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const + { + return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian); + + } + + ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' + static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); + + +}; + + +#endif //B3_OPTIMIZED_BVH_H + + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp new file mode 100644 index 0000000000..52027e1118 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp @@ -0,0 +1,1301 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#include "b3QuantizedBvh.h" + +#include "Bullet3Geometry/b3AabbUtil.h" + + +#define RAYAABB2 + +b3QuantizedBvh::b3QuantizedBvh() : + m_bulletVersion(B3_BULLET_VERSION), + m_useQuantization(false), + m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) + //m_traversalMode(TRAVERSAL_STACKLESS) + //m_traversalMode(TRAVERSAL_RECURSIVE) + ,m_subtreeHeaderCount(0) //PCK: add this line +{ + m_bvhAabbMin.setValue(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY); + m_bvhAabbMax.setValue(B3_INFINITY,B3_INFINITY,B3_INFINITY); +} + + + + + +void b3QuantizedBvh::buildInternal() +{ + ///assumes that caller filled in the m_quantizedLeafNodes + m_useQuantization = true; + int numLeafNodes = 0; + + if (m_useQuantization) + { + //now we have an array of leafnodes in m_leafNodes + numLeafNodes = m_quantizedLeafNodes.size(); + + m_quantizedContiguousNodes.resize(2*numLeafNodes); + + } + + m_curNodeIndex = 0; + + buildTree(0,numLeafNodes); + + ///if the entire tree is small then subtree size, we need to create a header info for the tree + if(m_useQuantization && !m_SubtreeHeaders.size()) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); + subtree.m_rootNodeIndex = 0; + subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 
1 : m_quantizedContiguousNodes[0].getEscapeIndex(); + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + + //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary + m_quantizedLeafNodes.clear(); + m_leafNodes.clear(); +} + + + +///just for debugging, to visualize the individual patches/subtrees +#ifdef DEBUG_PATCH_COLORS +b3Vector3 color[4]= +{ + b3Vector3(1,0,0), + b3Vector3(0,1,0), + b3Vector3(0,0,1), + b3Vector3(0,1,1) +}; +#endif //DEBUG_PATCH_COLORS + + + +void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin) +{ + //enlarge the AABB to avoid division by zero when initializing the quantization values + b3Vector3 clampValue =b3MakeVector3(quantizationMargin,quantizationMargin,quantizationMargin); + m_bvhAabbMin = bvhAabbMin - clampValue; + m_bvhAabbMax = bvhAabbMax + clampValue; + b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; + m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0),b3Scalar(65533.0),b3Scalar(65533.0)) / aabbSize; + m_useQuantization = true; +} + + + + +b3QuantizedBvh::~b3QuantizedBvh() +{ +} + +#ifdef DEBUG_TREE_BUILDING +int gStackDepth = 0; +int gMaxStackDepth = 0; +#endif //DEBUG_TREE_BUILDING + +void b3QuantizedBvh::buildTree (int startIndex,int endIndex) +{ +#ifdef DEBUG_TREE_BUILDING + gStackDepth++; + if (gStackDepth > gMaxStackDepth) + gMaxStackDepth = gStackDepth; +#endif //DEBUG_TREE_BUILDING + + + int splitAxis, splitIndex, i; + int numIndices =endIndex-startIndex; + int curIndex = m_curNodeIndex; + + b3Assert(numIndices>0); + + if (numIndices==1) + { +#ifdef DEBUG_TREE_BUILDING + gStackDepth--; +#endif //DEBUG_TREE_BUILDING + + assignInternalNodeFromLeafNode(m_curNodeIndex,startIndex); + + m_curNodeIndex++; + return; + } + //calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. 
+ + splitAxis = calcSplittingAxis(startIndex,endIndex); + + splitIndex = sortAndCalcSplittingIndex(startIndex,endIndex,splitAxis); + + int internalNodeIndex = m_curNodeIndex; + + //set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value. + //the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values + setInternalNodeAabbMin(m_curNodeIndex,m_bvhAabbMax);//can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization + setInternalNodeAabbMax(m_curNodeIndex,m_bvhAabbMin);//can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization + + + for (i=startIndex;i<endIndex;i++) + { + mergeInternalNodeAabb(m_curNodeIndex,getAabbMin(i),getAabbMax(i)); + } + + m_curNodeIndex++; + + + //internalNode->m_escapeIndex; + + int leftChildNodexIndex = m_curNodeIndex; + + //build left child tree + buildTree(startIndex,splitIndex); + + int rightChildNodexIndex = m_curNodeIndex; + //build right child tree + buildTree(splitIndex,endIndex); + +#ifdef DEBUG_TREE_BUILDING + gStackDepth--; +#endif //DEBUG_TREE_BUILDING + + int escapeIndex = m_curNodeIndex - curIndex; + + if (m_useQuantization) + { + //escapeIndex is the number of nodes of this subtree + const int sizeQuantizedNode =sizeof(b3QuantizedBvhNode); + const int treeSizeInBytes = escapeIndex * sizeQuantizedNode; + if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES) + { + updateSubtreeHeaders(leftChildNodexIndex,rightChildNodexIndex); + } + } else + { + + } + + setInternalNodeEscapeIndex(internalNodeIndex,escapeIndex); + +} + +void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex) +{ + b3Assert(m_useQuantization); + + b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex]; + int leftSubTreeSize = leftChildNode.isLeafNode() ? 
1 : leftChildNode.getEscapeIndex(); + int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); + + b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex]; + int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); + int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); + + if(leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(leftChildNode); + subtree.m_rootNodeIndex = leftChildNodexIndex; + subtree.m_subtreeSize = leftSubTreeSize; + } + + if(rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) + { + b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); + subtree.setAabbFromQuantizeNode(rightChildNode); + subtree.m_rootNodeIndex = rightChildNodexIndex; + subtree.m_subtreeSize = rightSubTreeSize; + } + + //PCK: update the copy of the size + m_subtreeHeaderCount = m_SubtreeHeaders.size(); +} + + +int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis) +{ + int i; + int splitIndex =startIndex; + int numIndices = endIndex - startIndex; + b3Scalar splitValue; + + b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + for (i=startIndex;i<endIndex;i++) + { + b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); + means+=center; + } + means *= (b3Scalar(1.)/(b3Scalar)numIndices); + + splitValue = means[splitAxis]; + + //sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'. 
+ for (i=startIndex;i<endIndex;i++) + { + b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); + if (center[splitAxis] > splitValue) + { + //swap + swapLeafNodes(i,splitIndex); + splitIndex++; + } + } + + //if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex + //otherwise the tree-building might fail due to stack-overflows in certain cases. + //unbalanced1 is unsafe: it can cause stack overflows + //bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); + + //unbalanced2 should work too: always use center (perfect balanced trees) + //bool unbalanced2 = true; + + //this should be safe too: + int rangeBalancedIndices = numIndices/3; + bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices))); + + if (unbalanced) + { + splitIndex = startIndex+ (numIndices>>1); + } + + bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex)); + (void)unbal; + b3Assert(!unbal); + + return splitIndex; +} + + +int b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex) +{ + int i; + + b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 variance=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + int numIndices = endIndex-startIndex; + + for (i=startIndex;i<endIndex;i++) + { + b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); + means+=center; + } + means *= (b3Scalar(1.)/(b3Scalar)numIndices); + + for (i=startIndex;i<endIndex;i++) + { + b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); + b3Vector3 diff2 = center-means; + diff2 = diff2 * diff2; + variance += diff2; + } + variance *= (b3Scalar(1.)/ ((b3Scalar)numIndices-1) ); + + return variance.maxAxis(); +} + + + +void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +{ + //either choose recursive traversal (walkTree) or 
stackless (walkStacklessTree) + + if (m_useQuantization) + { + ///quantize query AABB + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); + quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); + + switch (m_traversalMode) + { + case TRAVERSAL_STACKLESS: + walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,0,m_curNodeIndex); + break; + case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: + walkStacklessQuantizedTreeCacheFriendly(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + break; + case TRAVERSAL_RECURSIVE: + { + const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0]; + walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + } + break; + default: + //unsupported + b3Assert(0); + } + } else + { + walkStacklessTree(nodeCallback,aabbMin,aabbMax); + } +} + + +static int b3s_maxIterations = 0; + + +void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +{ + b3Assert(!m_useQuantization); + + const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; + int escapeIndex, curIndex = 0; + int walkIterations = 0; + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap; + + while (curIndex < m_curNodeIndex) + { + //catch bugs in tree data + b3Assert (walkIterations < m_curNodeIndex); + + walkIterations++; + aabbOverlap = b3TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); + isLeafNode = rootNode->m_escapeIndex == -1; + + //PCK: unsigned instead of bool + if (isLeafNode && (aabbOverlap != 0)) + { + nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); + } + + //PCK: unsigned instead of bool + if ((aabbOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->m_escapeIndex; + rootNode += 
escapeIndex; + curIndex += escapeIndex; + } + } + if (b3s_maxIterations < walkIterations) + b3s_maxIterations = walkIterations; + +} + +/* +///this was the original recursive traversal, before we optimized towards stackless traversal +void b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +{ + bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax); + if (aabbOverlap) + { + isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild); + if (isLeafNode) + { + nodeCallback->processNode(rootNode); + } else + { + walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax); + walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax); + } + } + +} +*/ + +void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +{ + b3Assert(m_useQuantization); + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap; + + //PCK: unsigned instead of bool + aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,currentNode->m_quantizedAabbMin,currentNode->m_quantizedAabbMax); + isLeafNode = currentNode->isLeafNode(); + + //PCK: unsigned instead of bool + if (aabbOverlap != 0) + { + if (isLeafNode) + { + nodeCallback->processNode(currentNode->getPartId(),currentNode->getTriangleIndex()); + } else + { + //process left and right children + const b3QuantizedBvhNode* leftChildNode = currentNode+1; + walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + + const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? 
leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex(); + walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); + } + } +} + + + +void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const +{ + b3Assert(!m_useQuantization); + + const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; + int escapeIndex, curIndex = 0; + int walkIterations = 0; + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap=0; + unsigned rayBoxOverlap=0; + b3Scalar lambda_max = 1.0; + + /* Quick pruning by quantized box */ + b3Vector3 rayAabbMin = raySource; + b3Vector3 rayAabbMax = raySource; + rayAabbMin.setMin(rayTarget); + rayAabbMax.setMax(rayTarget); + + /* Add box cast extents to bounding box */ + rayAabbMin += aabbMin; + rayAabbMax += aabbMax; + +#ifdef RAYAABB2 + b3Vector3 rayDir = (rayTarget-raySource); + rayDir.normalize (); + lambda_max = rayDir.dot(rayTarget-raySource); + ///what about division by zero? --> just set rayDirection[i] to 1.0 + b3Vector3 rayDirectionInverse; + rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; + rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; + rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? 
b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; + unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; +#endif + + b3Vector3 bounds[2]; + + while (curIndex < m_curNodeIndex) + { + b3Scalar param = 1.0; + //catch bugs in tree data + b3Assert (walkIterations < m_curNodeIndex); + + walkIterations++; + + bounds[0] = rootNode->m_aabbMinOrg; + bounds[1] = rootNode->m_aabbMaxOrg; + /* Add box cast extents */ + bounds[0] -= aabbMax; + bounds[1] -= aabbMin; + + aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); + //perhaps profile if it is worth doing the aabbOverlap test first + +#ifdef RAYAABB2 + ///careful with this check: need to check division by zero (above) and fix the unQuantize method + ///thanks Joerg/hiker for the reproduction case! + ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 + rayBoxOverlap = aabbOverlap ? b3RayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; + +#else + b3Vector3 normal; + rayBoxOverlap = b3RayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal); +#endif + + isLeafNode = rootNode->m_escapeIndex == -1; + + //PCK: unsigned instead of bool + if (isLeafNode && (rayBoxOverlap != 0)) + { + nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); + } + + //PCK: unsigned instead of bool + if ((rayBoxOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->m_escapeIndex; + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (b3s_maxIterations < walkIterations) + b3s_maxIterations = walkIterations; + +} + + + +void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const +{ + b3Assert(m_useQuantization); + 
+ int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + (void)subTreeSize; + + const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; + int escapeIndex; + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned boxBoxOverlap = 0; + unsigned rayBoxOverlap = 0; + + b3Scalar lambda_max = 1.0; + +#ifdef RAYAABB2 + b3Vector3 rayDirection = (rayTarget-raySource); + rayDirection.normalize (); + lambda_max = rayDirection.dot(rayTarget-raySource); + ///what about division by zero? --> just set rayDirection[i] to 1.0 + rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0]; + rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1]; + rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2]; + unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; +#endif + + /* Quick pruning by quantized box */ + b3Vector3 rayAabbMin = raySource; + b3Vector3 rayAabbMax = raySource; + rayAabbMin.setMin(rayTarget); + rayAabbMax.setMax(rayTarget); + + /* Add box cast extents to bounding box */ + rayAabbMin += aabbMin; + rayAabbMax += aabbMax; + + unsigned short int quantizedQueryAabbMin[3]; + unsigned short int quantizedQueryAabbMax[3]; + quantizeWithClamp(quantizedQueryAabbMin,rayAabbMin,0); + quantizeWithClamp(quantizedQueryAabbMax,rayAabbMax,1); + + while (curIndex < endNodeIndex) + { + +//#define VISUALLY_ANALYZE_BVH 1 +#ifdef VISUALLY_ANALYZE_BVH + //some code snippet to debugDraw aabb, to visually analyze bvh structure + static int drawPatch = 0; + //need some global access to a debugDrawer + extern b3IDebugDraw* debugDrawerPtr; + if (curIndex==drawPatch) + { + b3Vector3 aabbMin,aabbMax; + aabbMin = unQuantize(rootNode->m_quantizedAabbMin); + aabbMax = unQuantize(rootNode->m_quantizedAabbMax); + 
b3Vector3 color(1,0,0); + debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); + } +#endif//VISUALLY_ANALYZE_BVH + + //catch bugs in tree data + b3Assert (walkIterations < subTreeSize); + + walkIterations++; + //PCK: unsigned instead of bool + // only interested if this is closer than any previous hit + b3Scalar param = 1.0; + rayBoxOverlap = 0; + boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + if (boxBoxOverlap) + { + b3Vector3 bounds[2]; + bounds[0] = unQuantize(rootNode->m_quantizedAabbMin); + bounds[1] = unQuantize(rootNode->m_quantizedAabbMax); + /* Add box cast extents */ + bounds[0] -= aabbMax; + bounds[1] -= aabbMin; +#if 0 + b3Vector3 normal; + bool ra2 = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); + bool ra = b3RayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal); + if (ra2 != ra) + { + printf("functions don't match\n"); + } +#endif +#ifdef RAYAABB2 + ///careful with this check: need to check division by zero (above) and fix the unQuantize method + ///thanks Joerg/hiker for the reproduction case! 
+ ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 + + //B3_PROFILE("b3RayAabb2"); + rayBoxOverlap = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); + +#else + rayBoxOverlap = true;//b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); +#endif + } + + if (isLeafNode && rayBoxOverlap) + { + nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); + } + + //PCK: unsigned instead of bool + if ((rayBoxOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (b3s_maxIterations < walkIterations) + b3s_maxIterations = walkIterations; + +} + +void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const +{ + b3Assert(m_useQuantization); + + int curIndex = startNodeIndex; + int walkIterations = 0; + int subTreeSize = endNodeIndex - startNodeIndex; + (void)subTreeSize; + + const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; + int escapeIndex; + + bool isLeafNode; + //PCK: unsigned instead of bool + unsigned aabbOverlap; + + while (curIndex < endNodeIndex) + { + +//#define VISUALLY_ANALYZE_BVH 1 +#ifdef VISUALLY_ANALYZE_BVH + //some code snippet to debugDraw aabb, to visually analyze bvh structure + static int drawPatch = 0; + //need some global access to a debugDrawer + extern b3IDebugDraw* debugDrawerPtr; + if (curIndex==drawPatch) + { + b3Vector3 aabbMin,aabbMax; + aabbMin = unQuantize(rootNode->m_quantizedAabbMin); + aabbMax = unQuantize(rootNode->m_quantizedAabbMax); + b3Vector3 color(1,0,0); + debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); + } +#endif//VISUALLY_ANALYZE_BVH + + //catch bugs in tree data + b3Assert (walkIterations < subTreeSize); + + walkIterations++; + //PCK: 
unsigned instead of bool + aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); + isLeafNode = rootNode->isLeafNode(); + + if (isLeafNode && aabbOverlap) + { + nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); + } + + //PCK: unsigned instead of bool + if ((aabbOverlap != 0) || isLeafNode) + { + rootNode++; + curIndex++; + } else + { + escapeIndex = rootNode->getEscapeIndex(); + rootNode += escapeIndex; + curIndex += escapeIndex; + } + } + if (b3s_maxIterations < walkIterations) + b3s_maxIterations = walkIterations; + +} + +//This traversal can be called from Playstation 3 SPU +void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const +{ + b3Assert(m_useQuantization); + + int i; + + + for (i=0;i<this->m_SubtreeHeaders.size();i++) + { + const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; + + //PCK: unsigned instead of bool + unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); + if (overlap != 0) + { + walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, + subtree.m_rootNodeIndex, + subtree.m_rootNodeIndex+subtree.m_subtreeSize); + } + } +} + + +void b3QuantizedBvh::reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const +{ + reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,b3MakeVector3(0,0,0),b3MakeVector3(0,0,0)); +} + + +void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +{ + //always use stackless + + if (m_useQuantization) + { + 
walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); + } + else + { + walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); + } + /* + { + //recursive traversal + b3Vector3 qaabbMin = raySource; + b3Vector3 qaabbMax = raySource; + qaabbMin.setMin(rayTarget); + qaabbMax.setMax(rayTarget); + qaabbMin += aabbMin; + qaabbMax += aabbMax; + reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax); + } + */ + +} + + +void b3QuantizedBvh::swapLeafNodes(int i,int splitIndex) +{ + if (m_useQuantization) + { + b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; + m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; + m_quantizedLeafNodes[splitIndex] = tmp; + } else + { + b3OptimizedBvhNode tmp = m_leafNodes[i]; + m_leafNodes[i] = m_leafNodes[splitIndex]; + m_leafNodes[splitIndex] = tmp; + } +} + +void b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex) +{ + if (m_useQuantization) + { + m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; + } else + { + m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex]; + } +} + +//PCK: include +#include <new> + +#if 0 +//PCK: consts +static const unsigned BVH_ALIGNMENT = 16; +static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1; + +static const unsigned BVH_ALIGNMENT_BLOCKS = 2; +#endif + + +unsigned int b3QuantizedBvh::getAlignmentSerializationPadding() +{ + // I changed this to 0 since the extra padding is not needed or used. 
+ return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; +} + +unsigned b3QuantizedBvh::calculateSerializeBufferSize() const +{ + unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding(); + baseSize += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; + if (m_useQuantization) + { + return baseSize + m_curNodeIndex * sizeof(b3QuantizedBvhNode); + } + return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode); +} + +bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const +{ + b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size()); + m_subtreeHeaderCount = m_SubtreeHeaders.size(); + +/* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) + { + ///check alignedment for buffer? + b3Assert(0); + return false; + } +*/ + + b3QuantizedBvh *targetBvh = (b3QuantizedBvh *)o_alignedDataBuffer; + + // construct the class so the virtual function table, etc will be set up + // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor + new (targetBvh) b3QuantizedBvh; + + if (i_swapEndian) + { + targetBvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(m_curNodeIndex)); + + + b3SwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin); + b3SwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax); + b3SwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization); + + targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode); + targetBvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(m_subtreeHeaderCount)); + } + else + { + targetBvh->m_curNodeIndex = m_curNodeIndex; + targetBvh->m_bvhAabbMin = m_bvhAabbMin; + targetBvh->m_bvhAabbMax = m_bvhAabbMax; + targetBvh->m_bvhQuantization = m_bvhQuantization; + targetBvh->m_traversalMode = m_traversalMode; + targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount; + } + + targetBvh->m_useQuantization = 
m_useQuantization; + + unsigned char *nodeData = (unsigned char *)targetBvh; + nodeData += sizeof(b3QuantizedBvh); + + unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + int nodeCount = m_curNodeIndex; + + if (m_useQuantization) + { + targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); + } + } + else + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]; + 
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]; + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]; + + targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; + + + } + } + nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0); + } + else + { + targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); + b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); + + targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex)); + targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_subPart)); + targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex)); + } + } + else + { + for (int nodeIndex = 0; nodeIndex < nodeCount; 
nodeIndex++) + { + targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg; + targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg; + + targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex; + targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart; + targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex; + } + } + nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0); + } + + sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + // Now serialize the subtree headers + targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount); + if (i_swapEndian) + { + for (int i = 0; i < m_subtreeHeaderCount; i++) + { + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = 
static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex)); + targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_subtreeSize)); + } + } + else + { + for (int i = 0; i < m_subtreeHeaderCount; i++) + { + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex); + targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize); + + // need to clear padding in destination buffer + targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0; + targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0; + targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0; + } + } + nodeData += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; + + // this clears the pointer in the member variable it doesn't really do anything to the data + // it does call the destructor on the contained objects, but they are all classes with no destructor defined + // so the memory (which is not freed) is left alone + targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0); + + // this wipes the virtual function table pointer at the start of the buffer for the class + *((void**)o_alignedDataBuffer) = NULL; + + return true; +} + +b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) +{ + + if (i_alignedDataBuffer == NULL)// || 
(((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) + { + return NULL; + } + b3QuantizedBvh *bvh = (b3QuantizedBvh *)i_alignedDataBuffer; + + if (i_swapEndian) + { + bvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_curNodeIndex)); + + b3UnSwapVector3Endian(bvh->m_bvhAabbMin); + b3UnSwapVector3Endian(bvh->m_bvhAabbMax); + b3UnSwapVector3Endian(bvh->m_bvhQuantization); + + bvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(bvh->m_traversalMode); + bvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(bvh->m_subtreeHeaderCount)); + } + + unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize(); + b3Assert(calculatedBufSize <= i_dataBufferSize); + + if (calculatedBufSize > i_dataBufferSize) + { + return NULL; + } + + unsigned char *nodeData = (unsigned char *)bvh; + nodeData += sizeof(b3QuantizedBvh); + + unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + int nodeCount = bvh->m_curNodeIndex; + + // Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor + // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor + new (bvh) b3QuantizedBvh(*bvh, false); + + if (bvh->m_useQuantization) + { + bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); + + 
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); + bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); + + bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); + } + } + nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; + } + else + { + bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); + + if (i_swapEndian) + { + for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) + { + b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); + b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); + + bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex)); + bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart)); + bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); + } + } + nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; + } + + sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; + nodeData += sizeToAdd; + + // Now serialize the subtree headers + bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount); + if (i_swapEndian) + { + for (int i = 0; i < bvh->m_subtreeHeaderCount; i++) + { + bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]); + 
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]); + + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]); + bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]); + + bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex)); + bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize)); + } + } + + return bvh; +} + +// Constructor that prevents b3Vector3's default constructor from being called +b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) : +m_bvhAabbMin(self.m_bvhAabbMin), +m_bvhAabbMax(self.m_bvhAabbMax), +m_bvhQuantization(self.m_bvhQuantization), +m_bulletVersion(B3_BULLET_VERSION) +{ + +} + +void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData) +{ + m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax); + m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin); + m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization); + + m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; + m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0; + + { + int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes; + m_contiguousNodes.resize(numElem); + + if (numElem) + { + b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; + + for (int i=0;i<numElem;i++,memPtr++) + { + m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg); + m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); + 
m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; + m_contiguousNodes[i].m_subPart = memPtr->m_subPart; + m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; + } + } + } + + { + int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes; + m_quantizedContiguousNodes.resize(numElem); + + if (numElem) + { + b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; + for (int i=0;i<numElem;i++,memPtr++) + { + m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + } + } + } + + m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode); + + { + int numElem = quantizedBvhFloatData.m_numSubtreeHeaders; + m_SubtreeHeaders.resize(numElem); + if (numElem) + { + b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; + for (int i=0;i<numElem;i++,memPtr++) + { + m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; + m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; + m_SubtreeHeaders[i].m_subtreeSize = 
memPtr->m_subtreeSize; + } + } + } +} + +void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData) +{ + m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax); + m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin); + m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization); + + m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; + m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0; + + { + int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes; + m_contiguousNodes.resize(numElem); + + if (numElem) + { + b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; + + for (int i=0;i<numElem;i++,memPtr++) + { + m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg); + m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); + m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; + m_contiguousNodes[i].m_subPart = memPtr->m_subPart; + m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; + } + } + } + + { + int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes; + m_quantizedContiguousNodes.resize(numElem); + + if (numElem) + { + b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; + for (int i=0;i<numElem;i++,memPtr++) + { + m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + } 
+ } + } + + m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode); + + { + int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders; + m_SubtreeHeaders.resize(numElem); + if (numElem) + { + b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; + for (int i=0;i<numElem;i++,memPtr++) + { + m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; + m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; + m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; + m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; + m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; + m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; + m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; + m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; + } + } + } + +} + + + +///fills the dataBuffer and returns the struct name (and 0 on failure) +const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const +{ + b3Assert(0); + return 0; +} + + + + + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h new file mode 100644 index 0000000000..63c523c758 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h @@ -0,0 +1,556 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef B3_QUANTIZED_BVH_H +#define B3_QUANTIZED_BVH_H + +class b3Serializer; + +//#define DEBUG_CHECK_DEQUANTIZATION 1 +#ifdef DEBUG_CHECK_DEQUANTIZATION +#ifdef __SPU__ +#define printf spu_printf +#endif //__SPU__ + +#include <stdio.h> +#include <stdlib.h> +#endif //DEBUG_CHECK_DEQUANTIZATION + +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3Common/b3AlignedAllocator.h" + +#ifdef B3_USE_DOUBLE_PRECISION +#define b3QuantizedBvhData b3QuantizedBvhDoubleData +#define b3OptimizedBvhNodeData b3OptimizedBvhNodeDoubleData +#define b3QuantizedBvhDataName "b3QuantizedBvhDoubleData" +#else +#define b3QuantizedBvhData b3QuantizedBvhFloatData +#define b3OptimizedBvhNodeData b3OptimizedBvhNodeFloatData +#define b3QuantizedBvhDataName "b3QuantizedBvhFloatData" +#endif + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" + + + +//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp + + +//Note: currently we have 16 bytes per quantized node +#define MAX_SUBTREE_SIZE_IN_BYTES 2048 + +// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one +// actually) triangles each (since the sign bit is reserved +#define MAX_NUM_PARTS_IN_BITS 10 + +///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). 
+B3_ATTRIBUTE_ALIGNED16 (struct) b3QuantizedBvhNode : public b3QuantizedBvhNodeData +{ + B3_DECLARE_ALIGNED_ALLOCATOR(); + + bool isLeafNode() const + { + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (m_escapeIndexOrTriangleIndex >= 0); + } + int getEscapeIndex() const + { + b3Assert(!isLeafNode()); + return -m_escapeIndexOrTriangleIndex; + } + int getTriangleIndex() const + { + b3Assert(isLeafNode()); + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (m_escapeIndexOrTriangleIndex&~(y)); + } + int getPartId() const + { + b3Assert(isLeafNode()); + // Get only the highest bits where the part index is stored + return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); + } +} +; + +/// b3OptimizedBvhNode contains both internal and leaf node information. +/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. +B3_ATTRIBUTE_ALIGNED16 (struct) b3OptimizedBvhNode +{ + B3_DECLARE_ALIGNED_ALLOCATOR(); + + //32 bytes + b3Vector3 m_aabbMinOrg; + b3Vector3 m_aabbMaxOrg; + + //4 + int m_escapeIndex; + + //8 + //for child nodes + int m_subPart; + int m_triangleIndex; + +//pad the size to 64 bytes + char m_padding[20]; +}; + + +///b3BvhSubtreeInfo provides info to gather a subtree of limited size +B3_ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo : public b3BvhSubtreeInfoData +{ +public: + B3_DECLARE_ALIGNED_ALLOCATOR(); + + b3BvhSubtreeInfo() + { + //memset(&m_padding[0], 0, sizeof(m_padding)); + } + + + void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode) + { + m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0]; + m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; + m_quantizedAabbMin[2] = quantizedNode.m_quantizedAabbMin[2]; + m_quantizedAabbMax[0] = quantizedNode.m_quantizedAabbMax[0]; + m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1]; + m_quantizedAabbMax[2] = 
quantizedNode.m_quantizedAabbMax[2]; + } +} +; + + +class b3NodeOverlapCallback +{ +public: + virtual ~b3NodeOverlapCallback() {}; + + virtual void processNode(int subPart, int triangleIndex) = 0; +}; + +#include "Bullet3Common/b3AlignedAllocator.h" +#include "Bullet3Common/b3AlignedObjectArray.h" + + + +///for code readability: +typedef b3AlignedObjectArray<b3OptimizedBvhNode> NodeArray; +typedef b3AlignedObjectArray<b3QuantizedBvhNode> QuantizedNodeArray; +typedef b3AlignedObjectArray<b3BvhSubtreeInfo> BvhSubtreeInfoArray; + + +///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU. +///It is used by the b3BvhTriangleMeshShape as midphase +///It is recommended to use quantization for better performance and lower memory requirements. +B3_ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh +{ +public: + enum b3TraversalMode + { + TRAVERSAL_STACKLESS = 0, + TRAVERSAL_STACKLESS_CACHE_FRIENDLY, + TRAVERSAL_RECURSIVE + }; + + + + + b3Vector3 m_bvhAabbMin; + b3Vector3 m_bvhAabbMax; + b3Vector3 m_bvhQuantization; + +protected: + int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. + + int m_curNodeIndex; + //quantization data + bool m_useQuantization; + + + + NodeArray m_leafNodes; + NodeArray m_contiguousNodes; + QuantizedNodeArray m_quantizedLeafNodes; + QuantizedNodeArray m_quantizedContiguousNodes; + + b3TraversalMode m_traversalMode; + BvhSubtreeInfoArray m_SubtreeHeaders; + + //This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray + mutable int m_subtreeHeaderCount; + + + + + + ///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!) 
+ ///this might be refactored into a virtual, it is usually not calculated at run-time + void setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin) + { + if (m_useQuantization) + { + quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] ,aabbMin,0); + } else + { + m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; + + } + } + void setInternalNodeAabbMax(int nodeIndex,const b3Vector3& aabbMax) + { + if (m_useQuantization) + { + quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0],aabbMax,1); + } else + { + m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax; + } + } + + b3Vector3 getAabbMin(int nodeIndex) const + { + if (m_useQuantization) + { + return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMin[0]); + } + //non-quantized + return m_leafNodes[nodeIndex].m_aabbMinOrg; + + } + b3Vector3 getAabbMax(int nodeIndex) const + { + if (m_useQuantization) + { + return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); + } + //non-quantized + return m_leafNodes[nodeIndex].m_aabbMaxOrg; + + } + + + void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) + { + if (m_useQuantization) + { + m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; + } + else + { + m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex; + } + + } + + void mergeInternalNodeAabb(int nodeIndex,const b3Vector3& newAabbMin,const b3Vector3& newAabbMax) + { + if (m_useQuantization) + { + unsigned short int quantizedAabbMin[3]; + unsigned short int quantizedAabbMax[3]; + quantize(quantizedAabbMin,newAabbMin,0); + quantize(quantizedAabbMax,newAabbMax,1); + for (int i=0;i<3;i++) + { + if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i]) + m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i]; + + if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i]) + 
m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; + + } + } else + { + //non-quantized + m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); + m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax); + } + } + + void swapLeafNodes(int firstIndex,int secondIndex); + + void assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex); + +protected: + + + + void buildTree (int startIndex,int endIndex); + + int calcSplittingAxis(int startIndex,int endIndex); + + int sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis); + + void walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; + + void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; + void walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const; + void walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; + + ///tree traversal designed for small-memory processors like PS3 SPU + void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; + + ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal + void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; + + ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal + 
void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA,const b3QuantizedBvhNode* treeNodeB,b3NodeOverlapCallback* nodeCallback) const; + + + + + void updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex); + +public: + + B3_DECLARE_ALIGNED_ALLOCATOR(); + + b3QuantizedBvh(); + + virtual ~b3QuantizedBvh(); + + + ///***************************************** expert/internal use only ************************* + void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); + QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } + ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized + void buildInternal(); + ///***************************************** expert/internal use only ************************* + + void reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; + void reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; + void reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; + + B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point,int isMax) const + { + + b3Assert(m_useQuantization); + + b3Assert(point.getX() <= m_bvhAabbMax.getX()); + b3Assert(point.getY() <= m_bvhAabbMax.getY()); + b3Assert(point.getZ() <= m_bvhAabbMax.getZ()); + + b3Assert(point.getX() >= m_bvhAabbMin.getX()); + b3Assert(point.getY() >= m_bvhAabbMin.getY()); + b3Assert(point.getZ() >= m_bvhAabbMin.getZ()); + + b3Vector3 v = (point - m_bvhAabbMin) * m_bvhQuantization; + ///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative + ///end-points always set the first bit, so that they are sorted 
properly (so that neighbouring AABBs overlap properly) + ///@todo: double-check this + if (isMax) + { + out[0] = (unsigned short) (((unsigned short)(v.getX()+b3Scalar(1.)) | 1)); + out[1] = (unsigned short) (((unsigned short)(v.getY()+b3Scalar(1.)) | 1)); + out[2] = (unsigned short) (((unsigned short)(v.getZ()+b3Scalar(1.)) | 1)); + } else + { + out[0] = (unsigned short) (((unsigned short)(v.getX()) & 0xfffe)); + out[1] = (unsigned short) (((unsigned short)(v.getY()) & 0xfffe)); + out[2] = (unsigned short) (((unsigned short)(v.getZ()) & 0xfffe)); + } + + +#ifdef DEBUG_CHECK_DEQUANTIZATION + b3Vector3 newPoint = unQuantize(out); + if (isMax) + { + if (newPoint.getX() < point.getX()) + { + printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); + } + if (newPoint.getY() < point.getY()) + { + printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); + } + if (newPoint.getZ() < point.getZ()) + { + + printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); + } + } else + { + if (newPoint.getX() > point.getX()) + { + printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); + } + if (newPoint.getY() > point.getY()) + { + printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); + } + if (newPoint.getZ() > point.getZ()) + { + printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); + } + } +#endif //DEBUG_CHECK_DEQUANTIZATION + + } + + + B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2,int isMax) const + { + + b3Assert(m_useQuantization); + + b3Vector3 clampedPoint(point2); + clampedPoint.setMax(m_bvhAabbMin); + clampedPoint.setMin(m_bvhAabbMax); + + 
quantize(out,clampedPoint,isMax); + + } + + B3_FORCE_INLINE b3Vector3 unQuantize(const unsigned short* vecIn) const + { + b3Vector3 vecOut; + vecOut.setValue( + (b3Scalar)(vecIn[0]) / (m_bvhQuantization.getX()), + (b3Scalar)(vecIn[1]) / (m_bvhQuantization.getY()), + (b3Scalar)(vecIn[2]) / (m_bvhQuantization.getZ())); + vecOut += m_bvhAabbMin; + return vecOut; + } + + ///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. + void setTraversalMode(b3TraversalMode traversalMode) + { + m_traversalMode = traversalMode; + } + + + B3_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() + { + return m_quantizedContiguousNodes; + } + + + B3_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray() + { + return m_SubtreeHeaders; + } + +//////////////////////////////////////////////////////////////////// + + /////Calculate space needed to store BVH for serialization + unsigned calculateSerializeBufferSize() const; + + /// Data buffer MUST be 16 byte aligned + virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; + + ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' + static b3QuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); + + static unsigned int getAlignmentSerializationPadding(); +////////////////////////////////////////////////////////////////////// + + + virtual int calculateSerializeBufferSizeNew() const; + + ///fills the dataBuffer and returns the struct name (and 0 on failure) + virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; + + virtual void deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData); + + virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData); + + +//////////////////////////////////////////////////////////////////// + + 
B3_FORCE_INLINE bool isQuantized() + { + return m_useQuantization; + } + +private: + // Special "copy" constructor that allows for in-place deserialization + // Prevents b3Vector3's default constructor from being called, but doesn't inialize much else + // ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) + b3QuantizedBvh(b3QuantizedBvh &other, bool ownsMemory); + +} +; + + +struct b3OptimizedBvhNodeFloatData +{ + b3Vector3FloatData m_aabbMinOrg; + b3Vector3FloatData m_aabbMaxOrg; + int m_escapeIndex; + int m_subPart; + int m_triangleIndex; + char m_pad[4]; +}; + +struct b3OptimizedBvhNodeDoubleData +{ + b3Vector3DoubleData m_aabbMinOrg; + b3Vector3DoubleData m_aabbMaxOrg; + int m_escapeIndex; + int m_subPart; + int m_triangleIndex; + char m_pad[4]; +}; + + + +struct b3QuantizedBvhFloatData +{ + b3Vector3FloatData m_bvhAabbMin; + b3Vector3FloatData m_bvhAabbMax; + b3Vector3FloatData m_bvhQuantization; + int m_curNodeIndex; + int m_useQuantization; + int m_numContiguousLeafNodes; + int m_numQuantizedContiguousNodes; + b3OptimizedBvhNodeFloatData *m_contiguousNodesPtr; + b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; + b3BvhSubtreeInfoData *m_subTreeInfoPtr; + int m_traversalMode; + int m_numSubtreeHeaders; + +}; + +struct b3QuantizedBvhDoubleData +{ + b3Vector3DoubleData m_bvhAabbMin; + b3Vector3DoubleData m_bvhAabbMax; + b3Vector3DoubleData m_bvhQuantization; + int m_curNodeIndex; + int m_useQuantization; + int m_numContiguousLeafNodes; + int m_numQuantizedContiguousNodes; + b3OptimizedBvhNodeDoubleData *m_contiguousNodesPtr; + b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; + + int m_traversalMode; + int m_numSubtreeHeaders; + b3BvhSubtreeInfoData *m_subTreeInfoPtr; +}; + + +B3_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const +{ + return sizeof(b3QuantizedBvhData); +} + + + +#endif //B3_QUANTIZED_BVH_H diff --git 
a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp new file mode 100644 index 0000000000..4d97f7f62b --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp @@ -0,0 +1,214 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include "b3StridingMeshInterface.h" + + +b3StridingMeshInterface::~b3StridingMeshInterface() +{ + +} + + +void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const +{ + (void)aabbMin; + (void)aabbMax; + int numtotalphysicsverts = 0; + int part,graphicssubparts = getNumSubParts(); + const unsigned char * vertexbase; + const unsigned char * indexbase; + int indexstride; + PHY_ScalarType type; + PHY_ScalarType gfxindextype; + int stride,numverts,numtriangles; + int gfxindex; + b3Vector3 triangle[3]; + + b3Vector3 meshScaling = getScaling(); + + ///if the number of parts is big, the performance might drop due to the innerloop switch on indextype + for (part=0;part<graphicssubparts ;part++) + { + getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part); + numtotalphysicsverts+=numtriangles*3; //upper bound + + ///unlike that developers want to pass in double-precision meshes in single-precision Bullet build + ///so disable this feature by default + ///see patch http://code.google.com/p/bullet/issues/detail?id=213 + + switch (type) + { + case PHY_FLOAT: + { + + float* graphicsbase; + + switch (gfxindextype) + { + case PHY_INTEGER: + { + for (gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); + graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), 
graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_SHORT: + { + for (gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); + graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_UCHAR: + { + for (gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); + graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + default: + b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); + } + break; + } + + case PHY_DOUBLE: + { + double* graphicsbase; + + switch (gfxindextype) + { + case PHY_INTEGER: + { + for 
(gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); + graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_SHORT: + { + for (gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); + graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + case PHY_UCHAR: + { + for (gfxindex=0;gfxindex<numtriangles;gfxindex++) + { + unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); + graphicsbase = 
(double*)(vertexbase+tri_indices[0]*stride); + triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); + triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); + triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); + callback->internalProcessTriangleIndex(triangle,part,gfxindex); + } + break; + } + default: + b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); + } + break; + } + default: + b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); + } + + unLockReadOnlyVertexBase(part); + } +} + +void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax) +{ + + struct AabbCalculationCallback : public b3InternalTriangleIndexCallback + { + b3Vector3 m_aabbMin; + b3Vector3 m_aabbMax; + + AabbCalculationCallback() + { + m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + } + + virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) + { + (void)partId; + (void)triangleIndex; + + m_aabbMin.setMin(triangle[0]); + m_aabbMax.setMax(triangle[0]); + m_aabbMin.setMin(triangle[1]); + m_aabbMax.setMax(triangle[1]); + m_aabbMin.setMin(triangle[2]); + m_aabbMax.setMax(triangle[2]); + } + }; + + //first calculate the total aabb for all triangles + AabbCalculationCallback aabbCallback; + aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); + 
aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + InternalProcessAllTriangles(&aabbCallback,aabbMin,aabbMax); + + aabbMin = aabbCallback.m_aabbMin; + aabbMax = aabbCallback.m_aabbMax; +} + + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h new file mode 100644 index 0000000000..9513f68f77 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h @@ -0,0 +1,167 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifndef B3_STRIDING_MESHINTERFACE_H +#define B3_STRIDING_MESHINTERFACE_H + +#include "Bullet3Common/b3Vector3.h" +#include "b3TriangleCallback.h" +//#include "b3ConcaveShape.h" + + +enum PHY_ScalarType { + PHY_FLOAT, PHY_DOUBLE, PHY_INTEGER, PHY_SHORT, + PHY_FIXEDPOINT88, PHY_UCHAR +}; + + +/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes. +/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips. +/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory. +B3_ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface +{ + protected: + + b3Vector3 m_scaling; + + public: + B3_DECLARE_ALIGNED_ALLOCATOR(); + + b3StridingMeshInterface() :m_scaling(b3MakeVector3(b3Scalar(1.),b3Scalar(1.),b3Scalar(1.))) + { + + } + + virtual ~b3StridingMeshInterface(); + + + + virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; + + ///brute force method to calculate aabb + void calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax); + + /// get read and write access to a subpart of a triangle mesh + /// this subpart has a continuous array of vertices and indices + /// in this way the mesh can be handled as chunks of memory with striding + /// very similar to OpenGL vertexarray support + /// make a call to unLockVertexBase when the read and write access is finished + virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0)=0; + + virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& 
type, int& stride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const=0; + + /// unLockVertexBase finishes the access to a subpart of the triangle mesh + /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished + virtual void unLockVertexBase(int subpart)=0; + + virtual void unLockReadOnlyVertexBase(int subpart) const=0; + + + /// getNumSubParts returns the number of seperate subparts + /// each subpart has a continuous array of vertices and indices + virtual int getNumSubParts() const=0; + + virtual void preallocateVertices(int numverts)=0; + virtual void preallocateIndices(int numindices)=0; + + virtual bool hasPremadeAabb() const { return false; } + virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const + { + (void) aabbMin; + (void) aabbMax; + } + virtual void getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const + { + (void) aabbMin; + (void) aabbMax; + } + + const b3Vector3& getScaling() const { + return m_scaling; + } + void setScaling(const b3Vector3& scaling) + { + m_scaling = scaling; + } + + virtual int calculateSerializeBufferSize() const; + + ///fills the dataBuffer and returns the struct name (and 0 on failure) + //virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; + + +}; + +struct b3IntIndexData +{ + int m_value; +}; + +struct b3ShortIntIndexData +{ + short m_value; + char m_pad[2]; +}; + +struct b3ShortIntIndexTripletData +{ + short m_values[3]; + char m_pad[2]; +}; + +struct b3CharIndexTripletData +{ + unsigned char m_values[3]; + char m_pad; +}; + + +///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 +struct b3MeshPartData +{ + b3Vector3FloatData *m_vertices3f; + b3Vector3DoubleData *m_vertices3d; + + b3IntIndexData *m_indices32; + b3ShortIntIndexTripletData *m_3indices16; + b3CharIndexTripletData *m_3indices8; + 
+ b3ShortIntIndexData *m_indices16;//backwards compatibility + + int m_numTriangles;//length of m_indices = m_numTriangles + int m_numVertices; +}; + + +///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 +struct b3StridingMeshInterfaceData +{ + b3MeshPartData *m_meshPartsPtr; + b3Vector3FloatData m_scaling; + int m_numMeshParts; + char m_padding[4]; +}; + + + + +B3_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const +{ + return sizeof(b3StridingMeshInterfaceData); +} + + + +#endif //B3_STRIDING_MESHINTERFACE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h new file mode 100644 index 0000000000..d073ee57c3 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h @@ -0,0 +1,38 @@ + +#ifndef B3_SUPPORT_MAPPINGS_H +#define B3_SUPPORT_MAPPINGS_H + +#include "Bullet3Common/b3Transform.h" +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "b3VectorFloat4.h" + + +struct b3GjkPairDetector; + + + +inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull, + const b3AlignedObjectArray<b3Vector3>& verticesA, b3Scalar margin) +{ + b3Vector3 supVec = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); + + // Here we take advantage of dot(a, b*c) = dot(a*b, c). Note: This is true mathematically, but not numerically. 
+ if( 0 < hull->m_numVertices ) + { + const b3Vector3 scaled = supportVec; + int index = (int) scaled.maxDot( &verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot); + return verticesA[hull->m_vertexOffset+index]; + } + + return supVec; + +} + +inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull, + const b3AlignedObjectArray<b3Vector3>& verticesA) +{ + return localGetSupportVertexWithMargin(supportVec,hull,verticesA,0.f); +} + +#endif //B3_SUPPORT_MAPPINGS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp new file mode 100644 index 0000000000..9066451884 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp @@ -0,0 +1,28 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include "b3TriangleCallback.h" + +b3TriangleCallback::~b3TriangleCallback() +{ + +} + + +b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback() +{ + +} + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h new file mode 100644 index 0000000000..3059fa4f21 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h @@ -0,0 +1,42 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +#ifndef B3_TRIANGLE_CALLBACK_H +#define B3_TRIANGLE_CALLBACK_H + +#include "Bullet3Common/b3Vector3.h" + + +///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles. +///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape. 
+class b3TriangleCallback +{ +public: + + virtual ~b3TriangleCallback(); + virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0; +}; + +class b3InternalTriangleIndexCallback +{ +public: + + virtual ~b3InternalTriangleIndexCallback(); + virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) = 0; +}; + + + +#endif //B3_TRIANGLE_CALLBACK_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp new file mode 100644 index 0000000000..a0f59babbe --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp @@ -0,0 +1,95 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#include "b3TriangleIndexVertexArray.h" + +b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride) +: m_hasAabb(0) +{ + b3IndexedMesh mesh; + + mesh.m_numTriangles = numTriangles; + mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase; + mesh.m_triangleIndexStride = triangleIndexStride; + mesh.m_numVertices = numVertices; + mesh.m_vertexBase = (const unsigned char *)vertexBase; + mesh.m_vertexStride = vertexStride; + + addIndexedMesh(mesh); + +} + +b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray() +{ + +} + +void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) +{ + b3Assert(subpart< getNumSubParts() ); + + b3IndexedMesh& mesh = m_indexedMeshes[subpart]; + + numverts = mesh.m_numVertices; + (*vertexbase) = (unsigned char *) mesh.m_vertexBase; + + type = mesh.m_vertexType; + + vertexStride = mesh.m_vertexStride; + + numfaces = mesh.m_numTriangles; + + (*indexbase) = (unsigned char *)mesh.m_triangleIndexBase; + indexstride = mesh.m_triangleIndexStride; + indicestype = mesh.m_indexType; +} + +void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const +{ + const b3IndexedMesh& mesh = m_indexedMeshes[subpart]; + + numverts = mesh.m_numVertices; + (*vertexbase) = (const unsigned char *)mesh.m_vertexBase; + + type = mesh.m_vertexType; + + vertexStride = mesh.m_vertexStride; + + numfaces = mesh.m_numTriangles; + (*indexbase) = (const unsigned char *)mesh.m_triangleIndexBase; + indexstride = mesh.m_triangleIndexStride; + indicestype = 
mesh.m_indexType; +} + +bool b3TriangleIndexVertexArray::hasPremadeAabb() const +{ + return (m_hasAabb == 1); +} + + +void b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const +{ + m_aabbMin = aabbMin; + m_aabbMax = aabbMax; + m_hasAabb = 1; // this is intentionally an int see notes in header +} + +void b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const +{ + *aabbMin = m_aabbMin; + *aabbMax = m_aabbMax; +} + + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h new file mode 100644 index 0000000000..d26b2893bc --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h @@ -0,0 +1,133 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + +#ifndef B3_TRIANGLE_INDEX_VERTEX_ARRAY_H +#define B3_TRIANGLE_INDEX_VERTEX_ARRAY_H + +#include "b3StridingMeshInterface.h" +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "Bullet3Common/b3Scalar.h" + + +///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh. +///Instead of the number of indices, we pass the number of triangles. +B3_ATTRIBUTE_ALIGNED16( struct) b3IndexedMesh +{ + B3_DECLARE_ALIGNED_ALLOCATOR(); + + int m_numTriangles; + const unsigned char * m_triangleIndexBase; + // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) + int m_triangleIndexStride; + int m_numVertices; + const unsigned char * m_vertexBase; + // Size of a vertex, in bytes + int m_vertexStride; + + // The index type is set when adding an indexed mesh to the + // b3TriangleIndexVertexArray, do not set it manually + PHY_ScalarType m_indexType; + + // The vertex type has a default type similar to Bullet's precision mode (float or double) + // but can be set manually if you for example run Bullet with double precision but have + // mesh data in single precision.. + PHY_ScalarType m_vertexType; + + + b3IndexedMesh() + :m_indexType(PHY_INTEGER), +#ifdef B3_USE_DOUBLE_PRECISION + m_vertexType(PHY_DOUBLE) +#else // B3_USE_DOUBLE_PRECISION + m_vertexType(PHY_FLOAT) +#endif // B3_USE_DOUBLE_PRECISION + { + } +} +; + + +typedef b3AlignedObjectArray<b3IndexedMesh> IndexedMeshArray; + +///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays. +///Additional meshes can be added using addIndexedMesh +///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays. +///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray. 
+B3_ATTRIBUTE_ALIGNED16( class) b3TriangleIndexVertexArray : public b3StridingMeshInterface +{ +protected: + IndexedMeshArray m_indexedMeshes; + int m_pad[2]; + mutable int m_hasAabb; // using int instead of bool to maintain alignment + mutable b3Vector3 m_aabbMin; + mutable b3Vector3 m_aabbMax; + +public: + + B3_DECLARE_ALIGNED_ALLOCATOR(); + + b3TriangleIndexVertexArray() : m_hasAabb(0) + { + } + + virtual ~b3TriangleIndexVertexArray(); + + //just to be backwards compatible + b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride); + + void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) + { + m_indexedMeshes.push_back(mesh); + m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType; + } + + + virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0); + + virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const; + + /// unLockVertexBase finishes the access to a subpart of the triangle mesh + /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished + virtual void unLockVertexBase(int subpart) {(void)subpart;} + + virtual void unLockReadOnlyVertexBase(int subpart) const {(void)subpart;} + + /// getNumSubParts returns the number of seperate subparts + /// each subpart has a continuous array of vertices and indices + virtual int getNumSubParts() const { + return (int)m_indexedMeshes.size(); + } + + IndexedMeshArray& getIndexedMeshArray() + { + return m_indexedMeshes; + } + + const IndexedMeshArray& getIndexedMeshArray() const + { + 
return m_indexedMeshes; + } + + virtual void preallocateVertices(int numverts){(void) numverts;} + virtual void preallocateIndices(int numindices){(void) numindices;} + + virtual bool hasPremadeAabb() const; + virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const; + virtual void getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const; + +} +; + +#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h new file mode 100644 index 0000000000..f6f65f7719 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h @@ -0,0 +1,11 @@ +#ifndef B3_VECTOR_FLOAT4_H +#define B3_VECTOR_FLOAT4_H + +#include "Bullet3Common/b3Transform.h" + +//#define cross3(a,b) (a.cross(b)) +#define float4 b3Vector3 +//#define make_float4(x,y,z,w) b3Vector4(x,y,z,w) + + +#endif //B3_VECTOR_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp new file mode 100644 index 0000000000..cf3d5ef49d --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp @@ -0,0 +1,609 @@ + +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. + + Elsevier CDROM license agreements grants nonexclusive license to use the software + for any purpose, commercial or non-commercial as long as the following credit is included + identifying the original source of the software: + + Parts of the source are "from the book Real-Time Collision Detection by + Christer Ericson, published by Morgan Kaufmann Publishers, + (c) 2005 Elsevier Inc." + +*/ + + +#include "b3VoronoiSimplexSolver.h" + +#define VERTA 0 +#define VERTB 1 +#define VERTC 2 +#define VERTD 3 + +#define B3_CATCH_DEGENERATE_TETRAHEDRON 1 +void b3VoronoiSimplexSolver::removeVertex(int index) +{ + + b3Assert(m_numVertices>0); + m_numVertices--; + m_simplexVectorW[index] = m_simplexVectorW[m_numVertices]; + m_simplexPointsP[index] = m_simplexPointsP[m_numVertices]; + m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices]; +} + +void b3VoronoiSimplexSolver::reduceVertices (const b3UsageBitfield& usedVerts) +{ + if ((numVertices() >= 4) && (!usedVerts.usedVertexD)) + removeVertex(3); + + if ((numVertices() >= 3) && (!usedVerts.usedVertexC)) + removeVertex(2); + + if ((numVertices() >= 2) && (!usedVerts.usedVertexB)) + removeVertex(1); + + if ((numVertices() >= 1) && (!usedVerts.usedVertexA)) + removeVertex(0); + +} + + + + + +//clear the simplex, remove all the vertices +void b3VoronoiSimplexSolver::reset() +{ + m_cachedValidClosest = false; + m_numVertices = 0; + m_needsUpdate = true; + m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); + m_cachedBC.reset(); +} + + + + //add a vertex +void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q) +{ + 
m_lastW = w; + m_needsUpdate = true; + + m_simplexVectorW[m_numVertices] = w; + m_simplexPointsP[m_numVertices] = p; + m_simplexPointsQ[m_numVertices] = q; + + m_numVertices++; +} + +bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() +{ + + if (m_needsUpdate) + { + m_cachedBC.reset(); + + m_needsUpdate = false; + + switch (numVertices()) + { + case 0: + m_cachedValidClosest = false; + break; + case 1: + { + m_cachedP1 = m_simplexPointsP[0]; + m_cachedP2 = m_simplexPointsQ[0]; + m_cachedV = m_cachedP1-m_cachedP2; //== m_simplexVectorW[0] + m_cachedBC.reset(); + m_cachedBC.setBarycentricCoordinates(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + m_cachedValidClosest = m_cachedBC.isValid(); + break; + }; + case 2: + { + //closest point origin from line segment + const b3Vector3& from = m_simplexVectorW[0]; + const b3Vector3& to = m_simplexVectorW[1]; + b3Vector3 nearest; + + b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + b3Vector3 diff = p - from; + b3Vector3 v = to - from; + b3Scalar t = v.dot(diff); + + if (t > 0) { + b3Scalar dotVV = v.dot(v); + if (t < dotVV) { + t /= dotVV; + diff -= t*v; + m_cachedBC.m_usedVertices.usedVertexA = true; + m_cachedBC.m_usedVertices.usedVertexB = true; + } else { + t = 1; + diff -= v; + //reduce to 1 point + m_cachedBC.m_usedVertices.usedVertexB = true; + } + } else + { + t = 0; + //reduce to 1 point + m_cachedBC.m_usedVertices.usedVertexA = true; + } + m_cachedBC.setBarycentricCoordinates(1-t,t); + nearest = from + t*v; + + m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); + m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); + m_cachedV = m_cachedP1 - m_cachedP2; + + reduceVertices(m_cachedBC.m_usedVertices); + + m_cachedValidClosest = m_cachedBC.isValid(); + break; + } + case 3: + { + //closest point origin from triangle + b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + + const b3Vector3& a = 
m_simplexVectorW[0]; + const b3Vector3& b = m_simplexVectorW[1]; + const b3Vector3& c = m_simplexVectorW[2]; + + closestPtPointTriangle(p,a,b,c,m_cachedBC); + m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + + m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + + m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; + + m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + + m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + + m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; + + m_cachedV = m_cachedP1-m_cachedP2; + + reduceVertices (m_cachedBC.m_usedVertices); + m_cachedValidClosest = m_cachedBC.isValid(); + + break; + } + case 4: + { + + + b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + + const b3Vector3& a = m_simplexVectorW[0]; + const b3Vector3& b = m_simplexVectorW[1]; + const b3Vector3& c = m_simplexVectorW[2]; + const b3Vector3& d = m_simplexVectorW[3]; + + bool hasSeperation = closestPtPointTetrahedron(p,a,b,c,d,m_cachedBC); + + if (hasSeperation) + { + + m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + + m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + + m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + + m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3]; + + m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + + m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + + m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + + m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; + + m_cachedV = m_cachedP1-m_cachedP2; + reduceVertices (m_cachedBC.m_usedVertices); + } else + { +// printf("sub distance got penetration\n"); + + if (m_cachedBC.m_degenerate) + { + m_cachedValidClosest = false; + } else + { + m_cachedValidClosest = true; + //degenerate case == false, penetration = true + zero + m_cachedV.setValue(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); + } + break; + } + + m_cachedValidClosest = 
m_cachedBC.isValid(); + + //closest point origin from tetrahedron + break; + } + default: + { + m_cachedValidClosest = false; + } + }; + } + + return m_cachedValidClosest; + +} + +//return/calculate the closest vertex +bool b3VoronoiSimplexSolver::closest(b3Vector3& v) +{ + bool succes = updateClosestVectorAndPoints(); + v = m_cachedV; + return succes; +} + + + +b3Scalar b3VoronoiSimplexSolver::maxVertex() +{ + int i, numverts = numVertices(); + b3Scalar maxV = b3Scalar(0.); + for (i=0;i<numverts;i++) + { + b3Scalar curLen2 = m_simplexVectorW[i].length2(); + if (maxV < curLen2) + maxV = curLen2; + } + return maxV; +} + + + + //return the current simplex +int b3VoronoiSimplexSolver::getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const +{ + int i; + for (i=0;i<numVertices();i++) + { + yBuf[i] = m_simplexVectorW[i]; + pBuf[i] = m_simplexPointsP[i]; + qBuf[i] = m_simplexPointsQ[i]; + } + return numVertices(); +} + + + + +bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w) +{ + bool found = false; + int i, numverts = numVertices(); + //b3Scalar maxV = b3Scalar(0.); + + //w is in the current (reduced) simplex + for (i=0;i<numverts;i++) + { +#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD + if ( m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold) +#else + if (m_simplexVectorW[i] == w) +#endif + found = true; + } + + //check in case lastW is already removed + if (w == m_lastW) + return true; + + return found; +} + +void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v) +{ + v = m_cachedV; +} + + +bool b3VoronoiSimplexSolver::emptySimplex() const +{ + return (numVertices() == 0); + +} + +void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2) +{ + updateClosestVectorAndPoints(); + p1 = m_cachedP1; + p2 = m_cachedP2; + +} + + + + +bool b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result) +{ + result.m_usedVertices.reset(); + + // 
Check if P in vertex region outside A + b3Vector3 ab = b - a; + b3Vector3 ac = c - a; + b3Vector3 ap = p - a; + b3Scalar d1 = ab.dot(ap); + b3Scalar d2 = ac.dot(ap); + if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0)) + { + result.m_closestPointOnSimplex = a; + result.m_usedVertices.usedVertexA = true; + result.setBarycentricCoordinates(1,0,0); + return true;// a; // barycentric coordinates (1,0,0) + } + + // Check if P in vertex region outside B + b3Vector3 bp = p - b; + b3Scalar d3 = ab.dot(bp); + b3Scalar d4 = ac.dot(bp); + if (d3 >= b3Scalar(0.0) && d4 <= d3) + { + result.m_closestPointOnSimplex = b; + result.m_usedVertices.usedVertexB = true; + result.setBarycentricCoordinates(0,1,0); + + return true; // b; // barycentric coordinates (0,1,0) + } + // Check if P in edge region of AB, if so return projection of P onto AB + b3Scalar vc = d1*d4 - d3*d2; + if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) { + b3Scalar v = d1 / (d1 - d3); + result.m_closestPointOnSimplex = a + v * ab; + result.m_usedVertices.usedVertexA = true; + result.m_usedVertices.usedVertexB = true; + result.setBarycentricCoordinates(1-v,v,0); + return true; + //return a + v * ab; // barycentric coordinates (1-v,v,0) + } + + // Check if P in vertex region outside C + b3Vector3 cp = p - c; + b3Scalar d5 = ab.dot(cp); + b3Scalar d6 = ac.dot(cp); + if (d6 >= b3Scalar(0.0) && d5 <= d6) + { + result.m_closestPointOnSimplex = c; + result.m_usedVertices.usedVertexC = true; + result.setBarycentricCoordinates(0,0,1); + return true;//c; // barycentric coordinates (0,0,1) + } + + // Check if P in edge region of AC, if so return projection of P onto AC + b3Scalar vb = d5*d2 - d1*d6; + if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) { + b3Scalar w = d2 / (d2 - d6); + result.m_closestPointOnSimplex = a + w * ac; + result.m_usedVertices.usedVertexA = true; + result.m_usedVertices.usedVertexC = true; + result.setBarycentricCoordinates(1-w,0,w); + return true; + 
//return a + w * ac; // barycentric coordinates (1-w,0,w) + } + + // Check if P in edge region of BC, if so return projection of P onto BC + b3Scalar va = d3*d6 - d5*d4; + if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) { + b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); + + result.m_closestPointOnSimplex = b + w * (c - b); + result.m_usedVertices.usedVertexB = true; + result.m_usedVertices.usedVertexC = true; + result.setBarycentricCoordinates(0,1-w,w); + return true; + // return b + w * (c - b); // barycentric coordinates (0,1-w,w) + } + + // P inside face region. Compute Q through its barycentric coordinates (u,v,w) + b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); + b3Scalar v = vb * denom; + b3Scalar w = vc * denom; + + result.m_closestPointOnSimplex = a + ab * v + ac * w; + result.m_usedVertices.usedVertexA = true; + result.m_usedVertices.usedVertexB = true; + result.m_usedVertices.usedVertexC = true; + result.setBarycentricCoordinates(1-v-w,v,w); + + return true; +// return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w + +} + + + + + +/// Test if point p and d lie on opposite sides of plane through abc +int b3VoronoiSimplexSolver::pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d) +{ + b3Vector3 normal = (b-a).cross(c-a); + + b3Scalar signp = (p - a).dot(normal); // [AP AB AC] + b3Scalar signd = (d - a).dot( normal); // [AD AB AC] + +#ifdef B3_CATCH_DEGENERATE_TETRAHEDRON +#ifdef BT_USE_DOUBLE_PRECISION +if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8))) + { + return -1; + } +#else + if (signd * signd < (b3Scalar(1e-4) * b3Scalar(1e-4))) + { +// printf("affine dependent/degenerate\n");// + return -1; + } +#endif + +#endif + // Points on opposite sides if expression signs are opposite + return signp * signd < b3Scalar(0.); +} + + +bool b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const 
b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult) +{ + b3SubSimplexClosestResult tempResult; + + // Start out assuming point inside all halfspaces, so closest to itself + finalResult.m_closestPointOnSimplex = p; + finalResult.m_usedVertices.reset(); + finalResult.m_usedVertices.usedVertexA = true; + finalResult.m_usedVertices.usedVertexB = true; + finalResult.m_usedVertices.usedVertexC = true; + finalResult.m_usedVertices.usedVertexD = true; + + int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d); + int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b); + int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); + int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); + + if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) + { + finalResult.m_degenerate = true; + return false; + } + + if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) + { + return false; + } + + + b3Scalar bestSqDist = FLT_MAX; + // If point outside face abc then compute closest point on abc + if (pointOutsideABC) + { + closestPtPointTriangle(p, a, b, c,tempResult); + b3Vector3 q = tempResult.m_closestPointOnSimplex; + + b3Scalar sqDist = (q - p).dot( q - p); + // Update best closest point if (squared) distance is less than current best + if (sqDist < bestSqDist) { + bestSqDist = sqDist; + finalResult.m_closestPointOnSimplex = q; + //convert result bitmask! 
+ finalResult.m_usedVertices.reset(); + finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; + finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB; + finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; + finalResult.setBarycentricCoordinates( + tempResult.m_barycentricCoords[VERTA], + tempResult.m_barycentricCoords[VERTB], + tempResult.m_barycentricCoords[VERTC], + 0 + ); + + } + } + + + // Repeat test for face acd + if (pointOutsideACD) + { + closestPtPointTriangle(p, a, c, d,tempResult); + b3Vector3 q = tempResult.m_closestPointOnSimplex; + //convert result bitmask! + + b3Scalar sqDist = (q - p).dot( q - p); + if (sqDist < bestSqDist) + { + bestSqDist = sqDist; + finalResult.m_closestPointOnSimplex = q; + finalResult.m_usedVertices.reset(); + finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; + + finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB; + finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC; + finalResult.setBarycentricCoordinates( + tempResult.m_barycentricCoords[VERTA], + 0, + tempResult.m_barycentricCoords[VERTB], + tempResult.m_barycentricCoords[VERTC] + ); + + } + } + // Repeat test for face adb + + + if (pointOutsideADB) + { + closestPtPointTriangle(p, a, d, b,tempResult); + b3Vector3 q = tempResult.m_closestPointOnSimplex; + //convert result bitmask! 
+ + b3Scalar sqDist = (q - p).dot( q - p); + if (sqDist < bestSqDist) + { + bestSqDist = sqDist; + finalResult.m_closestPointOnSimplex = q; + finalResult.m_usedVertices.reset(); + finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; + finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC; + + finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; + finalResult.setBarycentricCoordinates( + tempResult.m_barycentricCoords[VERTA], + tempResult.m_barycentricCoords[VERTC], + 0, + tempResult.m_barycentricCoords[VERTB] + ); + + } + } + // Repeat test for face bdc + + + if (pointOutsideBDC) + { + closestPtPointTriangle(p, b, d, c,tempResult); + b3Vector3 q = tempResult.m_closestPointOnSimplex; + //convert result bitmask! + b3Scalar sqDist = (q - p).dot( q - p); + if (sqDist < bestSqDist) + { + bestSqDist = sqDist; + finalResult.m_closestPointOnSimplex = q; + finalResult.m_usedVertices.reset(); + // + finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA; + finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; + finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; + + finalResult.setBarycentricCoordinates( + 0, + tempResult.m_barycentricCoords[VERTA], + tempResult.m_barycentricCoords[VERTC], + tempResult.m_barycentricCoords[VERTB] + ); + + } + } + + //help! we ended up full ! 
+ + if (finalResult.m_usedVertices.usedVertexA && + finalResult.m_usedVertices.usedVertexB && + finalResult.m_usedVertices.usedVertexC && + finalResult.m_usedVertices.usedVertexD) + { + return true; + } + + return true; +} + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h new file mode 100644 index 0000000000..a6e27667d8 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h @@ -0,0 +1,177 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. 
+*/ + + + +#ifndef B3_VORONOI_SIMPLEX_SOLVER_H +#define B3_VORONOI_SIMPLEX_SOLVER_H + +#include "Bullet3Common/b3Vector3.h" + + +#define VORONOI_SIMPLEX_MAX_VERTS 5 + +///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure +//#define BT_USE_EQUAL_VERTEX_THRESHOLD +#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f + + +struct b3UsageBitfield{ + b3UsageBitfield() + { + reset(); + } + + void reset() + { + usedVertexA = false; + usedVertexB = false; + usedVertexC = false; + usedVertexD = false; + } + unsigned short usedVertexA : 1; + unsigned short usedVertexB : 1; + unsigned short usedVertexC : 1; + unsigned short usedVertexD : 1; + unsigned short unused1 : 1; + unsigned short unused2 : 1; + unsigned short unused3 : 1; + unsigned short unused4 : 1; +}; + + +struct b3SubSimplexClosestResult +{ + b3Vector3 m_closestPointOnSimplex; + //MASK for m_usedVertices + //stores the simplex vertex-usage, using the MASK, + // if m_usedVertices & MASK then the related vertex is used + b3UsageBitfield m_usedVertices; + b3Scalar m_barycentricCoords[4]; + bool m_degenerate; + + void reset() + { + m_degenerate = false; + setBarycentricCoordinates(); + m_usedVertices.reset(); + } + bool isValid() + { + bool valid = (m_barycentricCoords[0] >= b3Scalar(0.)) && + (m_barycentricCoords[1] >= b3Scalar(0.)) && + (m_barycentricCoords[2] >= b3Scalar(0.)) && + (m_barycentricCoords[3] >= b3Scalar(0.)); + + + return valid; + } + void setBarycentricCoordinates(b3Scalar a=b3Scalar(0.),b3Scalar b=b3Scalar(0.),b3Scalar c=b3Scalar(0.),b3Scalar d=b3Scalar(0.)) + { + m_barycentricCoords[0] = a; + m_barycentricCoords[1] = b; + m_barycentricCoords[2] = c; + m_barycentricCoords[3] = d; + } + +}; + +/// b3VoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin. +/// Can be used with GJK, as an alternative to Johnson distance algorithm. 
+ +B3_ATTRIBUTE_ALIGNED16(class) b3VoronoiSimplexSolver +{ +public: + + B3_DECLARE_ALIGNED_ALLOCATOR(); + + int m_numVertices; + + b3Vector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; + b3Vector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; + b3Vector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; + + + + b3Vector3 m_cachedP1; + b3Vector3 m_cachedP2; + b3Vector3 m_cachedV; + b3Vector3 m_lastW; + + b3Scalar m_equalVertexThreshold; + bool m_cachedValidClosest; + + + b3SubSimplexClosestResult m_cachedBC; + + bool m_needsUpdate; + + void removeVertex(int index); + void reduceVertices (const b3UsageBitfield& usedVerts); + bool updateClosestVectorAndPoints(); + + bool closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); + int pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); + bool closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result); + +public: + + b3VoronoiSimplexSolver() + : m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) + { + } + void reset(); + + void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); + + void setEqualVertexThreshold(b3Scalar threshold) + { + m_equalVertexThreshold = threshold; + } + + b3Scalar getEqualVertexThreshold() const + { + return m_equalVertexThreshold; + } + + bool closest(b3Vector3& v); + + b3Scalar maxVertex(); + + bool fullSimplex() const + { + return (m_numVertices == 4); + } + + int getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const; + + bool inSimplex(const b3Vector3& w); + + void backup_closest(b3Vector3& v) ; + + bool emptySimplex() const ; + + void compute_points(b3Vector3& p1, b3Vector3& p2) ; + + int numVertices() const + { + return m_numVertices; + } + + +}; + +#endif //B3_VORONOI_SIMPLEX_SOLVER_H + diff --git 
// a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl
// new file mode 100644
// index 0000000000..faa413441c
// --- /dev/null
// +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl
// @@ -0,0 +1,283 @@
//keep this enum in sync with the CPU version (in btCollidable.h)
//written by Erwin Coumans

#define SHAPE_CONVEX_HULL 3
#define SHAPE_CONCAVE_TRIMESH 5
#define TRIANGLE_NUM_CONVEX_FACES 5
#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6
#define SHAPE_SPHERE 7

typedef unsigned int u32;

#define MAX_NUM_PARTS_IN_BITS 10

///btQuantizedBvhNode is a compressed aabb node, 16 bytes.
///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).
typedef struct
{
	//12 bytes
	unsigned short int m_quantizedAabbMin[3];
	unsigned short int m_quantizedAabbMax[3];
	//4 bytes
	int m_escapeIndexOrTriangleIndex;
} btQuantizedBvhNode;

// Per-BVH header: quantization frame plus offsets into the shared
// subtree-header and node arrays.
typedef struct
{
	float4 m_aabbMin;
	float4 m_aabbMax;
	float4 m_quantization;
	int m_numNodes;
	int m_numSubTrees;
	int m_nodeOffset;
	int m_subTreeOffset;

} b3BvhInfo;

// Strip the upper (MAX_NUM_PARTS_IN_BITS + 1) bits of the packed value and
// return the remaining low bits, where the triangle index is stored.
int getTriangleIndex(const btQuantizedBvhNode* rootNode)
{
	const unsigned int upperBitsMask = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
	return (rootNode->m_escapeIndexOrTriangleIndex & ~upperBitsMask);
}

// 1 for a leaf, 0 for an internal node.
int isLeaf(const btQuantizedBvhNode* rootNode)
{
	//skipindex is negative (internal node), triangleindex >=0 (leafnode)
	if (rootNode->m_escapeIndexOrTriangleIndex >= 0)
		return 1;
	return 0;
}

// Number of nodes to skip to step over an internal node's subtree (the
// packed value negated).
int getEscapeIndex(const btQuantizedBvhNode* rootNode)
{
	return -rootNode->m_escapeIndexOrTriangleIndex;
}

typedef struct
{
	//12 bytes
	unsigned short int m_quantizedAabbMin[3];
	unsigned short int m_quantizedAabbMax[3];
	//4 bytes, points to the root of the subtree
	int m_rootNodeIndex;
	//4 bytes
	int m_subtreeSize;
	int m_padding[3];
} btBvhSubtreeInfo;

///keep this in sync with btCollidable.h
typedef struct
{
	int m_numChildShapes;
	int blaat2;
	int m_shapeType;
	int m_shapeIndex;

} btCollidableGpu;

typedef struct
{
	float4 m_childPosition;
	float4 m_childOrientation;
	int m_shapeIndex;
	int m_unused0;
	int m_unused1;
	int m_unused2;
} btGpuChildShape;


typedef struct
{
	float4 m_pos;
	float4 m_quat;
	float4 m_linVel;
	float4 m_angVel;

	u32 m_collidableIdx;
	float m_invMass;
	float m_restituitionCoeff;
	float m_frictionCoeff;
} BodyData;

typedef struct
{
	union
	{
		float4 m_min;
		float m_minElems[4];
		int m_minIndices[4];
	};
	union
	{
		float4 m_max;
		float m_maxElems[4];
		int m_maxIndices[4];
	};
} btAabbCL;


// Interval-overlap test on three axes of two quantized boxes.
// Returns 1 when the boxes overlap (touching counts), 0 otherwise.
int testQuantizedAabbAgainstQuantizedAabb(
	const unsigned short int* aabbMin1,
	const unsigned short int* aabbMax1,
	const unsigned short int* aabbMin2,
	const unsigned short int* aabbMax2)
{
	for (int axis = 0; axis < 3; axis++)
	{
		if (aabbMin1[axis] > aabbMax2[axis])
			return 0;
		if (aabbMax1[axis] < aabbMin2[axis])
			return 0;
	}
	return 1;
}


// Clamp point2 to [bvhAabbMin, bvhAabbMax], scale it into the quantized
// frame and write three unsigned shorts to out. A max corner is offset by
// one and gets its lowest bit set; a min corner gets its lowest bit cleared.
void quantizeWithClamp(unsigned short* out, float4 point2, int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)
{
	float4 clampedPoint = min(max(point2, bvhAabbMin), bvhAabbMax);
	float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;

	if (isMax)
	{
		out[0] = (unsigned short)(((unsigned short)(v.x + 1.f) | 1));
		out[1] = (unsigned short)(((unsigned short)(v.y + 1.f) | 1));
		out[2] = (unsigned short)(((unsigned short)(v.z + 1.f) | 1));
		return;
	}

	out[0] = (unsigned short)(((unsigned short)(v.x) & 0xfffe));
	out[1] = (unsigned short)(((unsigned short)(v.y) & 0xfffe));
	out[2] = (unsigned short)(((unsigned short)(v.z) & 0xfffe));
}


// work-in-progress
// One work-item per broadphase pair. When body A is a concave trimesh and
// body B is a convex hull, sphere or compound of convex hulls, the BVH of A
// is walked with B's quantized AABB and one int4 is appended to
// concavePairsOut for every overlapping leaf:
//   (bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB or 0).
// numConcavePairsOut is advanced atomically before the capacity check, so it
// can end up larger than maxNumConcavePairsCapacity; only entries below the
// capacity are actually written.
__kernel void bvhTraversalKernel(__global const int4* pairs,
								 __global const BodyData* rigidBodies,
								 __global const btCollidableGpu* collidables,
								 __global btAabbCL* aabbs,
								 __global int4* concavePairsOut,
								 __global volatile int* numConcavePairsOut,
								 __global const btBvhSubtreeInfo* subtreeHeadersRoot,
								 __global const btQuantizedBvhNode* quantizedNodesRoot,
								 __global const b3BvhInfo* bvhInfos,
								 int numPairs,
								 int maxNumConcavePairsCapacity)
{
	int id = get_global_id(0);
	if (id >= numPairs)
		return;

	int bodyIndexA = pairs[id].x;
	int bodyIndexB = pairs[id].y;
	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

	//once the broadphase avoids static-static pairs, we can remove this test
	if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0))
	{
		return;
	}

	if (collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH)
		return;

	int shapeTypeB = collidables[collidableIndexB].m_shapeType;

	int supportedShapeB = (shapeTypeB == SHAPE_CONVEX_HULL) ||
						  (shapeTypeB == SHAPE_SPHERE) ||
						  (shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS);
	if (!supportedShapeB)
		return;

	// For the trimesh collidable, m_numChildShapes is used as the index into bvhInfos.
	b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];

	float4 bvhAabbMin = bvhInfo.m_aabbMin;
	float4 bvhAabbMax = bvhInfo.m_aabbMax;
	float4 bvhQuantization = bvhInfo.m_quantization;
	int numSubtreeHeaders = bvhInfo.m_numSubTrees;
	__global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];
	__global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];

	// Quantize B's AABB into the frame of A's BVH.
	unsigned short int quantizedQueryAabbMin[3];
	unsigned short int quantizedQueryAabbMax[3];
	quantizeWithClamp(quantizedQueryAabbMin, aabbs[bodyIndexB].m_min, false, bvhAabbMin, bvhAabbMax, bvhQuantization);
	quantizeWithClamp(quantizedQueryAabbMax, aabbs[bodyIndexB].m_max, true, bvhAabbMin, bvhAabbMax, bvhQuantization);

	for (int i = 0; i < numSubtreeHeaders; i++)
	{
		btBvhSubtreeInfo subtree = subtreeHeaders[i];

		int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax);
		if (overlap == 0)
			continue;

		int curIndex = subtree.m_rootNodeIndex;
		int endNodeIndex = subtree.m_rootNodeIndex + subtree.m_subtreeSize;

		while (curIndex < endNodeIndex)
		{
			btQuantizedBvhNode rootNode = quantizedNodes[curIndex];
			int aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode.m_quantizedAabbMin, rootNode.m_quantizedAabbMax);
			int isLeafNode = isLeaf(&rootNode);

			if (aabbOverlap && isLeafNode)
			{
				int triangleIndex = getTriangleIndex(&rootNode);
				if (shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS)
				{
					// Reserve one output slot per child shape of B in a single atomic.
					int numChildrenB = collidables[collidableIndexB].m_numChildShapes;
					int pairIdx = atomic_add(numConcavePairsOut, numChildrenB);
					for (int b = 0; b < numChildrenB; b++)
					{
						if ((pairIdx + b) < maxNumConcavePairsCapacity)
						{
							int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b;
							int4 newPair = (int4)(bodyIndexA, bodyIndexB, triangleIndex, childShapeIndexB);
							concavePairsOut[pairIdx + b] = newPair;
						}
					}
				}
				else
				{
					int pairIdx = atomic_inc(numConcavePairsOut);
					if (pairIdx < maxNumConcavePairsCapacity)
					{
						int4 newPair = (int4)(bodyIndexA, bodyIndexB, triangleIndex, 0);
						concavePairsOut[pairIdx] = newPair;
					}
				}
			}

			// Step to the next node on overlap or on any leaf; a
			// non-overlapping internal node skips its whole subtree.
			if (aabbOverlap || isLeafNode)
			{
				curIndex++;
			}
			else
			{
				curIndex += getEscapeIndex(&rootNode);
			}
		}
	}
}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h new file mode 100644 index 0000000000..4b3b49eae8 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h @@ -0,0 +1,258 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* bvhTraversalKernelCL= \ +"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +"//written by Erwin Coumans\n" +"#define SHAPE_CONVEX_HULL 3\n" +"#define SHAPE_CONCAVE_TRIMESH 5\n" +"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +"#define SHAPE_SPHERE 7\n" +"typedef unsigned int u32;\n" +"#define MAX_NUM_PARTS_IN_BITS 10\n" +"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes\n" +" int m_escapeIndexOrTriangleIndex;\n" +"} btQuantizedBvhNode;\n" +"typedef struct\n" +"{\n" +" float4 m_aabbMin;\n" +" float4 m_aabbMax;\n" +" float4 m_quantization;\n" +" int m_numNodes;\n" +" int m_numSubTrees;\n" +" int m_nodeOffset;\n" +" int m_subTreeOffset;\n" +"} b3BvhInfo;\n" +"int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int isLeaf(const btQuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +"}\n" +" \n" +"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes, points to the root of the subtree\n" +" int m_rootNodeIndex;\n" +" //4 bytes\n" +" int m_subtreeSize;\n" +" int m_padding[3];\n" +"} btBvhSubtreeInfo;\n" +"///keep this in sync with btCollidable.h\n" +"typedef struct\n" +"{\n" +" int m_numChildShapes;\n" +" int blaat2;\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +" \n" +"} btCollidableGpu;\n" +"typedef struct\n" +"{\n" +" float4 m_childPosition;\n" +" float4 m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"} btGpuChildShape;\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +" u32 m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} BodyData;\n" +"typedef struct \n" +"{\n" +" union\n" +" {\n" +" float4 m_min;\n" +" float m_minElems[4];\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float4 m_max;\n" +" float m_maxElems[4];\n" +" int m_maxIndices[4];\n" +" };\n" +"} btAabbCL;\n" +"int testQuantizedAabbAgainstQuantizedAabb(\n" +" const unsigned short int* aabbMin1,\n" +" const unsigned short int* aabbMax1,\n" +" const unsigned short int* aabbMin2,\n" +" const unsigned short int* aabbMax2)\n" +"{\n" +" //int overlap = 1;\n" +" if (aabbMin1[0] > aabbMax2[0])\n" +" return 0;\n" +" if (aabbMax1[0] < aabbMin2[0])\n" +" return 0;\n" +" if (aabbMin1[1] > aabbMax2[1])\n" +" return 0;\n" +" if (aabbMax1[1] < aabbMin2[1])\n" +" return 0;\n" +" if (aabbMin1[2] > aabbMax2[2])\n" +" return 0;\n" +" if (aabbMax1[2] < aabbMin2[2])\n" +" return 0;\n" +" return 1;\n" +" //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < 
aabbMin2[0])) ? 0 : overlap;\n" +" //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" +" //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" +" //return overlap;\n" +"}\n" +"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" +"{\n" +" float4 clampedPoint = max(point2,bvhAabbMin);\n" +" clampedPoint = min (clampedPoint, bvhAabbMax);\n" +" float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" +" if (isMax)\n" +" {\n" +" out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" +" out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" +" out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" +" } else\n" +" {\n" +" out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" +" out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" +" out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" +" }\n" +"}\n" +"// work-in-progress\n" +"__kernel void bvhTraversalKernel( __global const int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global btAabbCL* aabbs,\n" +" __global int4* concavePairsOut,\n" +" __global volatile int* numConcavePairsOut,\n" +" __global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" +" __global const btQuantizedBvhNode* quantizedNodesRoot,\n" +" __global const b3BvhInfo* bvhInfos,\n" +" int numPairs,\n" +" int maxNumConcavePairsCapacity)\n" +"{\n" +" int id = get_global_id(0);\n" +" if (id>=numPairs)\n" +" return;\n" +" \n" +" int bodyIndexA = pairs[id].x;\n" +" int bodyIndexB = pairs[id].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" //once the broadphase avoids static-static pairs, we can remove this test\n" +" if ((rigidBodies[bodyIndexA].m_invMass==0) 
&&(rigidBodies[bodyIndexB].m_invMass==0))\n" +" {\n" +" return;\n" +" }\n" +" \n" +" if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" +" return;\n" +" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +" \n" +" if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" +" shapeTypeB!=SHAPE_SPHERE &&\n" +" shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" +" )\n" +" return;\n" +" b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" +" float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" +" float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" +" float4 bvhQuantization = bvhInfo.m_quantization;\n" +" int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" +" __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" +" __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" +" \n" +" unsigned short int quantizedQueryAabbMin[3];\n" +" unsigned short int quantizedQueryAabbMax[3];\n" +" quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +" quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" +" \n" +" for (int i=0;i<numSubtreeHeaders;i++)\n" +" {\n" +" btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" +" \n" +" int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" +" if (overlap != 0)\n" +" {\n" +" int startNodeIndex = subtree.m_rootNodeIndex;\n" +" int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" +" int curIndex = startNodeIndex;\n" +" int escapeIndex;\n" +" int isLeafNode;\n" +" int aabbOverlap;\n" +" while (curIndex < endNodeIndex)\n" +" {\n" +" btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" +" aabbOverlap = 
testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" +" isLeafNode = isLeaf(&rootNode);\n" +" if (aabbOverlap)\n" +" {\n" +" if (isLeafNode)\n" +" {\n" +" int triangleIndex = getTriangleIndex(&rootNode);\n" +" if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +" int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" +" for (int b=0;b<numChildrenB;b++)\n" +" {\n" +" if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" +" {\n" +" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +" int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" +" concavePairsOut[pairIdx+b] = newPair;\n" +" }\n" +" }\n" +" } else\n" +" {\n" +" int pairIdx = atomic_inc(numConcavePairsOut);\n" +" if (pairIdx<maxNumConcavePairsCapacity)\n" +" {\n" +" int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" +" concavePairsOut[pairIdx] = newPair;\n" +" }\n" +" }\n" +" } \n" +" curIndex++;\n" +" } else\n" +" {\n" +" if (isLeafNode)\n" +" {\n" +" curIndex++;\n" +" } else\n" +" {\n" +" escapeIndex = getEscapeIndex(&rootNode);\n" +" curIndex += escapeIndex;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" }\n" +"}\n" +; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl new file mode 100644 index 0000000000..e754f4e1da --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl @@ -0,0 +1,311 @@ + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" + +#define AppendInc(x, out) out = atomic_inc(x) +#define GET_NPOINTS(x) (x).m_worldNormalOnB.w +#ifdef cl_ext_atomic_counters_32 + #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else + #define counter32_t 
volatile __global int* +#endif + + +__kernel void mprPenetrationKernel( __global int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global float4* separatingNormals, + __global int* hasSeparatingAxis, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int contactCapacity, + int numPairs) +{ + int i = get_global_id(0); + int pairIndex = i; + if (i<numPairs) + { + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + //once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } + + + if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) + { + return; + } + + float depthOut; + b3Float4 dirOut; + b3Float4 posOut; + + + int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut); + + + + + + if (res==0) + { + //add a contact + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if (dstIdx<contactCapacity) + { + pairs[pairIndex].z = dstIdx; + __global struct b3Contact4Data* c = globalContactsOut + dstIdx; + c->m_worldNormalOnB = -dirOut;//normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? 
-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + //for (int i=0;i<nContacts;i++) + posOut.w = -depthOut; + c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]]; + GET_NPOINTS(*c) = 1;//nContacts; + } + } + + } +} + +typedef float4 Quaternion; +#define make_float4 (float4) + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + + + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + + +inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, +const float4* dir, __global const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;i<numVerts;i++) + { + float dp = dot(vertices[hull->m_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; 
+ max[0] += offset; +} + + +bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* unitSphereDirections, + int numUnitSphereDirections, + float4* sep, + float* dmin) +{ + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test unit sphere directions + for (int i=0;i<numUnitSphereDirections;i++) + { + + float4 crossje; + crossje = unitSphereDirections[i]; + + if (dot3F4(DeltaC2,crossje)>0) + crossje *= -1.f; + { + float dist; + bool result = true; + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? 
d0:d1; + result = true; + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + +__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* unitSphereDirections, + __global float4* separatingNormals, + __global int* hasSeparatingAxis, + __global float* dmins, + int numUnitSphereDirections, + int numPairs + ) +{ + + int i = get_global_id(0); + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + + float dmin = dmins[i]; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = separatingNormals[i]; + + int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; + if (numEdgeEdgeDirections>numUnitSphereDirections) + { + bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + 
vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin); + if (!sepEE) + { + hasSeparatingAxis[i] = 0; + } else + { + hasSeparatingAxis[i] = 1; + separatingNormals[i] = sepNormal; + } + } + } //if (hasSeparatingAxis[i]) + }//(i<numPairs) +} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h new file mode 100644 index 0000000000..7ed4b382c3 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h @@ -0,0 +1,1446 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* mprKernelsCL= \ +"/***\n" +" * ---------------------------------\n" +" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" +" *\n" +" * This file was ported from mpr.c file, part of libccd.\n" +" * The Minkoski Portal Refinement implementation was ported \n" +" * to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" +" * at http://github.com/erwincoumans/bullet3\n" +" *\n" +" * Distributed under the OSI-approved BSD License (the \"License\");\n" +" * see <http://www.opensource.org/licenses/bsd-license.php>.\n" +" * This software is distributed WITHOUT ANY WARRANTY; without even the\n" +" * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" +" * See the License for more information.\n" +" */\n" +"#ifndef B3_MPR_PENETRATION_H\n" +"#define B3_MPR_PENETRATION_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" +"#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" 
+"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" +"#define B3_STATIC\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#define B3_RIGIDBODY_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" 
+"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +"{\n" +" b3Quat q;\n" +" q=in;\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifndef 
B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" 
return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +"struct b3RigidBodyData\n" +"{\n" +" b3Float4 m_pos;\n" +" b3Quat m_quat;\n" +" b3Float4 m_linVel;\n" +" b3Float4 m_angVel;\n" +" int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"};\n" 
+"typedef struct b3InertiaData b3InertiaData_t;\n" +"struct b3InertiaData\n" +"{\n" +" b3Mat3x3 m_invInertiaWorld;\n" +" b3Mat3x3 m_initInvInertia;\n" +"};\n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" +"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"typedef struct b3GpuFace b3GpuFace_t;\n" +"struct b3GpuFace\n" +"{\n" +" b3Float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +" int m_unusedPadding1;\n" +" int m_unusedPadding2;\n" +"};\n" +"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +"struct b3ConvexPolyhedronData\n" +"{\n" +" b3Float4 m_localCenter;\n" +" b3Float4 m_extents;\n" +" b3Float4 mC;\n" +" b3Float4 mE;\n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"};\n" +"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +"#ifndef B3_COLLIDABLE_H\n" +"#define B3_COLLIDABLE_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"enum b3ShapeTypes\n" +"{\n" +" SHAPE_HEIGHT_FIELD=1,\n" +" SHAPE_CONVEX_HULL=3,\n" +" SHAPE_PLANE=4,\n" +" SHAPE_CONCAVE_TRIMESH=5,\n" +" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +" SHAPE_SPHERE=7,\n" +" MAX_NUM_SHAPE_TYPES,\n" +"};\n" +"typedef struct b3Collidable b3Collidable_t;\n" +"struct b3Collidable\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" union\n" +" {\n" +" float m_radius;\n" +" int m_compoundBvhIndex;\n" +" };\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +"};\n" +"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" +"struct b3GpuChildShape\n" 
+"{\n" +" b3Float4 m_childPosition;\n" +" b3Quat m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"struct b3CompoundOverlappingPair\n" +"{\n" +" int m_bodyIndexA;\n" +" int m_bodyIndexB;\n" +"// int m_pairType;\n" +" int m_childShapeIndexA;\n" +" int m_childShapeIndexB;\n" +"};\n" +"#endif //B3_COLLIDABLE_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define B3_MPR_SQRT sqrt\n" +"#endif\n" +"#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" +"#define B3_MPR_FABS fabs\n" +"#define B3_MPR_TOLERANCE 1E-6f\n" +"#define B3_MPR_MAX_ITERATIONS 1000\n" +"struct _b3MprSupport_t \n" +"{\n" +" b3Float4 v; //!< Support point in minkowski sum\n" +" b3Float4 v1; //!< Support point in obj1\n" +" b3Float4 v2; //!< Support point in obj2\n" +"};\n" +"typedef struct _b3MprSupport_t b3MprSupport_t;\n" +"struct _b3MprSimplex_t \n" +"{\n" +" b3MprSupport_t ps[4];\n" +" int last; //!< index of last added point\n" +"};\n" +"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" +"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" +"{\n" +" return &s->ps[idx];\n" +"}\n" +"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" +"{\n" +" s->last = size - 1;\n" +"}\n" +"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" +"{\n" +" return s->last + 1;\n" +"}\n" +"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" +"{\n" +" // here is no check on boundaries\n" +" return &s->ps[idx];\n" +"}\n" +"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" +"{\n" +" *d = *s;\n" +"}\n" +"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" +"{\n" +" b3MprSupportCopy(s->ps + pos, a);\n" +"}\n" +"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" +"{\n" +" b3MprSupport_t supp;\n" +" b3MprSupportCopy(&supp, &s->ps[pos1]);\n" +" b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" +" 
b3MprSupportCopy(&s->ps[pos2], &supp);\n" +"}\n" +"inline int b3MprIsZero(float val)\n" +"{\n" +" return B3_MPR_FABS(val) < FLT_EPSILON;\n" +"}\n" +"inline int b3MprEq(float _a, float _b)\n" +"{\n" +" float ab;\n" +" float a, b;\n" +" ab = B3_MPR_FABS(_a - _b);\n" +" if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" +" return 1;\n" +" a = B3_MPR_FABS(_a);\n" +" b = B3_MPR_FABS(_b);\n" +" if (b > a){\n" +" return ab < FLT_EPSILON * b;\n" +" }else{\n" +" return ab < FLT_EPSILON * a;\n" +" }\n" +"}\n" +"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" +"{\n" +" return b3MprEq((*a).x, (*b).x)\n" +" && b3MprEq((*a).y, (*b).y)\n" +" && b3MprEq((*a).z, (*b).z);\n" +"}\n" +"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, b3ConstArray(b3Float4) verticesA)\n" +"{\n" +" b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +" float maxDot = -B3_LARGE_FLOAT;\n" +" if( 0 < hull->m_numVertices )\n" +" {\n" +" const b3Float4 scaled = supportVec;\n" +" int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +" return verticesA[hull->m_vertexOffset+index];\n" +" }\n" +" return supVec;\n" +"}\n" +"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" const b3Float4* _dir, b3Float4* outp, int logme)\n" +"{\n" +" //dir is in worldspace, move to local space\n" +" \n" +" b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" +" b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" +" \n" +" b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" +" \n" +" const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" +" \n" +" //find local support vertex\n" +" int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" +" \n" +" 
b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" +" __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" +" \n" +" b3Float4 pInA;\n" +" if (logme)\n" +" {\n" +" b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" +" float maxDot = -B3_LARGE_FLOAT;\n" +" if( 0 < hull->m_numVertices )\n" +" {\n" +" const b3Float4 scaled = localDir;\n" +" int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" +" pInA = cpuVertices[hull->m_vertexOffset+index];\n" +" \n" +" }\n" +" } else\n" +" {\n" +" pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" +" }\n" +" //move vertex to world space\n" +" *outp = b3TransformPoint(pInA,pos,orn);\n" +" \n" +"}\n" +"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" const b3Float4* _dir, b3MprSupport_t *supp)\n" +"{\n" +" b3Float4 dir;\n" +" dir = *_dir;\n" +" b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" +" dir = *_dir*-1.f;\n" +" b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" +" supp->v = supp->v1 - supp->v2;\n" +"}\n" +"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" +"{\n" +" center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" +" center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" +" center->v = center->v1 - center->v2;\n" +"}\n" +"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" +"{\n" +" (*v).x = x;\n" +" (*v).y = y;\n" +" (*v).z = z;\n" +" (*v).w = 0.f;\n" +"}\n" +"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" +"{\n" 
+" (*v).x += (*w).x;\n" +" (*v).y += (*w).y;\n" +" (*v).z += (*w).z;\n" +"}\n" +"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" +"{\n" +" *v = *w;\n" +"}\n" +"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" +"{\n" +" *d *= k;\n" +"}\n" +"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" +"{\n" +" float dot;\n" +" dot = b3Dot3F4(*a,*b);\n" +" return dot;\n" +"}\n" +"inline float b3MprVec3Len2(const b3Float4 *v)\n" +"{\n" +" return b3MprVec3Dot(v, v);\n" +"}\n" +"inline void b3MprVec3Normalize(b3Float4 *d)\n" +"{\n" +" float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" +" b3MprVec3Scale(d, k);\n" +"}\n" +"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" +"{\n" +" *d = b3Cross3(*a,*b);\n" +" \n" +"}\n" +"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" +"{\n" +" *d = *v - *w;\n" +"}\n" +"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" +"{\n" +" b3Float4 v2v1, v3v1;\n" +" b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" b3MprVec3Cross(dir, &v2v1, &v3v1);\n" +" b3MprVec3Normalize(dir);\n" +"}\n" +"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" +" const b3Float4 *dir)\n" +"{\n" +" float dot;\n" +" dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +" return b3MprIsZero(dot) || dot > 0.f;\n" +"}\n" +"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" +" const b3MprSupport_t *v4,\n" +" const b3Float4 *dir)\n" +"{\n" +" float dv1, dv2, dv3, dv4;\n" +" float dot1, dot2, dot3;\n" +" // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" +" dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" +" dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" +" dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" +" dv4 = 
b3MprVec3Dot(&v4->v, dir);\n" +" dot1 = dv4 - dv1;\n" +" dot2 = dv4 - dv2;\n" +" dot3 = dv4 - dv3;\n" +" dot1 = B3_MPR_FMIN(dot1, dot2);\n" +" dot1 = B3_MPR_FMIN(dot1, dot3);\n" +" return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" +"}\n" +"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, \n" +" const b3MprSupport_t *v4,\n" +" const b3Float4 *dir)\n" +"{\n" +" float dot;\n" +" dot = b3MprVec3Dot(&v4->v, dir);\n" +" return b3MprIsZero(dot) || dot > 0.f;\n" +"}\n" +"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" +" const b3MprSupport_t *v4)\n" +"{\n" +" float dot;\n" +" b3Float4 v4v0;\n" +" b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" +" if (dot > 0.f){\n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" +" if (dot > 0.f){\n" +" b3MprSimplexSet(portal, 1, v4);\n" +" }else{\n" +" b3MprSimplexSet(portal, 3, v4);\n" +" }\n" +" }else{\n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" +" if (dot > 0.f){\n" +" b3MprSimplexSet(portal, 2, v4);\n" +" }else{\n" +" b3MprSimplexSet(portal, 1, v4);\n" +" }\n" +" }\n" +"}\n" +"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" __global int* hasSepAxis,\n" +" b3MprSimplex_t *portal)\n" +"{\n" +" b3Float4 dir, va, vb;\n" +" float dot;\n" +" int cont;\n" +" \n" +" \n" +" // vertex 0 is center of portal\n" +" b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" +" // vertex 0 is center of portal\n" +" b3MprSimplexSetSize(portal, 1);\n" +" \n" +" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +" b3Float4* b3mpr_vec3_origin = &zero;\n" +" if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 
0)->v, b3mpr_vec3_origin)){\n" +" // Portal's center lies on origin (0,0,0) => we know that objects\n" +" // intersect but we would need to know penetration info.\n" +" // So move center little bit...\n" +" b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" +" b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" +" }\n" +" // vertex 1 = support in direction of origin\n" +" b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Scale(&dir, -1.f);\n" +" b3MprVec3Normalize(&dir);\n" +" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" +" b3MprSimplexSetSize(portal, 2);\n" +" // test if origin isn't outside of v1\n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" +" \n" +" if (b3MprIsZero(dot) || dot < 0.f)\n" +" return -1;\n" +" // vertex 2\n" +" b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" +" if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" +" // origin lies on v1\n" +" return 1;\n" +" }else{\n" +" // origin lies on v0-v1 segment\n" +" return 2;\n" +" }\n" +" }\n" +" b3MprVec3Normalize(&dir);\n" +" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" +" \n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" +" if (b3MprIsZero(dot) || dot < 0.f)\n" +" return -1;\n" +" b3MprSimplexSetSize(portal, 3);\n" +" // vertex 3 direction\n" +" b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +" &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +" &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Cross(&dir, &va, &vb);\n" +" b3MprVec3Normalize(&dir);\n" +" // it is better to form portal faces to be oriented \"outside\" origin\n" +" dot = b3MprVec3Dot(&dir, 
&b3MprSimplexPoint(portal, 0)->v);\n" +" if (dot > 0.f){\n" +" b3MprSimplexSwap(portal, 1, 2);\n" +" b3MprVec3Scale(&dir, -1.f);\n" +" }\n" +" while (b3MprSimplexSize(portal) < 4){\n" +" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" +" \n" +" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" +" if (b3MprIsZero(dot) || dot < 0.f)\n" +" return -1;\n" +" cont = 0;\n" +" // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" +" // continue\n" +" b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +" &b3MprSimplexPoint(portal, 3)->v);\n" +" dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +" if (dot < 0.f && !b3MprIsZero(dot)){\n" +" b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" +" cont = 1;\n" +" }\n" +" if (!cont){\n" +" // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" +" // continue\n" +" b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" +" &b3MprSimplexPoint(portal, 2)->v);\n" +" dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" +" if (dot < 0.f && !b3MprIsZero(dot)){\n" +" b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" +" cont = 1;\n" +" }\n" +" }\n" +" if (cont){\n" +" b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" +" &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" +" &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Cross(&dir, &va, &vb);\n" +" b3MprVec3Normalize(&dir);\n" +" }else{\n" +" b3MprSimplexSetSize(portal, 4);\n" +" }\n" +" }\n" +" return 0;\n" +"}\n" +"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" b3MprSimplex_t *portal)\n" +"{\n" 
+" b3Float4 dir;\n" +" b3MprSupport_t v4;\n" +" for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +" //while (1)\n" +" {\n" +" // compute direction outside the portal (from v0 throught v1,v2,v3\n" +" // face)\n" +" b3PortalDir(portal, &dir);\n" +" // test if origin is inside the portal\n" +" if (portalEncapsulesOrigin(portal, &dir))\n" +" return 0;\n" +" // get next support point\n" +" \n" +" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +" // test if v4 can expand portal to contain origin and if portal\n" +" // expanding doesn't reach given tolerance\n" +" if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" +" || portalReachTolerance(portal, &v4, &dir))\n" +" {\n" +" return -1;\n" +" }\n" +" // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" +" // difference (direction from v0).\n" +" b3ExpandPortal(portal, &v4);\n" +" }\n" +" return -1;\n" +"}\n" +"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" +"{\n" +" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +" b3Float4* b3mpr_vec3_origin = &zero;\n" +" b3Float4 dir;\n" +" size_t i;\n" +" float b[4], sum, inv;\n" +" b3Float4 vec, p1, p2;\n" +" b3PortalDir(portal, &dir);\n" +" // use barycentric coordinates of tetrahedron to find origin\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +" &b3MprSimplexPoint(portal, 2)->v);\n" +" b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +" &b3MprSimplexPoint(portal, 2)->v);\n" +" b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" +" sum = 
b[0] + b[1] + b[2] + b[3];\n" +" if (b3MprIsZero(sum) || sum < 0.f){\n" +" b[0] = 0.f;\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" +" &b3MprSimplexPoint(portal, 3)->v);\n" +" b[1] = b3MprVec3Dot(&vec, &dir);\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" +" &b3MprSimplexPoint(portal, 1)->v);\n" +" b[2] = b3MprVec3Dot(&vec, &dir);\n" +" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" +" &b3MprSimplexPoint(portal, 2)->v);\n" +" b[3] = b3MprVec3Dot(&vec, &dir);\n" +" sum = b[1] + b[2] + b[3];\n" +" }\n" +" inv = 1.f / sum;\n" +" b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" +" b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" +" for (i = 0; i < 4; i++){\n" +" b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" +" b3MprVec3Scale(&vec, b[i]);\n" +" b3MprVec3Add(&p1, &vec);\n" +" b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" +" b3MprVec3Scale(&vec, b[i]);\n" +" b3MprVec3Add(&p2, &vec);\n" +" }\n" +" b3MprVec3Scale(&p1, inv);\n" +" b3MprVec3Scale(&p2, inv);\n" +" b3MprVec3Copy(pos, &p1);\n" +" b3MprVec3Add(pos, &p2);\n" +" b3MprVec3Scale(pos, 0.5);\n" +"}\n" +"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" +"{\n" +" b3Float4 ab;\n" +" b3MprVec3Sub2(&ab, a, b);\n" +" return b3MprVec3Len2(&ab);\n" +"}\n" +"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" +" const b3Float4 *x0,\n" +" const b3Float4 *b,\n" +" b3Float4 *witness)\n" +"{\n" +" // The computation comes from solving equation of segment:\n" +" // S(t) = x0 + t.d\n" +" // where - x0 is initial point of segment\n" +" // - d is direction of segment from x0 (|d| > 0)\n" +" // - t belongs to <0, 1> interval\n" +" // \n" +" // Than, distance from a segment to some point P can be expressed:\n" +" // D(t) = |x0 + t.d - P|^2\n" +" // which is distance from any point on segment. 
Minimization\n" +" // of this function brings distance from P to segment.\n" +" // Minimization of D(t) leads to simple quadratic equation that's\n" +" // solving is straightforward.\n" +" //\n" +" // Bonus of this method is witness point for free.\n" +" float dist, t;\n" +" b3Float4 d, a;\n" +" // direction of segment\n" +" b3MprVec3Sub2(&d, b, x0);\n" +" // precompute vector from P to x0\n" +" b3MprVec3Sub2(&a, x0, P);\n" +" t = -1.f * b3MprVec3Dot(&a, &d);\n" +" t /= b3MprVec3Len2(&d);\n" +" if (t < 0.f || b3MprIsZero(t)){\n" +" dist = b3MprVec3Dist2(x0, P);\n" +" if (witness)\n" +" b3MprVec3Copy(witness, x0);\n" +" }else if (t > 1.f || b3MprEq(t, 1.f)){\n" +" dist = b3MprVec3Dist2(b, P);\n" +" if (witness)\n" +" b3MprVec3Copy(witness, b);\n" +" }else{\n" +" if (witness){\n" +" b3MprVec3Copy(witness, &d);\n" +" b3MprVec3Scale(witness, t);\n" +" b3MprVec3Add(witness, x0);\n" +" dist = b3MprVec3Dist2(witness, P);\n" +" }else{\n" +" // recycling variables\n" +" b3MprVec3Scale(&d, t);\n" +" b3MprVec3Add(&d, &a);\n" +" dist = b3MprVec3Len2(&d);\n" +" }\n" +" }\n" +" return dist;\n" +"}\n" +"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" +" const b3Float4 *x0, const b3Float4 *B,\n" +" const b3Float4 *C,\n" +" b3Float4 *witness)\n" +"{\n" +" // Computation comes from analytic expression for triangle (x0, B, C)\n" +" // T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" +" // Then equation for distance is:\n" +" // D(s, t) = | T(s, t) - P |^2\n" +" // This leads to minimization of quadratic function of two variables.\n" +" // The solution from is taken only if s is between 0 and 1, t is\n" +" // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" +" // computed.\n" +" b3Float4 d1, d2, a;\n" +" float u, v, w, p, q, r;\n" +" float s, t, dist, dist2;\n" +" b3Float4 witness2;\n" +" b3MprVec3Sub2(&d1, B, x0);\n" +" b3MprVec3Sub2(&d2, C, x0);\n" +" b3MprVec3Sub2(&a, x0, P);\n" +" u = b3MprVec3Dot(&a, &a);\n" +" v = 
b3MprVec3Dot(&d1, &d1);\n" +" w = b3MprVec3Dot(&d2, &d2);\n" +" p = b3MprVec3Dot(&a, &d1);\n" +" q = b3MprVec3Dot(&a, &d2);\n" +" r = b3MprVec3Dot(&d1, &d2);\n" +" s = (q * r - w * p) / (w * v - r * r);\n" +" t = (-s * r - q) / w;\n" +" if ((b3MprIsZero(s) || s > 0.f)\n" +" && (b3MprEq(s, 1.f) || s < 1.f)\n" +" && (b3MprIsZero(t) || t > 0.f)\n" +" && (b3MprEq(t, 1.f) || t < 1.f)\n" +" && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" +" if (witness){\n" +" b3MprVec3Scale(&d1, s);\n" +" b3MprVec3Scale(&d2, t);\n" +" b3MprVec3Copy(witness, x0);\n" +" b3MprVec3Add(witness, &d1);\n" +" b3MprVec3Add(witness, &d2);\n" +" dist = b3MprVec3Dist2(witness, P);\n" +" }else{\n" +" dist = s * s * v;\n" +" dist += t * t * w;\n" +" dist += 2.f * s * t * r;\n" +" dist += 2.f * s * p;\n" +" dist += 2.f * t * q;\n" +" dist += u;\n" +" }\n" +" }else{\n" +" dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" +" dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" +" if (dist2 < dist){\n" +" dist = dist2;\n" +" if (witness)\n" +" b3MprVec3Copy(witness, &witness2);\n" +" }\n" +" dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" +" if (dist2 < dist){\n" +" dist = dist2;\n" +" if (witness)\n" +" b3MprVec3Copy(witness, &witness2);\n" +" }\n" +" }\n" +" return dist;\n" +"}\n" +"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" b3MprSimplex_t *portal,\n" +" float *depth, b3Float4 *pdir, b3Float4 *pos)\n" +"{\n" +" b3Float4 dir;\n" +" b3MprSupport_t v4;\n" +" unsigned long iterations;\n" +" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +" b3Float4* b3mpr_vec3_origin = &zero;\n" +" iterations = 1UL;\n" +" for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" +" //while (1)\n" +" {\n" +" // compute portal direction and obtain next 
support point\n" +" b3PortalDir(portal, &dir);\n" +" \n" +" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" +" // reached tolerance -> find penetration info\n" +" if (portalReachTolerance(portal, &v4, &dir)\n" +" || iterations ==B3_MPR_MAX_ITERATIONS)\n" +" {\n" +" *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" +" *depth = B3_MPR_SQRT(*depth);\n" +" \n" +" if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" +" {\n" +" \n" +" *pdir = dir;\n" +" } \n" +" b3MprVec3Normalize(pdir);\n" +" \n" +" // barycentric coordinates:\n" +" b3FindPos(portal, pos);\n" +" return;\n" +" }\n" +" b3ExpandPortal(portal, &v4);\n" +" iterations++;\n" +" }\n" +"}\n" +"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" +"{\n" +" // Touching contact on portal's v1 - so depth is zero and direction\n" +" // is unimportant and pos can be guessed\n" +" *depth = 0.f;\n" +" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" +" b3Float4* b3mpr_vec3_origin = &zero;\n" +" b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" +" b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +" b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +" b3MprVec3Scale(pos, 0.5);\n" +"}\n" +"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" +" float *depth, b3Float4 *dir, b3Float4 *pos)\n" +"{\n" +" \n" +" // Origin lies on v0-v1 segment.\n" +" // Depth is distance to v1, direction also and position must be\n" +" // computed\n" +" b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" +" b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" +" b3MprVec3Scale(pos, 0.5f);\n" +" \n" +" b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" +" *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" +" b3MprVec3Normalize(dir);\n" +"}\n" +"inline int 
b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" +" b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" +" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" +" b3ConstArray(b3Collidable_t) cpuCollidables,\n" +" b3ConstArray(b3Float4) cpuVertices,\n" +" __global b3Float4* sepAxis,\n" +" __global int* hasSepAxis,\n" +" float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" +"{\n" +" \n" +" b3MprSimplex_t portal;\n" +" \n" +"// if (!hasSepAxis[pairIndex])\n" +" // return -1;\n" +" \n" +" hasSepAxis[pairIndex] = 0;\n" +" int res;\n" +" // Phase 1: Portal discovery\n" +" res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" +" \n" +" \n" +" //sepAxis[pairIndex] = *pdir;//or -dir?\n" +" switch (res)\n" +" {\n" +" case 0:\n" +" {\n" +" // Phase 2: Portal refinement\n" +" \n" +" res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" +" if (res < 0)\n" +" return -1;\n" +" // Phase 3. 
Penetration info\n" +" b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" +" hasSepAxis[pairIndex] = 1;\n" +" sepAxis[pairIndex] = -*dirOut;\n" +" break;\n" +" }\n" +" case 1:\n" +" {\n" +" // Touching contact on portal's v1.\n" +" b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" +" break;\n" +" }\n" +" case 2:\n" +" {\n" +" \n" +" b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" +" break;\n" +" }\n" +" default:\n" +" {\n" +" hasSepAxis[pairIndex]=0;\n" +" //if (res < 0)\n" +" //{\n" +" // Origin isn't inside portal - no collision.\n" +" return -1;\n" +" //}\n" +" }\n" +" };\n" +" \n" +" return 0;\n" +"};\n" +"#endif //B3_MPR_PENETRATION_H\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPosB[4];\n" +"// b3Float4 m_localPosA[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormalOnB.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormalOnB.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" +"#define AppendInc(x, out) out = atomic_inc(x)\n" +"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +"#ifdef cl_ext_atomic_counters_32\n" +" #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : 
enable\n" +"#else\n" +" #define counter32_t volatile __global int*\n" +"#endif\n" +"__kernel void mprPenetrationKernel( __global int4* pairs,\n" +" __global const b3RigidBodyData_t* rigidBodies, \n" +" __global const b3Collidable_t* collidables,\n" +" __global const b3ConvexPolyhedronData_t* convexShapes, \n" +" __global const float4* vertices,\n" +" __global float4* separatingNormals,\n" +" __global int* hasSeparatingAxis,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int contactCapacity,\n" +" int numPairs)\n" +"{\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" if (i<numPairs)\n" +" {\n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" //once the broadphase avoids static-static pairs, we can remove this test\n" +" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +" {\n" +" return;\n" +" }\n" +" \n" +" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +" {\n" +" return;\n" +" }\n" +" float depthOut;\n" +" b3Float4 dirOut;\n" +" b3Float4 posOut;\n" +" int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" +" \n" +" \n" +" \n" +" \n" +" if (res==0)\n" +" {\n" +" //add a contact\n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" if (dstIdx<contactCapacity)\n" +" {\n" +" pairs[pairIndex].z = dstIdx;\n" +" __global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" +" c->m_worldNormalOnB = 
-dirOut;//normal;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" +" //for (int i=0;i<nContacts;i++)\n" +" posOut.w = -depthOut;\n" +" c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" +" GET_NPOINTS(*c) = 1;//nContacts;\n" +" }\n" +" }\n" +" }\n" +"}\n" +"typedef float4 Quaternion;\n" +"#define make_float4 (float4)\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, \n" +"const float4* 
dir, __global const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;i<numVerts;i++)\n" +" {\n" +" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices,\n" +" __global const float4* unitSphereDirections,\n" +" int numUnitSphereDirections,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test unit sphere directions\n" +" for (int i=0;i<numUnitSphereDirections;i++)\n" +" {\n" +" float4 crossje;\n" +" crossje = unitSphereDirections[i]; \n" +" if (dot3F4(DeltaC2,crossje)>0)\n" +" crossje *= -1.f;\n" +" {\n" +" float dist;\n" +" bool result = true;\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +" \n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" \n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" dist = d0<d1 ? 
d0:d1;\n" +" result = true;\n" +" \n" +" if(dist<*dmin)\n" +" {\n" +" *dmin = dist;\n" +" *sep = crossje;\n" +" }\n" +" }\n" +" }\n" +" \n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" +" __global const b3RigidBodyData_t* rigidBodies, \n" +" __global const b3Collidable_t* collidables,\n" +" __global const b3ConvexPolyhedronData_t* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* unitSphereDirections,\n" +" __global float4* separatingNormals,\n" +" __global int* hasSeparatingAxis,\n" +" __global float* dmins,\n" +" int numUnitSphereDirections,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" if (hasSeparatingAxis[i])\n" +" {\n" +" \n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" \n" +" float dmin = dmins[i];\n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal = separatingNormals[i];\n" +" \n" +" int numEdgeEdgeDirections = 
convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +" if (numEdgeEdgeDirections>numUnitSphereDirections)\n" +" {\n" +" bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis[i] = 1;\n" +" separatingNormals[i] = sepNormal;\n" +" }\n" +" }\n" +" } //if (hasSeparatingAxis[i])\n" +" }//(i<numPairs)\n" +"}\n" +; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl new file mode 100644 index 0000000000..9c9e920f13 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl @@ -0,0 +1,1374 @@ +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" + +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_PLANE 4 +#define SHAPE_CONCAVE_TRIMESH 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 +#define SHAPE_SPHERE 7 + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile __global int* +#endif + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) 
+#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + +#define max2 max +#define min2 min + +typedef unsigned int u32; + + + + +typedef struct +{ + union + { + float4 m_min; + float m_minElems[4]; + int m_minIndices[4]; + }; + union + { + float4 m_max; + float m_maxElems[4]; + int m_maxIndices[4]; + }; +} btAabbCL; + +///keep this in sync with btCollidable.h +typedef struct +{ + int m_numChildShapes; + float m_radius; + int m_shapeType; + int m_shapeIndex; + +} btCollidableGpu; + +typedef struct +{ + float4 m_childPosition; + float4 m_childOrientation; + int m_shapeIndex; + int m_unused0; + int m_unused1; + int m_unused2; +} btGpuChildShape; + +#define GET_NPOINTS(x) (x).m_worldNormalOnB.w + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} BodyData; + + +typedef struct +{ + float4 m_localCenter; + float4 m_extents; + float4 mC; + float4 mE; + + float m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; + +} ConvexPolyhedronCL; + +typedef struct +{ + float4 m_plane; + int m_indexOffset; + int m_numIndices; +} btGpuFace; + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + + +__inline +float fastDiv(float numerator, float denominator) +{ + return native_divide(numerator, denominator); +// return numerator/denominator; +} + +__inline +float4 fastDiv4(float4 numerator, float4 denominator) +{ + return 
native_divide(numerator, denominator); +} + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} + +//#define dot3F4 dot + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + +void trMul(float4 translationA, Quaternion orientationA, + float4 translationB, Quaternion orientationB, + float4* translationOut, 
Quaternion* orientationOut) +{ + *orientationOut = qtMul(orientationA,orientationB); + *translationOut = transform(&translationB,&translationA,&orientationA); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + + +__inline float4 lerp3(const float4 a,const float4 b, float t) +{ + return make_float4( a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); +} + + +float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace) +{ + float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0); + float dist = dot3F4(n, point) + planeEqn.w; + *closestPointOnFace = point - dist * n; + return dist; +} + + + +inline bool IsPointInPolygon(float4 p, + const btGpuFace* face, + __global const float4* baseVertex, + __global const int* convexIndices, + float4* out) +{ + float4 a; + float4 b; + float4 ab; + float4 ap; + float4 v; + + float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); + + if (face->m_numIndices<2) + return false; + + + float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; + + b = v0; + + for(unsigned i=0; i != face->m_numIndices; ++i) + { + a = b; + float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; + b = vi; + ab = b-a; + ap = p-a; + v = cross3(ab,plane); + + if (dot(ap, v) > 0.f) + { + float ab_m2 = dot(ab, ab); + float rt = ab_m2 != 0.f ? 
dot(ab, ap) / ab_m2 : 0.f; + if (rt <= 0.f) + { + *out = a; + } + else if (rt >= 1.f) + { + *out = b; + } + else + { + float s = 1.f - rt; + out[0].x = s * a.x + rt * b.x; + out[0].y = s * a.y + rt * b.y; + out[0].z = s * a.z + rt * b.z; + } + return false; + } + } + return true; +} + + + + +void computeContactSphereConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* convexVertices, + __global const int* convexIndices, + __global const btGpuFace* faces, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int maxContactCapacity, + float4 spherePos2, + float radius, + float4 pos, + float4 quat + ) +{ + + float4 invPos; + float4 invOrn; + + trInverse(pos,quat, &invPos,&invOrn); + + float4 spherePos = transform(&spherePos2,&invPos,&invOrn); + + int shapeIndex = collidables[collidableIndexB].m_shapeIndex; + int numFaces = convexShapes[shapeIndex].m_numFaces; + float4 closestPnt = (float4)(0, 0, 0, 0); + float4 hitNormalWorld = (float4)(0, 0, 0, 0); + float minDist = -1000000.f; + bool bCollide = true; + + for ( int f = 0; f < numFaces; f++ ) + { + btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; + + // set up a plane equation + float4 planeEqn; + float4 n1 = face.m_plane; + n1.w = 0.f; + planeEqn = n1; + planeEqn.w = face.m_plane.w; + + + // compute a signed distance from the vertex in cloth to the face of rigidbody. + float4 pntReturn; + float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); + + // If the distance is positive, the plane is a separating plane. 
+ if ( dist > radius ) + { + bCollide = false; + break; + } + + + if (dist>0) + { + //might hit an edge or vertex + float4 out; + float4 zeroPos = make_float4(0,0,0,0); + + bool isInPoly = IsPointInPolygon(spherePos, + &face, + &convexVertices[convexShapes[shapeIndex].m_vertexOffset], + convexIndices, + &out); + if (isInPoly) + { + if (dist>minDist) + { + minDist = dist; + closestPnt = pntReturn; + hitNormalWorld = planeEqn; + + } + } else + { + float4 tmp = spherePos-out; + float l2 = dot(tmp,tmp); + if (l2<radius*radius) + { + dist = sqrt(l2); + if (dist>minDist) + { + minDist = dist; + closestPnt = out; + hitNormalWorld = tmp/dist; + + } + + } else + { + bCollide = false; + break; + } + } + } else + { + if ( dist > minDist ) + { + minDist = dist; + closestPnt = pntReturn; + hitNormalWorld.xyz = planeEqn.xyz; + } + } + + } + + + + if (bCollide && minDist > -10000) + { + float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); + float4 pOnB1 = transform(&closestPnt,&pos,&quat); + + float actualDepth = minDist-radius; + if (actualDepth<=0.f) + { + + + pOnB1.w = actualDepth; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + + if (1)//dstIdx < maxContactCapacity) + { + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normalOnSurfaceB1; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_worldPosB[0] = pOnB1; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + GET_NPOINTS(*c) = 1; + } + + } + }//if (hasCollision) + +} + + + +int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = make_float4(0.f); + { 
+ + for (int i=0;i<nPoints;i++) + center += p[i]; + center /= (float)nPoints; + } + + + + // sample 4 directions + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + + //keep point with deepest penetration + float minW= FLT_MAX; + + int minIndex=-1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for(int ie = 0; ie<nPoints; ie++ ) + { + if (p[ie].w<minW) + { + minW = p[ie].w; + minIndex=ie; + } + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + if (f<maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4( -u, r ); + if (f<maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + + f = dot3F4( v, r ); + if (f<maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4( -v, r ); + if (f<maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; + +} + +#define MAX_PLANE_CONVEX_POINTS 64 + +int computeContactPlaneConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + __global const BodyData* rigidBodies, + __global const btCollidableGpu*collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* convexVertices, + __global const int* convexIndices, + __global const btGpuFace* faces, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int maxContactCapacity, + float4 posB, + Quaternion ornB + ) +{ + int resultIndex=-1; + + int shapeIndex = collidables[collidableIndexB].m_shapeIndex; + __global const ConvexPolyhedronCL* hullB 
= &convexShapes[shapeIndex]; + + float4 posA; + posA = rigidBodies[bodyIndexA].m_pos; + Quaternion ornA; + ornA = rigidBodies[bodyIndexA].m_quat; + + int numContactsOut = 0; + int numWorldVertsB1= 0; + + float4 planeEq; + planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; + float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); + float4 planeNormalWorld; + planeNormalWorld = qtRotate(ornA,planeNormal); + float planeConstant = planeEq.w; + + float4 invPosA;Quaternion invOrnA; + float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; + { + + trInverse(posA,ornA,&invPosA,&invOrnA); + trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); + } + float4 invPosB;Quaternion invOrnB; + float4 planeInConvexPos1; Quaternion planeInConvexOrn1; + { + + trInverse(posB,ornB,&invPosB,&invOrnB); + trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); + } + + + float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal); + float maxDot = -1e30; + int hitVertex=-1; + float4 hitVtx; + + + + float4 contactPoints[MAX_PLANE_CONVEX_POINTS]; + int numPoints = 0; + + int4 contactIdx; + contactIdx=make_int4(0,1,2,3); + + + for (int i=0;i<hullB->m_numVertices;i++) + { + float4 vtx = convexVertices[hullB->m_vertexOffset+i]; + float curDot = dot(vtx,planeNormalInConvex); + + + if (curDot>maxDot) + { + hitVertex=i; + maxDot=curDot; + hitVtx = vtx; + //make sure the deepest points is always included + if (numPoints==MAX_PLANE_CONVEX_POINTS) + numPoints--; + } + + if (numPoints<MAX_PLANE_CONVEX_POINTS) + { + float4 vtxWorld = transform(&vtx, &posB, &ornB); + float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld; + float dist = dot(planeNormal,vtxInPlane)-planeConstant; + if (dist<0.f) + { + vtxWorld.w = dist; + contactPoints[numPoints] = vtxWorld; + numPoints++; + } + } + + } + + int numReducedPoints = numPoints; + if (numPoints>4) + { + numReducedPoints = 
extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + } + + if (numReducedPoints>0) + { + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < maxContactCapacity) + { + resultIndex = dstIdx; + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -planeNormalWorld; + //c->setFrictionCoeff(0.7); + //c->setRestituitionCoeff(0.f); + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + switch (numReducedPoints) + { + case 4: + c->m_worldPosB[3] = contactPoints[contactIdx.w]; + case 3: + c->m_worldPosB[2] = contactPoints[contactIdx.z]; + case 2: + c->m_worldPosB[1] = contactPoints[contactIdx.y]; + case 1: + c->m_worldPosB[0] = contactPoints[contactIdx.x]; + default: + { + } + }; + + GET_NPOINTS(*c) = numReducedPoints; + }//if (dstIdx < numPairs) + } + + return resultIndex; +} + + +void computeContactPlaneSphere(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const btGpuFace* faces, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int maxContactCapacity) +{ + float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; + float radius = collidables[collidableIndexB].m_radius; + float4 posA1 = rigidBodies[bodyIndexA].m_pos; + float4 ornA1 = rigidBodies[bodyIndexA].m_quat; + float4 posB1 = rigidBodies[bodyIndexB].m_pos; + float4 ornB1 = rigidBodies[bodyIndexB].m_quat; + + bool hasCollision = false; + float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); + float planeConstant = 
planeEq.w; + float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; + { + float4 invPosA;Quaternion invOrnA; + trInverse(posA1,ornA1,&invPosA,&invOrnA); + trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); + } + float4 planeInConvexPos1; Quaternion planeInConvexOrn1; + { + float4 invPosB;Quaternion invOrnB; + trInverse(posB1,ornB1,&invPosB,&invOrnB); + trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); + } + float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius; + float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); + float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant; + hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold(); + if (hasCollision) + { + float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1; + float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1); + float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1); + float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance; + pOnB1.w = distance; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < maxContactCapacity) + { + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normalOnSurfaceB1; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_worldPosB[0] = pOnB1; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if (hasCollision) +} + + +__kernel void primitiveContactsKernel( __global int4* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global 
const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numPairs, int maxContactCapacity) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && + collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) + { + + float4 posB; + posB = rigidBodies[bodyIndexB].m_pos; + Quaternion ornB; + ornB = rigidBodies[bodyIndexB].m_quat; + int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, + rigidBodies,collidables,convexShapes,vertices,indices, + faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB); + if (contactIndex>=0) + pairs[pairIndex].z = contactIndex; + + return; + } + + + if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && + collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) + { + + float4 posA; + posA = rigidBodies[bodyIndexA].m_pos; + Quaternion ornA; + ornA = rigidBodies[bodyIndexA].m_quat; + + + int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, + rigidBodies,collidables,convexShapes,vertices,indices, + faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); + + if (contactIndex>=0) + pairs[pairIndex].z = contactIndex; + + return; + } + + if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && + collidables[collidableIndexB].m_shapeType == 
SHAPE_SPHERE) + { + computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, + rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); + return; + } + + + if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) + { + + + computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, + rigidBodies,collidables, + faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); + + return; + } + + + + + if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) + { + + float4 spherePos = rigidBodies[bodyIndexA].m_pos; + float sphereRadius = collidables[collidableIndexA].m_radius; + float4 convexPos = rigidBodies[bodyIndexB].m_pos; + float4 convexOrn = rigidBodies[bodyIndexB].m_quat; + + computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, + rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, + spherePos,sphereRadius,convexPos,convexOrn); + + return; + } + + if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && + collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) + { + + float4 spherePos = rigidBodies[bodyIndexB].m_pos; + float sphereRadius = collidables[collidableIndexB].m_radius; + float4 convexPos = rigidBodies[bodyIndexA].m_pos; + float4 convexOrn = rigidBodies[bodyIndexA].m_quat; + + computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, + rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, + spherePos,sphereRadius,convexPos,convexOrn); + return; + } + + + + + + + if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + collidables[collidableIndexB].m_shapeType == 
SHAPE_SPHERE) + { + //sphere-sphere + float radiusA = collidables[collidableIndexA].m_radius; + float radiusB = collidables[collidableIndexB].m_radius; + float4 posA = rigidBodies[bodyIndexA].m_pos; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + float4 diff = posA-posB; + float len = length(diff); + + ///iff distance positive, don't generate a new contact + if ( len <= (radiusA+radiusB)) + { + ///distance (negative means penetration) + float dist = len - (radiusA+radiusB); + float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); + if (len > 0.00001) + { + normalOnSurfaceB = diff / len; + } + float4 contactPosB = posB + normalOnSurfaceB*radiusB; + contactPosB.w = dist; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + + if (dstIdx < maxContactCapacity) + { + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = normalOnSurfaceB; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_worldPosB[0] = contactPosB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if ( len <= (radiusA+radiusB)) + + return; + }//SHAPE_SPHERE SHAPE_SPHERE + + }// if (i<numPairs) + +} + + +// work-in-progress +__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global const btGpuChildShape* gpuChildShapes, + __global struct b3Contact4Data* restrict globalContactsOut, + 
counter32_t nGlobalContactsOut, + int numCompoundPairs, int maxContactCapacity + ) +{ + + int i = get_global_id(0); + if (i<numCompoundPairs) + { + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int shapeTypeA = collidables[collidableIndexA].m_shapeType; + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + + int pairIndex = i; + if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL)) + { + + computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, + 
rigidBodies,collidables,convexShapes,vertices,indices, + faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB); + return; + } + + if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE)) + { + + computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, + rigidBodies,collidables,convexShapes,vertices,indices, + faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); + return; + } + + if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE)) + { + float4 spherePos = rigidBodies[bodyIndexB].m_pos; + float sphereRadius = collidables[collidableIndexB].m_radius; + float4 convexPos = posA; + float4 convexOrn = ornA; + + computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, + rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, + spherePos,sphereRadius,convexPos,convexOrn); + + return; + } + + if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL)) + { + + float4 spherePos = rigidBodies[bodyIndexA].m_pos; + float sphereRadius = collidables[collidableIndexA].m_radius; + float4 convexPos = posB; + float4 convexOrn = ornB; + + + computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, + rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, + spherePos,sphereRadius,convexPos,convexOrn); + + return; + } + }// if (i<numCompoundPairs) +} + + +bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p ) +{ + + const float4* p1 = &vertices[0]; + const float4* p2 = &vertices[1]; + const float4* p3 = &vertices[2]; + + float4 edge1; edge1 = (*p2 - *p1); + float4 edge2; edge2 = ( *p3 - *p2 ); + float4 edge3; edge3 = ( *p1 - *p3 ); + + + float4 p1_to_p; p1_to_p = ( *p - *p1 ); + float4 p2_to_p; p2_to_p = ( *p - *p2 ); + float4 p3_to_p; 
p3_to_p = ( *p - *p3 );

	float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));
	float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));
	float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));

	float r1, r2, r3;
	r1 = dot(edge1_normal,p1_to_p );
	r2 = dot(edge2_normal,p2_to_p );
	r3 = dot(edge3_normal,p3_to_p );

	// The point is accepted when all three signed values agree in sign
	// (both all-positive and all-non-positive are accepted).
	if ( r1 > 0 && r2 > 0 && r3 > 0 )
		return true;
	if ( r1 <= 0 && r2 <= 0 && r3 <= 0 )
		return true;
	return false;

}


// Returns the squared distance between point p and the segment [from,to],
// and writes the closest point on that segment to *nearest.
// Note: dot() on float4 operands sums all four components, so the w
// components of the inputs take part in the result.
float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest)
{
	float4 diff = p - from;
	float4 v = to - from;
	// Unnormalised projection of (p - from) onto the segment direction.
	float t = dot(v,diff);

	if (t > 0)
	{
		float dotVV = dot(v,v);
		if (t < dotVV)
		{
			// Projection falls inside the segment: normalise the parameter.
			t /= dotVV;
			diff -= t*v;
		} else
		{
			// Projection lies at or beyond 'to': clamp to the end point.
			t = 1;
			diff -= v;
		}
	} else
	{
		// Projection lies at or before 'from' (this also covers a zero-length
		// segment): clamp to the start point.
		t = 0;
	}
	*nearest = from + t*v;
	return dot(diff,diff);
}


// Tests a sphere against a single triangle and, if they overlap, appends one
// contact to globalContactsOut.
//
//   pairIndex              value stored in the contact's m_batchIdx
//   bodyIndexA/B           body indices stored (sign-flipped when m_invMass==0)
//                          in m_bodyAPtrAndSignBit / m_bodyBPtrAndSignBit
//   collidableIndexA/B,
//   collidables            not used by this function
//   triangleVertices       the three triangle vertices, expressed in the local
//                          frame defined by (pos, quat)
//   nGlobalContactsOut     counter incremented through AppendInc
//   maxContactCapacity     contacts whose slot index is >= this are dropped
//   spherePos2, radius     sphere centre (same frame as pos) and radius
//   pos, quat              translation / orientation of the triangle's frame
//   faceIndex              stored in the contact's m_childIndexB
//
// The stored normal points from the contact point on the triangle towards the
// sphere centre, and the contact's w component holds the (non-positive) depth.
void computeContactSphereTriangle(int pairIndex,
									int bodyIndexA, int bodyIndexB,
									int collidableIndexA, int collidableIndexB,
									__global const BodyData* rigidBodies,
									__global const btCollidableGpu* collidables,
									const float4* triangleVertices,
									__global struct b3Contact4Data* restrict globalContactsOut,
									counter32_t nGlobalContactsOut,
									int maxContactCapacity,
									float4 spherePos2,
									float radius,
									float4 pos,
									float4 quat,
									int faceIndex
									)
{
	// Bring the sphere centre into the triangle's local frame.
	float4 invPos;
	float4 invOrn;
	trInverse(pos,quat, &invPos,&invOrn);
	float4 sphereCenter = transform(&spherePos2,&invPos,&invOrn);

	float4 closestPnt = (float4)(0, 0, 0, 0);
	float4 hitNormalWorld = (float4)(0, 0, 0, 0);
	float minDist = -1000000.f;
	bool bCollide = false;

	const float4* vertices = triangleVertices;
	float contactBreakingThreshold = 0.f;//todo?
	float radiusWithThreshold = radius + contactBreakingThreshold;

	// Triangle plane normal.
	float4 edge10;
	edge10 = vertices[1]-vertices[0];
	edge10.w = 0.f;//is this needed?
	float4 edge20;
	edge20 = vertices[2]-vertices[0];
	edge20.w = 0.f;//is this needed?
	float4 normal = cross3(edge10,edge20);
	normal = normalize(normal);

	float4 p1ToCenter;
	p1ToCenter = sphereCenter - vertices[0];
	float distanceFromPlane = dot(p1ToCenter,normal);

	if (distanceFromPlane < 0.f)
	{
		//triangle facing the other way
		distanceFromPlane *= -1.f;
		normal *= -1.f;
	}

	bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;

	// Check for contact / intersection
	bool hasContact = false;
	float4 contactPoint;
	if (isInsideContactPlane)
	{
		if (pointInTriangle(vertices,&normal, &sphereCenter))
		{
			// Inside the contact wedge - touches a point on the shell plane
			hasContact = true;
			contactPoint = sphereCenter - normal*distanceFromPlane;
		} else {
			// Could be inside one of the contact capsules. Keep the edge
			// point that is closest to the sphere centre: several edges can
			// be within range near a corner, and taking whichever edge was
			// visited last would give a contact point that is not the
			// nearest point of the triangle.
			float bestDistanceSqr = radiusWithThreshold*radiusWithThreshold;
			for (int e = 0; e < 3; e++)
			{
				float4 pa = vertices[e];
				float4 pb = vertices[(e+1)%3];
				float4 nearestOnEdge;

				float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);
				if (distanceSqr < bestDistanceSqr)
				{
					// Inside this edge's capsule and nearer than any earlier edge
					bestDistanceSqr = distanceSqr;
					hasContact = true;
					contactPoint = nearestOnEdge;
				}
			}
		}
	}

	if (hasContact)
	{
		closestPnt = contactPoint;
		float4 contactToCenter = sphereCenter - contactPoint;
		minDist = length(contactToCenter);
		// When the centre coincides with the contact point no direction can
		// be derived, and no contact is reported (bCollide stays false).
		if (minDist>FLT_EPSILON)
		{
			hitNormalWorld = normalize(contactToCenter);//*(1./minDist);
			bCollide = true;
		}
	}

	if (bCollide)
	{
		// Rotate the normal and transform the contact point out of the
		// triangle's local frame.
		float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);
		float4 pOnB1 = transform(&closestPnt,&pos,&quat);
		float actualDepth = minDist-radius;

		if (actualDepth<=0.f)
		{
			pOnB1.w = actualDepth;
			int dstIdx;

			float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);
			if (lenSqr>FLT_EPSILON)
			{
				// The counter is incremented before the capacity check, so
				// it may end up larger than maxContactCapacity.
				AppendInc( nGlobalContactsOut, dstIdx );

				if (dstIdx < maxContactCapacity)
				{
					__global struct b3Contact4Data* c = &globalContactsOut[dstIdx];
					c->m_worldNormalOnB = -normalOnSurfaceB1;
					c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);
					c->m_batchIdx = pairIndex;
					c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;
					c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;
					c->m_worldPosB[0] = pOnB1;

					c->m_childIndexA = -1;
					c->m_childIndexB = faceIndex;

					GET_NPOINTS(*c) = 1;
				}
			}
		}
	}//if (hasCollision)

}



// work-in-progress
//
// One work-item per entry of concavePairs. For pair i, .x and .y are the two
// body indices and .z is a face index into the shape of body A. Only the case
// where body B's collidable is a SHAPE_SPHERE is handled; every other pair is
// ignored. uniqueEdges and aabbs are not used by this kernel.
__kernel void	findConcaveSphereContactsKernel( __global int4* concavePairs,
												__global const BodyData* rigidBodies,
												__global const btCollidableGpu* collidables,
												__global const ConvexPolyhedronCL* convexShapes,
												__global const float4* vertices,
												__global const float4* uniqueEdges,
												__global const btGpuFace* faces,
												__global const int* indices,
												__global btAabbCL* aabbs,
												__global struct b3Contact4Data* restrict globalContactsOut,
												counter32_t nGlobalContactsOut,
												int numConcavePairs, int maxContactCapacity
												)
{

	int i = get_global_id(0);
	if (i>=numConcavePairs)
		return;

	int bodyIndexA = concavePairs[i].x;
	int bodyIndexB = concavePairs[i].y;

	int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;
	int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;

	int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;

	if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)
	{
		int f = concavePairs[i].z;
		btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];

		// Gather the first three vertices of the face. The loop index has its
		// own name so it does not hide the work-item index i used below.
		float4 verticesA[3];
		for (int v=0;v<3;v++)
		{
			int index = indices[face.m_indexOffset+v];
			float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];
			verticesA[v] = vert;
		}

		float4 spherePos = rigidBodies[bodyIndexB].m_pos;
		float sphereRadius = collidables[collidableIndexB].m_radius;
		float4 convexPos = rigidBodies[bodyIndexA].m_pos;
		float4 convexOrn = rigidBodies[bodyIndexA].m_quat;

		// The sphere (body B of the pair) is passed as the first body, the
		// mesh (body A of the pair) as the second.
		computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA,
																rigidBodies,collidables,
																verticesA,
																globalContactsOut, nGlobalContactsOut,maxContactCapacity,
																spherePos,sphereRadius,convexPos,convexOrn, f);

		return;
	}
}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h new file mode 100644 index 0000000000..b0103fe674 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h @@ -0,0 +1,1289 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* primitiveContactsKernelsCL= \ +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" +"#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" +"#define B3_STATIC\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || 
b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPosB[4];\n" +"// b3Float4 m_localPosA[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormalOnB.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormalOnB.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" +"#define SHAPE_CONVEX_HULL 3\n" +"#define SHAPE_PLANE 4\n" +"#define SHAPE_CONCAVE_TRIMESH 5\n" +"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +"#define SHAPE_SPHERE 7\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" 
+"#ifdef cl_ext_atomic_counters_32\n" +"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +"#else\n" +"#define counter32_t volatile __global int*\n" +"#endif\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GET_NUM_GROUPS get_num_groups(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"#define AppendInc(x, out) out = atomic_inc(x)\n" +"#define AtomAdd(x, value) atom_add(&(x), value)\n" +"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +"#define max2 max\n" +"#define min2 min\n" +"typedef unsigned int u32;\n" +"typedef struct \n" +"{\n" +" union\n" +" {\n" +" float4 m_min;\n" +" float m_minElems[4];\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float4 m_max;\n" +" float m_maxElems[4];\n" +" int m_maxIndices[4];\n" +" };\n" +"} btAabbCL;\n" +"///keep this in sync with btCollidable.h\n" +"typedef struct\n" +"{\n" +" int m_numChildShapes;\n" +" float m_radius;\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +" \n" +"} btCollidableGpu;\n" +"typedef struct\n" +"{\n" +" float4 m_childPosition;\n" +" float4 m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"} btGpuChildShape;\n" +"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +" u32 m_collidableIdx; \n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} BodyData;\n" +"typedef struct \n" +"{\n" +" float4 m_localCenter;\n" +" float4 m_extents;\n" +" float4 mC;\n" +" float4 mE;\n" +" \n" +" float 
m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" \n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"} ConvexPolyhedronCL;\n" +"typedef struct\n" +"{\n" +" float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +"} btGpuFace;\n" +"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +"#define make_float4 (float4)\n" +"#define make_float2 (float2)\n" +"#define make_uint4 (uint4)\n" +"#define make_int4 (int4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"__inline\n" +"float fastDiv(float numerator, float denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"// return numerator/denominator; \n" +"}\n" +"__inline\n" +"float4 fastDiv4(float4 numerator, float4 denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"}\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"//#define dot3F4 dot\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" return fast_normalize(v);\n" +"}\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"typedef float4 Quaternion;\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion 
in)\n" +"{\n" +" return fastNormalize4(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"void trInverse(float4 translationIn, Quaternion orientationIn,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtInvert(orientationIn);\n" +" *translationOut = qtRotate(*orientationOut, -translationIn);\n" +"}\n" +"void trMul(float4 translationA, Quaternion orientationA,\n" +" float4 translationB, Quaternion orientationB,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtMul(orientationA,orientationB);\n" +" *translationOut = transform(&translationB,&translationA,&orientationA);\n" +"}\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" +"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"}\n" +"__inline float4 lerp3(const float4 a,const float4 b, float t)\n" +"{\n" +" return make_float4( a.x + (b.x - a.x) * t,\n" +" a.y + (b.y - a.y) * t,\n" +" a.z + (b.z - a.z) * t,\n" +" 0.f);\n" +"}\n" +"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" +"{\n" +" float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" +" float dist = dot3F4(n, point) + planeEqn.w;\n" +" *closestPointOnFace = point - dist * n;\n" +" return dist;\n" +"}\n" +"inline bool 
IsPointInPolygon(float4 p, \n" +" const btGpuFace* face,\n" +" __global const float4* baseVertex,\n" +" __global const int* convexIndices,\n" +" float4* out)\n" +"{\n" +" float4 a;\n" +" float4 b;\n" +" float4 ab;\n" +" float4 ap;\n" +" float4 v;\n" +" float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" +" \n" +" if (face->m_numIndices<2)\n" +" return false;\n" +" \n" +" float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" +" \n" +" b = v0;\n" +" for(unsigned i=0; i != face->m_numIndices; ++i)\n" +" {\n" +" a = b;\n" +" float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" +" b = vi;\n" +" ab = b-a;\n" +" ap = p-a;\n" +" v = cross3(ab,plane);\n" +" if (dot(ap, v) > 0.f)\n" +" {\n" +" float ab_m2 = dot(ab, ab);\n" +" float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" +" if (rt <= 0.f)\n" +" {\n" +" *out = a;\n" +" }\n" +" else if (rt >= 1.f) \n" +" {\n" +" *out = b;\n" +" }\n" +" else\n" +" {\n" +" float s = 1.f - rt;\n" +" out[0].x = s * a.x + rt * b.x;\n" +" out[0].y = s * a.y + rt * b.y;\n" +" out[0].z = s * a.z + rt * b.z;\n" +" }\n" +" return false;\n" +" }\n" +" }\n" +" return true;\n" +"}\n" +"void computeContactSphereConvex(int pairIndex,\n" +" int bodyIndexA, int bodyIndexB, \n" +" int collidableIndexA, int collidableIndexB, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" __global const float4* convexVertices,\n" +" __global const int* convexIndices,\n" +" __global const btGpuFace* faces,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int maxContactCapacity,\n" +" float4 spherePos2,\n" +" float radius,\n" +" float4 pos,\n" +" float4 quat\n" +" )\n" +"{\n" +" float4 invPos;\n" +" float4 invOrn;\n" +" trInverse(pos,quat, &invPos,&invOrn);\n" +" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +" 
int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +" int numFaces = convexShapes[shapeIndex].m_numFaces;\n" +" float4 closestPnt = (float4)(0, 0, 0, 0);\n" +" float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" +" float minDist = -1000000.f;\n" +" bool bCollide = true;\n" +" for ( int f = 0; f < numFaces; f++ )\n" +" {\n" +" btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" +" // set up a plane equation \n" +" float4 planeEqn;\n" +" float4 n1 = face.m_plane;\n" +" n1.w = 0.f;\n" +" planeEqn = n1;\n" +" planeEqn.w = face.m_plane.w;\n" +" \n" +" \n" +" // compute a signed distance from the vertex in cloth to the face of rigidbody.\n" +" float4 pntReturn;\n" +" float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" +" // If the distance is positive, the plane is a separating plane. \n" +" if ( dist > radius )\n" +" {\n" +" bCollide = false;\n" +" break;\n" +" }\n" +" if (dist>0)\n" +" {\n" +" //might hit an edge or vertex\n" +" float4 out;\n" +" float4 zeroPos = make_float4(0,0,0,0);\n" +" bool isInPoly = IsPointInPolygon(spherePos,\n" +" &face,\n" +" &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" +" convexIndices,\n" +" &out);\n" +" if (isInPoly)\n" +" {\n" +" if (dist>minDist)\n" +" {\n" +" minDist = dist;\n" +" closestPnt = pntReturn;\n" +" hitNormalWorld = planeEqn;\n" +" \n" +" }\n" +" } else\n" +" {\n" +" float4 tmp = spherePos-out;\n" +" float l2 = dot(tmp,tmp);\n" +" if (l2<radius*radius)\n" +" {\n" +" dist = sqrt(l2);\n" +" if (dist>minDist)\n" +" {\n" +" minDist = dist;\n" +" closestPnt = out;\n" +" hitNormalWorld = tmp/dist;\n" +" \n" +" }\n" +" \n" +" } else\n" +" {\n" +" bCollide = false;\n" +" break;\n" +" }\n" +" }\n" +" } else\n" +" {\n" +" if ( dist > minDist )\n" +" {\n" +" minDist = dist;\n" +" closestPnt = pntReturn;\n" +" hitNormalWorld.xyz = planeEqn.xyz;\n" +" }\n" +" }\n" +" \n" +" }\n" +" \n" +" if (bCollide && minDist > -10000)\n" +" {\n" +" float4 normalOnSurfaceB1 = 
qtRotate(quat,-hitNormalWorld);\n" +" float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +" \n" +" float actualDepth = minDist-radius;\n" +" if (actualDepth<=0.f)\n" +" {\n" +" \n" +" pOnB1.w = actualDepth;\n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" \n" +" if (1)//dstIdx < maxContactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +" c->m_worldPosB[0] = pOnB1;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" +" GET_NPOINTS(*c) = 1;\n" +" } \n" +" }\n" +" }//if (hasCollision)\n" +"}\n" +" \n" +"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" +"{\n" +" if( nPoints == 0 )\n" +" return 0;\n" +" \n" +" if (nPoints <=4)\n" +" return nPoints;\n" +" \n" +" \n" +" if (nPoints >64)\n" +" nPoints = 64;\n" +" \n" +" float4 center = make_float4(0.f);\n" +" {\n" +" \n" +" for (int i=0;i<nPoints;i++)\n" +" center += p[i];\n" +" center /= (float)nPoints;\n" +" }\n" +" \n" +" \n" +" \n" +" // sample 4 directions\n" +" \n" +" float4 aVector = p[0] - center;\n" +" float4 u = cross3( nearNormal, aVector );\n" +" float4 v = cross3( nearNormal, u );\n" +" u = normalize3( u );\n" +" v = normalize3( v );\n" +" \n" +" \n" +" //keep point with deepest penetration\n" +" float minW= FLT_MAX;\n" +" \n" +" int minIndex=-1;\n" +" \n" +" float4 maxDots;\n" +" maxDots.x = FLT_MIN;\n" +" maxDots.y = FLT_MIN;\n" +" maxDots.z = FLT_MIN;\n" +" maxDots.w = FLT_MIN;\n" +" \n" +" // idx, distance\n" +" for(int ie = 0; ie<nPoints; ie++ )\n" +" {\n" +" if (p[ie].w<minW)\n" +" {\n" +" minW = p[ie].w;\n" +" 
minIndex=ie;\n" +" }\n" +" float f;\n" +" float4 r = p[ie]-center;\n" +" f = dot3F4( u, r );\n" +" if (f<maxDots.x)\n" +" {\n" +" maxDots.x = f;\n" +" contactIdx[0].x = ie;\n" +" }\n" +" \n" +" f = dot3F4( -u, r );\n" +" if (f<maxDots.y)\n" +" {\n" +" maxDots.y = f;\n" +" contactIdx[0].y = ie;\n" +" }\n" +" \n" +" \n" +" f = dot3F4( v, r );\n" +" if (f<maxDots.z)\n" +" {\n" +" maxDots.z = f;\n" +" contactIdx[0].z = ie;\n" +" }\n" +" \n" +" f = dot3F4( -v, r );\n" +" if (f<maxDots.w)\n" +" {\n" +" maxDots.w = f;\n" +" contactIdx[0].w = ie;\n" +" }\n" +" \n" +" }\n" +" \n" +" if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" +" {\n" +" //replace the first contact with minimum (todo: replace contact with least penetration)\n" +" contactIdx[0].x = minIndex;\n" +" }\n" +" \n" +" return 4;\n" +" \n" +"}\n" +"#define MAX_PLANE_CONVEX_POINTS 64\n" +"int computeContactPlaneConvex(int pairIndex,\n" +" int bodyIndexA, int bodyIndexB, \n" +" int collidableIndexA, int collidableIndexB, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu*collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" __global const float4* convexVertices,\n" +" __global const int* convexIndices,\n" +" __global const btGpuFace* faces,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int maxContactCapacity,\n" +" float4 posB,\n" +" Quaternion ornB\n" +" )\n" +"{\n" +" int resultIndex=-1;\n" +" int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" +" __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" +" \n" +" float4 posA;\n" +" posA = rigidBodies[bodyIndexA].m_pos;\n" +" Quaternion ornA;\n" +" ornA = rigidBodies[bodyIndexA].m_quat;\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" float4 planeEq;\n" +" planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" 
+" float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +" float4 planeNormalWorld;\n" +" planeNormalWorld = qtRotate(ornA,planeNormal);\n" +" float planeConstant = planeEq.w;\n" +" \n" +" float4 invPosA;Quaternion invOrnA;\n" +" float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +" {\n" +" \n" +" trInverse(posA,ornA,&invPosA,&invOrnA);\n" +" trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +" }\n" +" float4 invPosB;Quaternion invOrnB;\n" +" float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" +" {\n" +" \n" +" trInverse(posB,ornB,&invPosB,&invOrnB);\n" +" trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); \n" +" }\n" +" \n" +" float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" +" float maxDot = -1e30;\n" +" int hitVertex=-1;\n" +" float4 hitVtx;\n" +" float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" +" int numPoints = 0;\n" +" int4 contactIdx;\n" +" contactIdx=make_int4(0,1,2,3);\n" +" \n" +" \n" +" for (int i=0;i<hullB->m_numVertices;i++)\n" +" {\n" +" float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" +" float curDot = dot(vtx,planeNormalInConvex);\n" +" if (curDot>maxDot)\n" +" {\n" +" hitVertex=i;\n" +" maxDot=curDot;\n" +" hitVtx = vtx;\n" +" //make sure the deepest points is always included\n" +" if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" +" numPoints--;\n" +" }\n" +" if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" +" {\n" +" float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" +" float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" +" float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" +" if (dist<0.f)\n" +" {\n" +" vtxWorld.w = dist;\n" +" contactPoints[numPoints] = vtxWorld;\n" +" numPoints++;\n" +" }\n" +" }\n" +" }\n" +" int numReducedPoints = numPoints;\n" +" if (numPoints>4)\n" +" {\n" +" numReducedPoints = extractManifoldSequential( contactPoints, numPoints, 
planeNormalInConvex, &contactIdx);\n" +" }\n" +" if (numReducedPoints>0)\n" +" {\n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" if (dstIdx < maxContactCapacity)\n" +" {\n" +" resultIndex = dstIdx;\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = -planeNormalWorld;\n" +" //c->setFrictionCoeff(0.7);\n" +" //c->setRestituitionCoeff(0.f);\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" +" switch (numReducedPoints)\n" +" {\n" +" case 4:\n" +" c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" +" case 3:\n" +" c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" +" case 2:\n" +" c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" +" case 1:\n" +" c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" +" default:\n" +" {\n" +" }\n" +" };\n" +" \n" +" GET_NPOINTS(*c) = numReducedPoints;\n" +" }//if (dstIdx < numPairs)\n" +" } \n" +" return resultIndex;\n" +"}\n" +"void computeContactPlaneSphere(int pairIndex,\n" +" int bodyIndexA, int bodyIndexB, \n" +" int collidableIndexA, int collidableIndexB, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const btGpuFace* faces,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int maxContactCapacity)\n" +"{\n" +" float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" +" float radius = collidables[collidableIndexB].m_radius;\n" +" float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" +" float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" +" float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" +" float4 ornB1 = 
rigidBodies[bodyIndexB].m_quat;\n" +" \n" +" bool hasCollision = false;\n" +" float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" +" float planeConstant = planeEq.w;\n" +" float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" +" {\n" +" float4 invPosA;Quaternion invOrnA;\n" +" trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" +" trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +" }\n" +" float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" +" {\n" +" float4 invPosB;Quaternion invOrnB;\n" +" trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" +" trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); \n" +" }\n" +" float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" +" float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" +" float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" +" hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" +" if (hasCollision)\n" +" {\n" +" float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1;\n" +" float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" +" float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" +" float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" +" pOnB1.w = distance;\n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < maxContactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +" c->m_worldPosB[0] = pOnB1;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" 
+" GET_NPOINTS(*c) = 1;\n" +" }//if (dstIdx < numPairs)\n" +" }//if (hasCollision)\n" +"}\n" +"__kernel void primitiveContactsKernel( __global int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numPairs, int maxContactCapacity)\n" +"{\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" float4 worldVertsB1[64];\n" +" float4 worldVertsB2[64];\n" +" int capacityWorldVerts = 64; \n" +" float4 localContactsOut[64];\n" +" int localContactCapacity=64;\n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" if (i<numPairs)\n" +" {\n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +" {\n" +" float4 posB;\n" +" posB = rigidBodies[bodyIndexB].m_pos;\n" +" Quaternion ornB;\n" +" ornB = rigidBodies[bodyIndexB].m_quat;\n" +" int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,\n" +" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" +" if (contactIndex>=0)\n" +" pairs[pairIndex].z = contactIndex;\n" +" return;\n" +" }\n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +" {\n" +" float4 posA;\n" +" posA = 
rigidBodies[bodyIndexA].m_pos;\n" +" Quaternion ornA;\n" +" ornA = rigidBodies[bodyIndexA].m_quat;\n" +" int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,\n" +" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +" if (contactIndex>=0)\n" +" pairs[pairIndex].z = contactIndex;\n" +" return;\n" +" }\n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +" rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +" return;\n" +" }\n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" +" {\n" +" computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" +" rigidBodies,collidables,\n" +" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" +" return;\n" +" }\n" +" \n" +" \n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" +" {\n" +" \n" +" float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +" float sphereRadius = collidables[collidableIndexA].m_radius;\n" +" float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" +" float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" +" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +" spherePos,sphereRadius,convexPos,convexOrn);\n" +" return;\n" +" }\n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" +" 
collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" \n" +" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +" float sphereRadius = collidables[collidableIndexB].m_radius;\n" +" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +" computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +" spherePos,sphereRadius,convexPos,convexOrn);\n" +" return;\n" +" }\n" +" \n" +" \n" +" \n" +" \n" +" \n" +" \n" +" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" +" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" //sphere-sphere\n" +" float radiusA = collidables[collidableIndexA].m_radius;\n" +" float radiusB = collidables[collidableIndexB].m_radius;\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" float4 diff = posA-posB;\n" +" float len = length(diff);\n" +" \n" +" ///iff distance positive, don't generate a new contact\n" +" if ( len <= (radiusA+radiusB))\n" +" {\n" +" ///distance (negative means penetration)\n" +" float dist = len - (radiusA+radiusB);\n" +" float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" +" if (len > 0.00001)\n" +" {\n" +" normalOnSurfaceB = diff / len;\n" +" }\n" +" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" +" contactPosB.w = dist;\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < maxContactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = normalOnSurfaceB;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" 
c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" c->m_worldPosB[0] = contactPosB;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" +" GET_NPOINTS(*c) = 1;\n" +" }//if (dstIdx < numPairs)\n" +" }//if ( len <= (radiusA+radiusB))\n" +" return;\n" +" }//SHAPE_SPHERE SHAPE_SPHERE\n" +" }// if (i<numPairs)\n" +"}\n" +"// work-in-progress\n" +"__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numCompoundPairs, int maxContactCapacity\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i<numCompoundPairs)\n" +" {\n" +" int bodyIndexA = gpuCompoundPairs[i].x;\n" +" int bodyIndexB = gpuCompoundPairs[i].y;\n" +" int childShapeIndexA = gpuCompoundPairs[i].z;\n" +" int childShapeIndexB = gpuCompoundPairs[i].w;\n" +" \n" +" int collidableIndexA = -1;\n" +" int collidableIndexB = -1;\n" +" \n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" \n" +" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" \n" +" if (childShapeIndexA >= 0)\n" +" {\n" +" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +" float4 newPosA = 
qtRotate(ornA,childPosA)+posA;\n" +" float4 newOrnA = qtMul(ornA,childOrnA);\n" +" posA = newPosA;\n" +" ornA = newOrnA;\n" +" } else\n" +" {\n" +" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" }\n" +" \n" +" if (childShapeIndexB>=0)\n" +" {\n" +" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" } else\n" +" {\n" +" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" +" }\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +" int pairIndex = i;\n" +" if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" +" {\n" +" computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,\n" +" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" +" return;\n" +" }\n" +" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" +" {\n" +" computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,\n" +" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" +" return;\n" +" }\n" +" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" +" {\n" +" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +" float sphereRadius = collidables[collidableIndexB].m_radius;\n" +" float4 convexPos = posA;\n" +" float4 convexOrn 
= ornA;\n" +" \n" +" computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +" spherePos,sphereRadius,convexPos,convexOrn);\n" +" \n" +" return;\n" +" }\n" +" if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" +" {\n" +" float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" +" float sphereRadius = collidables[collidableIndexA].m_radius;\n" +" float4 convexPos = posB;\n" +" float4 convexOrn = ornB;\n" +" \n" +" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" +" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +" spherePos,sphereRadius,convexPos,convexOrn);\n" +" \n" +" return;\n" +" }\n" +" }// if (i<numCompoundPairs)\n" +"}\n" +"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" +"{\n" +" const float4* p1 = &vertices[0];\n" +" const float4* p2 = &vertices[1];\n" +" const float4* p3 = &vertices[2];\n" +" float4 edge1; edge1 = (*p2 - *p1);\n" +" float4 edge2; edge2 = ( *p3 - *p2 );\n" +" float4 edge3; edge3 = ( *p1 - *p3 );\n" +" \n" +" float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" +" float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" +" float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" +" float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" +" float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" +" float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" +" \n" +" \n" +" float r1, r2, r3;\n" +" r1 = dot(edge1_normal,p1_to_p );\n" +" r2 = dot(edge2_normal,p2_to_p );\n" +" r3 = dot(edge3_normal,p3_to_p );\n" +" \n" +" if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" +" return true;\n" +" if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" +" return true;\n" +" return false;\n" +"}\n" +"float segmentSqrDistance(float4 from, float4 
to,float4 p, float4* nearest) \n" +"{\n" +" float4 diff = p - from;\n" +" float4 v = to - from;\n" +" float t = dot(v,diff);\n" +" \n" +" if (t > 0) \n" +" {\n" +" float dotVV = dot(v,v);\n" +" if (t < dotVV) \n" +" {\n" +" t /= dotVV;\n" +" diff -= t*v;\n" +" } else \n" +" {\n" +" t = 1;\n" +" diff -= v;\n" +" }\n" +" } else\n" +" {\n" +" t = 0;\n" +" }\n" +" *nearest = from + t*v;\n" +" return dot(diff,diff); \n" +"}\n" +"void computeContactSphereTriangle(int pairIndex,\n" +" int bodyIndexA, int bodyIndexB,\n" +" int collidableIndexA, int collidableIndexB, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" const float4* triangleVertices,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int maxContactCapacity,\n" +" float4 spherePos2,\n" +" float radius,\n" +" float4 pos,\n" +" float4 quat,\n" +" int faceIndex\n" +" )\n" +"{\n" +" float4 invPos;\n" +" float4 invOrn;\n" +" trInverse(pos,quat, &invPos,&invOrn);\n" +" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" +" int numFaces = 3;\n" +" float4 closestPnt = (float4)(0, 0, 0, 0);\n" +" float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" +" float minDist = -1000000.f;\n" +" bool bCollide = false;\n" +" \n" +" //////////////////////////////////////\n" +" float4 sphereCenter;\n" +" sphereCenter = spherePos;\n" +" const float4* vertices = triangleVertices;\n" +" float contactBreakingThreshold = 0.f;//todo?\n" +" float radiusWithThreshold = radius + contactBreakingThreshold;\n" +" float4 edge10;\n" +" edge10 = vertices[1]-vertices[0];\n" +" edge10.w = 0.f;//is this needed?\n" +" float4 edge20;\n" +" edge20 = vertices[2]-vertices[0];\n" +" edge20.w = 0.f;//is this needed?\n" +" float4 normal = cross3(edge10,edge20);\n" +" normal = normalize(normal);\n" +" float4 p1ToCenter;\n" +" p1ToCenter = sphereCenter - vertices[0];\n" +" \n" +" float distanceFromPlane = dot(p1ToCenter,normal);\n" +" if 
(distanceFromPlane < 0.f)\n" +" {\n" +" //triangle facing the other way\n" +" distanceFromPlane *= -1.f;\n" +" normal *= -1.f;\n" +" }\n" +" hitNormalWorld = normal;\n" +" bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" +" \n" +" // Check for contact / intersection\n" +" bool hasContact = false;\n" +" float4 contactPoint;\n" +" if (isInsideContactPlane) \n" +" {\n" +" \n" +" if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" +" {\n" +" // Inside the contact wedge - touches a point on the shell plane\n" +" hasContact = true;\n" +" contactPoint = sphereCenter - normal*distanceFromPlane;\n" +" \n" +" } else {\n" +" // Could be inside one of the contact capsules\n" +" float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" +" float4 nearestOnEdge;\n" +" int numEdges = 3;\n" +" for (int i = 0; i < numEdges; i++) \n" +" {\n" +" float4 pa =vertices[i];\n" +" float4 pb = vertices[(i+1)%3];\n" +" float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" +" if (distanceSqr < contactCapsuleRadiusSqr) \n" +" {\n" +" // Yep, we're inside a capsule\n" +" hasContact = true;\n" +" contactPoint = nearestOnEdge;\n" +" \n" +" }\n" +" \n" +" }\n" +" }\n" +" }\n" +" if (hasContact) \n" +" {\n" +" closestPnt = contactPoint;\n" +" float4 contactToCenter = sphereCenter - contactPoint;\n" +" minDist = length(contactToCenter);\n" +" if (minDist>FLT_EPSILON)\n" +" {\n" +" hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" +" bCollide = true;\n" +" }\n" +" \n" +" }\n" +" /////////////////////////////////////\n" +" if (bCollide && minDist > -10000)\n" +" {\n" +" \n" +" float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" +" float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" +" float actualDepth = minDist-radius;\n" +" \n" +" if (actualDepth<=0.f)\n" +" {\n" +" pOnB1.w = actualDepth;\n" +" int dstIdx;\n" +" \n" +" float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" +" if 
(lenSqr>FLT_EPSILON)\n" +" {\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" \n" +" if (dstIdx < maxContactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" +" c->m_worldPosB[0] = pOnB1;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = faceIndex;\n" +" GET_NPOINTS(*c) = 1;\n" +" } \n" +" }\n" +" }\n" +" }//if (hasCollision)\n" +"}\n" +"// work-in-progress\n" +"__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n" +" __global const BodyData* rigidBodies,\n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numConcavePairs, int maxContactCapacity\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i>=numConcavePairs)\n" +" return;\n" +" int pairIdx = i;\n" +" int bodyIndexA = concavePairs[i].x;\n" +" int bodyIndexB = concavePairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" +" {\n" +" int f = concavePairs[i].z;\n" +" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +" \n" 
+" float4 verticesA[3];\n" +" for (int i=0;i<3;i++)\n" +" {\n" +" int index = indices[face.m_indexOffset+i];\n" +" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +" verticesA[i] = vert;\n" +" }\n" +" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" +" float sphereRadius = collidables[collidableIndexB].m_radius;\n" +" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" +" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" +" computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" +" rigidBodies,collidables,\n" +" verticesA,\n" +" globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" +" spherePos,sphereRadius,convexPos,convexOrn, f);\n" +" return;\n" +" }\n" +"}\n" +; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl new file mode 100644 index 0000000000..a6565fd6fa --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl @@ -0,0 +1,2018 @@ +//keep this enum in sync with the CPU version (in btCollidable.h) +//written by Erwin Coumans + + +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_CONCAVE_TRIMESH 5 +#define TRIANGLE_NUM_CONVEX_FACES 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 + +#define B3_MAX_STACK_DEPTH 256 + + +typedef unsigned int u32; + +///keep this in sync with btCollidable.h +typedef struct +{ + union { + int m_numChildShapes; + int m_bvhIndex; + }; + union + { + float m_radius; + int m_compoundBvhIndex; + }; + + int m_shapeType; + int m_shapeIndex; + +} btCollidableGpu; + +#define MAX_NUM_PARTS_IN_BITS 10 + +///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). 
+typedef struct
+{
+	// Quantized AABB corners: 2 x 3 unsigned shorts = 12 bytes.
+	unsigned short int m_quantizedAabbMin[3];
+	unsigned short int m_quantizedAabbMax[3];
+	// 4 bytes. Non-negative => leaf node (value is read through getTriangleIndex*);
+	// negative => internal node (negated value is read through getEscapeIndex*).
+	int m_escapeIndexOrTriangleIndex;
+} b3QuantizedBvhNode;
+
+// Per-BVH bookkeeping. Quantized coordinates are turned back into floats by
+// dividing by m_quantization and adding m_aabbMin (see MyUnQuantize).
+typedef struct
+{
+	float4 m_aabbMin;
+	float4 m_aabbMax;
+	float4 m_quantization;
+	int m_numNodes;
+	int m_numSubTrees;
+	// Added to a subtree's m_rootNodeIndex to index the shared quantized-node array.
+	int m_nodeOffset;
+	// First entry of this BVH in the shared subtree-info array.
+	int m_subTreeOffset;
+
+} b3BvhInfo;
+
+
+// Returns the triangle index held in the low (31 - MAX_NUM_PARTS_IN_BITS) bits of
+// a node in private memory; the bits above that range are masked off.
+// NOTE(review): the masked-off bits presumably carry a part id (going by the
+// macro name) - that encoding is not visible in this file.
+int getTriangleIndex(const b3QuantizedBvhNode* rootNode)
+{
+	// All ones shifted up: 1-bits cover exactly the bits that must be discarded.
+	const unsigned int discardMask = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
+	return (rootNode->m_escapeIndexOrTriangleIndex & ~discardMask);
+}
+
+// __global address-space twin of getTriangleIndex.
+int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
+{
+	const unsigned int discardMask = (~0u) << (31 - MAX_NUM_PARTS_IN_BITS);
+	return (rootNode->m_escapeIndexOrTriangleIndex & ~discardMask);
+}
+
+// Returns 1 for a leaf node, 0 for an internal node (private-memory node).
+int isLeafNode(const b3QuantizedBvhNode* rootNode)
+{
+	// A negative value is a skip/escape index, which only internal nodes carry.
+	if (rootNode->m_escapeIndexOrTriangleIndex < 0)
+	{
+		return 0;
+	}
+	return 1;
+}
+
+// __global address-space twin of isLeafNode.
+int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)
+{
+	// A negative value is a skip/escape index, which only internal nodes carry.
+	if (rootNode->m_escapeIndexOrTriangleIndex < 0)
+	{
+		return 0;
+	}
+	return 1;
+}
+
+// Returns the escape index of an internal node; it is stored negated, so flip the sign.
+int getEscapeIndex(const b3QuantizedBvhNode* rootNode)
+{
+	const int storedValue = rootNode->m_escapeIndexOrTriangleIndex;
+	return -storedValue;
+}
+
+// __global address-space twin of getEscapeIndex.
+int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)
+{
+	const int storedValue = rootNode->m_escapeIndexOrTriangleIndex;
+	return -storedValue;
+}
+
+
+// Header describing one subtree of a quantized BVH.
+typedef struct
+{
+	// Quantized AABB of the whole subtree: 12 bytes.
+	unsigned short int m_quantizedAabbMin[3];
+	unsigned short int m_quantizedAabbMax[3];
+	// 4 bytes: index of the subtree's root node (relative to b3BvhInfo::m_nodeOffset).
+	int m_rootNodeIndex;
+	// 4 bytes: number of nodes spanned by the subtree.
+	int m_subtreeSize;
+	int m_padding[3];
+} b3BvhSubtreeInfo;
+
+
+// One child of a compound shape: its local transform plus the collidable it refers to.
+typedef struct
+{
+	float4 m_childPosition;
+	float4 m_childOrientation;
+	// Used by the kernels as an index into the collidables array.
+	int m_shapeIndex;
+	int m_unused0;
+	int m_unused1;
+	int m_unused2;
+} btGpuChildShape;
+
+
+// Rigid body state as laid out in the GPU buffer.
+typedef struct
+{
+	float4 m_pos;
+	float4 m_quat;
+	float4 m_linVel;
+	float4 m_angVel;
+
+	// Index into the collidables array.
+	u32 m_collidableIdx;
+	// The kernels treat m_invMass == 0 as a static body.
+	float m_invMass;
+	float m_restituitionCoeff;
+	float m_frictionCoeff;
+} BodyData;
+
+
+// Convex hull descriptor; the *Offset members index the shared vertex,
+// unique-edge and face arrays that are passed to the kernels.
+typedef struct
+{
+	float4 m_localCenter;
+	float4 m_extents;
+	float4 mC;
+	float4 mE;
+
+	float m_radius;
+	int m_faceOffset;
+	int m_numFaces;
+	int m_numVertices;
+
+	int m_vertexOffset;
+	int m_uniqueEdgesOffset;
+	int m_numUniqueEdges;
+	int m_unused;
+} ConvexPolyhedronCL;
+
+// AABB whose corners can be viewed as a float4, as four floats or as four ints.
+typedef struct
+{
+	union
+	{
+		float4 m_min;
+		float m_minElems[4];
+		int m_minIndices[4];
+	};
+	union
+	{
+		float4 m_max;
+		float m_maxElems[4];
+		int m_maxIndices[4];
+	};
+} btAabbCL;
+
+#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h"
+#include "Bullet3Common/shared/b3Int2.h"
+
+
+// One hull face: m_plane.xyz is used as the face normal by the SAT tests, and
+// m_indexOffset / m_numIndices locate the face's entries in the index array.
+typedef struct
+{
+	float4 m_plane;
+	int m_indexOffset;
+	int m_numIndices;
+} btGpuFace;
+
+// make_float4(a,b,c,d) expands to the OpenCL vector literal (float4)(a,b,c,d).
+#define make_float4 (float4)
+
+
+// Cross product of two float4 vectors, forwarded to the OpenCL built-in cross().
+// Earlier hand-written variants that zeroed .w explicitly were left disabled upstream.
+__inline
+float4 cross3(float4 a, float4 b)
+{
+	const float4 product = cross(a, b);
+	return product;
+}
+
+__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + v = make_float4(v.xyz,0.f); + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + +inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int 
i=0;i<numVerts;i++) + { + float dp = dot(vertices[hull->m_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, __global const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;i<numVerts;i++) + { + float dp = dot(vertices[hull->m_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA,const float4 ornA, + const float4 posB,const float4 ornB, + float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); + project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + *depth = d0<d1 ? 
d0:d1; + return true; +} + + + + +inline bool IsAlmostZero(const float4 v) +{ + if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) + return false; + return true; +} + + + +bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = hullA->m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + +bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* verticesA, + __global const float4* uniqueEdgesA, + __global const btGpuFace* facesA, + __global const int* indicesA, + const float4* verticesB, + const float4* uniqueEdgesB, + const btGpuFace* facesB, + const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = 
hullA->m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS *= -1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + +bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = qtRotate(ornA,edge0); + + for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = qtRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje *= -1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + result = false; + + 
float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? d0:d1; + result = true; + + } + + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + +inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA,const float4 ornA, + const float4 posB,const float4 ornB, + float4* sep_axis, __global const float4* vertices,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0); + project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + *depth = d0<d1 ? d0:d1; + return true; +} + + +bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + { + int numFacesA = hullA->m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4 normal = faces[hullA->m_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + + curPlaneTests++; + + float d; + if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d)) + return false; + + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + + return true; +} + + + + +bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const 
ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* unitSphereDirections, + int numUnitSphereDirections, + float4* sep, + float* dmin) +{ + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test unit sphere directions + for (int i=0;i<numUnitSphereDirections;i++) + { + + float4 crossje; + crossje = unitSphereDirections[i]; + + if (dot3F4(DeltaC2,crossje)>0) + crossje *= -1.f; + { + float dist; + bool result = true; + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? d0:d1; + result = true; + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + +bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = qtRotate(ornA,edge0); + + for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = qtRotate(ornB,edge1); + + + float4 crossje 
= cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje*=-1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? d0:d1; + result = true; + + } + + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + +// work-in-progress +__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global const btGpuChildShape* gpuChildShapes, + __global volatile float4* gpuCompoundSepNormalsOut, + __global volatile int* gpuHasCompoundSepNormalsOut, + int numCompoundPairs + ) +{ + + int i = get_global_id(0); + if (i<numCompoundPairs) + { + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = 
gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + gpuHasCompoundSepNormalsOut[i] = 0; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int shapeTypeA = collidables[collidableIndexA].m_shapeType; + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + + + if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) + { + return; + } + + int hasSeparatingAxis = 5; + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + posA.w = 0.f; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = make_float4(1,0,0,0); + bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + + if (!sepB) + { + 
hasSeparatingAxis = 0; + } else//(!sepB) + { + bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + if (sepEE) + { + gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); + gpuHasCompoundSepNormalsOut[i] = 1; + }//sepEE + }//(!sepB) + }//(!sepA) + + + } + +} + +/* MyUnQuantize: rebuilds a float vector from three quantized unsigned short components by dividing each by the matching quantization component, setting w to 0 and adding bvhAabbMin. */ +inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) +{ + b3Float4 vecOut; + vecOut = b3MakeFloat4( + (float)(vecIn[0]) / (quantization.x), + (float)(vecIn[1]) / (quantization.y), + (float)(vecIn[2]) / (quantization.z), + 0.f); + + vecOut += bvhAabbMin; + return vecOut; +} +/* MyUnQuantizeGlobal: same arithmetic as MyUnQuantize, for a vecIn pointer declared in the __global address space. */ +inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) +{ + b3Float4 vecOut; + vecOut = b3MakeFloat4( + (float)(vecIn[0]) / (quantization.x), + (float)(vecIn[1]) / (quantization.y), + (float)(vecIn[2]) / (quantization.z), + 0.f); + + vecOut += bvhAabbMin; + return vecOut; +} + +/* findCompoundPairsKernel: one work-item per entry of pairs (guarded by i<numPairs). Pairs whose two bodies both have m_invMass==0 are skipped. For pairs that involve a SHAPE_COMPOUND_OF_CONVEX_HULLS collidable it appends (bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB) records to gpuCompoundPairsOut, writing -1 in the child slot of a side that is not a compound. Slots are claimed with atomic_inc(numCompoundPairsOut); a record is stored only while the claimed index is below maxNumCompoundPairsCapacity, but the counter is incremented either way. */ +// work-in-progress +__kernel void findCompoundPairsKernel( __global const int4* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global b3Aabb_t* aabbLocalSpace, + __global const btGpuChildShape* gpuChildShapes, + __global volatile int4* gpuCompoundPairsOut, + __global volatile int* numCompoundPairsOut, + __global const b3BvhSubtreeInfo* subtrees, + __global const b3QuantizedBvhNode* quantizedNodes, + __global const b3BvhInfo* bvhInfos, + int numPairs, + int maxNumCompoundPairsCapacity + ) +{ + + int i = get_global_id(0); + + if (i<numPairs) + { + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + 
int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + //once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } +/* Both sides are compounds: test every subtree of A's BVH against every subtree of B's BVH, using world-space AABBs built from the unquantized subtree bounds. */ + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; + int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; + int numSubTreesA = bvhInfos[bvhA].m_numSubTrees; + int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset; + int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset; + + + int numSubTreesB = bvhInfos[bvhB].m_numSubTrees; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + + for (int p=0;p<numSubTreesA;p++) + { + b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p]; + //bvhInfos[bvhA].m_quantization + b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); + b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); + + b3Float4 aabbAMinOut,aabbAMaxOut; + float margin=0.f; + b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); + + for (int q=0;q<numSubTreesB;q++) + { + b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q]; + + b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); + b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); + + b3Float4 
aabbBMinOut,aabbBMaxOut; + float margin=0.f; + b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); + + + + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (aabbOverlap) + { + + int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset; + int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; + + int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset; + int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; + +/* Depth-first walk over (node of A, node of B) pairs with an explicit fixed-size stack, seeded with the two subtree root nodes. */ + b3Int2 nodeStack[B3_MAX_STACK_DEPTH]; + b3Int2 node0; + node0.x = startNodeIndexA; + node0.y = startNodeIndexB; + int maxStackDepth = B3_MAX_STACK_DEPTH; + int depth=0; + nodeStack[depth++]=node0; + + do + { + b3Int2 node = nodeStack[--depth]; + + b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); + b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); + + b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); + b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); + + float margin=0.f; + b3Float4 aabbAMinOut,aabbAMaxOut; + b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); + + b3Float4 aabbBMinOut,aabbBMaxOut; + b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); + + + bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (nodeOverlap) + { + bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]); + bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]); + bool isInternalA = !isLeafA; + bool isInternalB = !isLeafB; + + //fail, even though it might hit two leaf nodes + if 
(depth+4>maxStackDepth && !(isLeafA && isLeafB)) + { + //printf("Error: traversal exceeded maxStackDepth"); + continue; + } +/* Overlapping pair: push child pairs while either node is internal (up to four pushes); a leaf/leaf pair is emitted as a compound pair. */ + if(isInternalA) + { + int nodeAleftChild = node.x+1; + bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]); + int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]); + + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); + int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); + + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); + } + else + { + nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); + nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); + } + } + else + { + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); + int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); + } + else + { + int compoundPairIdx = atomic_inc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]); + int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]); + gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + } + } + } + } + } while (depth); + } + } + } + + return; + } +/* Reached only when the pair is not compound-vs-compound (that case returned above): a compound on one side is handled by looping over its child shapes. */ + + + + + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + + if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + + int numChildrenA = collidables[collidableIndexA].m_numChildShapes; + for (int c=0;c<numChildrenA;c++) + { + int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; + int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + + int shapeIndexA = collidables[childColIndexA].m_shapeIndex; + b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA]; + float margin = 0.f; + + b3Float4 aabbAMinWS; + b3Float4 aabbAMaxWS; + + b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin, + newPosA, + newOrnA, + &aabbAMinWS,&aabbAMaxWS); + + + if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + int numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int b=0;b<numChildrenB;b++) + { + int 
childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + + int shapeIndexB = collidables[childColIndexB].m_shapeIndex; + b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB]; + + b3Float4 aabbBMinWS; + b3Float4 aabbBMaxWS; + + b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin, + newPosB, + newOrnB, + &aabbBMinWS,&aabbBMaxWS); + + + + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS); + if (aabbOverlap) + { + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + float4 posA = newPosA; + posA.w = 0.f; + float4 posB = newPosB; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = newOrnA; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =newOrnB; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + + {// + int compoundPairIdx = atomic_inc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + } + }// + }//fi(1) + } //for (int b=0 + }//if (collidables[collidableIndexB]. 
+ else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + if (1) + { + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + float4 posA = newPosA; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = newOrnA; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + + { + int compoundPairIdx = atomic_inc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1); + }//if (compoundPairIdx<maxNumCompoundPairsCapacity) + }// + }//fi (1) + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + }//for (int b=0;b<numChildrenB;b++) + return; + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) + && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int numChildrenB = collidables[collidableIndexB].m_numChildShapes; + for (int b=0;b<numChildrenB;b++) + { + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = qtRotate(ornB,childPosB)+posB; + float4 newOrnB = qtMul(ornB,childOrnB); + + int shapeIndexB = collidables[childColIndexB].m_shapeIndex; + + + ////////////////////////////////////// + + if (1) + { + int numFacesA = 
convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = newPosB; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =newOrnB; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + {// + int compoundPairIdx = atomic_inc(numCompoundPairsOut); + if (compoundPairIdx<maxNumCompoundPairsCapacity) + { + gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB); + }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) + }// + }//fi (1) + }//for (int b=0;b<numChildrenB;b++) + return; + }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + return; + }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + }//i<numPairs +} +/* findSeparatingAxisKernel: one work-item per pair. Writes hasSeparatingAxis[i]=0 and returns when both bodies have m_invMass==0 or when either collidable is not SHAPE_CONVEX_HULL. Otherwise it calls findSeparatingAxis for (A,B), then for (B,A), then findSeparatingAxisEdgeEdge, stopping with hasSeparatingAxis[i]=0 at the first call that returns false; if all three return true it stores 1 and writes sepNormal to separatingNormals[i]. */ +// work-in-progress +__kernel void findSeparatingAxisKernel( __global const int4* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global volatile float4* separatingNormals, + __global volatile int* hasSeparatingAxis, + int numPairs + ) +{ + + int i = get_global_id(0); + + if (i<numPairs) + { + + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + 
//once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + hasSeparatingAxis[i] = 0; + return; + } + + + if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) + { + hasSeparatingAxis[i] = 0; + return; + } + + if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH)) + { + hasSeparatingAxis[i] = 0; + return; + } + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + + float dmin = FLT_MAX; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal; + + bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + vertices,uniqueEdges,faces, + indices,&sepNormal,&dmin); + hasSeparatingAxis[i] = 4; + if (!sepA) + { + hasSeparatingAxis[i] = 0; + } else + { + bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, + posA,ornA, + DeltaC2, + vertices,uniqueEdges,faces, + indices,&sepNormal,&dmin); + + if (!sepB) + { + hasSeparatingAxis[i] = 0; + } else + { + bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + vertices,uniqueEdges,faces, + indices,&sepNormal,&dmin); + if (!sepEE) + { + hasSeparatingAxis[i] = 0; + } else + { + hasSeparatingAxis[i] = 1; + separatingNormals[i] = sepNormal; + } + } + } + + } + +} + +/* findSeparatingAxisVertexFaceKernel: face-normal stage of the separating-axis test. Clears hasSeparatingAxis[i], returns early for pairs where both bodies have m_invMass==0 or either collidable is not SHAPE_CONVEX_HULL, initialises dmins[i] to FLT_MAX, then calls findSeparatingAxis for (A,B) and for (B,A). When both calls return true it stores dmin in dmins[i], sepNormal in separatingNormals[i] and 1 in hasSeparatingAxis[i]. */ +__kernel void findSeparatingAxisVertexFaceKernel( 
__global const int4* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global volatile float4* separatingNormals, + __global volatile int* hasSeparatingAxis, + __global float* dmins, + int numPairs + ) +{ + + int i = get_global_id(0); + + if (i<numPairs) + { + + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + hasSeparatingAxis[i] = 0; + + //once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } + + + if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) + { + return; + } + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + + float dmin = FLT_MAX; + + dmins[i] = dmin; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal; + + bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + vertices,uniqueEdges,faces, + 
indices,&sepNormal,&dmin); + hasSeparatingAxis[i] = 4; + if (!sepA) + { + hasSeparatingAxis[i] = 0; + } else + { + bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, + posA,ornA, + DeltaC2, + vertices,uniqueEdges,faces, + indices,&sepNormal,&dmin); + + if (sepB) + { + dmins[i] = dmin; + hasSeparatingAxis[i] = 1; + separatingNormals[i] = sepNormal; + } else + { + /* The (B,A) face test returned false: clear the provisional 4 written above so later stages that test hasSeparatingAxis[i] for non-zero skip this pair, as findSeparatingAxisKernel does in its !sepB branch. */ + hasSeparatingAxis[i] = 0; + } + } + + } + +} + +/* findSeparatingAxisEdgeEdgeKernel: edge-edge stage, run only for pairs whose hasSeparatingAxis[i] is non-zero. Starts from dmins[i] and separatingNormals[i]; when m_numUniqueEdges of A times m_numUniqueEdges of B is at most numUnitSphereDirections it calls findSeparatingAxisEdgeEdge and stores 0 in hasSeparatingAxis[i] if that returns false, otherwise 1 plus the updated normal. Pairs above that limit are left untouched by this kernel. */ +__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global btAabbCL* aabbs, + __global float4* separatingNormals, + __global int* hasSeparatingAxis, + __global float* dmins, + __global const float4* unitSphereDirections, + int numUnitSphereDirections, + int numPairs + ) +{ + + int i = get_global_id(0); + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + + float dmin = dmins[i]; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = 
separatingNormals[i]; + + + + bool sepEE = false; + int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; + if (numEdgeEdgeDirections<=numUnitSphereDirections) + { + sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + vertices,uniqueEdges,faces, + indices,&sepNormal,&dmin); + + if (!sepEE) + { + hasSeparatingAxis[i] = 0; + } else + { + hasSeparatingAxis[i] = 1; + separatingNormals[i] = sepNormal; + } + } + /* + ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy + else + { + sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, + posB,ornB, + DeltaC2, + vertices,unitSphereDirections,numUnitSphereDirections, + &sepNormal,&dmin); + if (!sepEE) + { + hasSeparatingAxis[i] = 0; + } else + { + hasSeparatingAxis[i] = 1; + separatingNormals[i] = sepNormal; + } + } + */ + } //if (hasSeparatingAxis[i]) + }//(i<numPairs) +} +/* findClippingFaces: picks the face of hullB whose world-space normal has the largest dot product with separatingNormal and the face of hullA with the smallest, then writes for pairIndex: the world-space vertices of both faces (at most capacityWorldVerts each) into worldVertsB1 and worldVertsA1, the chosen A-face normal into worldNormalsA1, and (closestFaceA, closestFaceB, numVerticesA, numWorldVertsB1) into clippingFaces. minDist and maxDist are not read in the body, and the returned numContactsOut is never changed from 0. */ + + + + +inline int findClippingFaces(const float4 separatingNormal, + const ConvexPolyhedronCL* hullA, + __global const ConvexPolyhedronCL* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=0; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); 
+ const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; + int numVertices = polyB.m_numIndices; + if (numVertices>capacityWorldVerts) + numVertices = capacityWorldVerts; + + for(int e0=0;e0<numVertices;e0++) + { + if (e0<capacityWorldVerts) + { + const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + } + + int closestFaceA=0; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + if (numVerticesA>capacityWorldVerts) + numVerticesA = capacityWorldVerts; + + for(int e0=0;e0<numVerticesA;e0++) + { + if (e0<capacityWorldVerts) + { + const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} +/* findConcaveSeparatingAxisKernel: one work-item per entry of concavePairs (x = body A, y = body B, z = face index f in A's shape, w = child shape index read when B is a compound). Builds a temporary convex polyhedron from triangle f (front face, back face and three edge planes), marks the pair inactive (w = -1) if the triangle AABB does not overlap aabbs[bodyIndexB], and otherwise runs the LocalA, LocalB and EdgeEdgeLocalA separating-axis tests. When all three return true it stores the axis (with dmin in w) and calls findClippingFaces; otherwise it sets concavePairs[pairIdx].w to -1. concavePairs[pairIdx].z is overwritten with -1 at the end of the main path. */ + + + +// work-in-progress +__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const 
ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global const btGpuChildShape* gpuChildShapes, + __global btAabbCL* aabbs, + __global float4* concaveSeparatingNormalsOut, + __global int* concaveHasSeparatingNormals, + __global int4* clippingFacesOut, + __global float4* worldVertsA1GPU, + __global float4* worldNormalsAGPU, + __global float4* worldVertsB1GPU, + int vertexFaceCapacity, + int numConcavePairs + ) +{ + + int i = get_global_id(0); + if (i>=numConcavePairs) + return; + + concaveHasSeparatingNormals[i] = 0; + + int pairIdx = i; + + int bodyIndexA = concavePairs[i].x; + int bodyIndexB = concavePairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& + collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + concavePairs[pairIdx].w = -1; + return; + } + + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + int numActualConcaveConvexTests = 0; + + int f = concavePairs[i].z; + + bool overlap = false; + + ConvexPolyhedronCL convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + float4 localCenter = make_float4(0.f,0.f,0.f,0.f); + + btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + float4 triMinAabb, triMaxAabb; + btAabbCL triAabb; + triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); + triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); + + float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = indices[face.m_indexOffset+i]; + float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] 
= vert; + localCenter += vert; + + triAabb.m_min = min(triAabb.m_min,vert); + triAabb.m_max = max(triAabb.m_max,vert); + + } + + overlap = true; + overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; + overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; + overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; + + if (overlap) + { + float dmin = FLT_MAX; + int hasSeparatingAxis=5; + float4 sepAxis=make_float4(1,2,3,4); + + int localCC=0; + numActualConcaveConvexTests++; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front side of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back side of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool 
addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i<numVertices;i++) + { + float4 v0 = verticesA[i]; + float4 v1 = verticesA[prevVertex]; + + float4 edgeNormal = normalize(cross(normal,v1-v0)); + float c = -dot(edgeNormal,v0); + + facesA[fidx].m_numIndices = 2; + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[curUsedIndices++]=i; + indicesA[curUsedIndices++]=prevVertex; + + facesA[fidx].m_plane.x = edgeNormal.x; + facesA[fidx].m_plane.y = edgeNormal.y; + facesA[fidx].m_plane.z = edgeNormal.z; + facesA[fidx].m_plane.w = c; + fidx++; + prevVertex = i; + } + } + convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; + convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); + + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + + + + + /////////////////// + ///compound shape support + + if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + int compoundChild = concavePairs[pairIdx].w; + int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + shapeIndexB = collidables[childColIndexB].m_shapeIndex; + } + ////////////////// + + float4 c0local = convexPolyhedronA.m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + + + bool sepA = findSeparatingAxisLocalA( 
&convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + DeltaC2, + verticesA,uniqueEdgesA,facesA,indicesA, + vertices,uniqueEdges,faces,indices, + &sepAxis,&dmin); + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, + posB,ornB, + posA,ornA, + DeltaC2, + vertices,uniqueEdges,faces,indices, + verticesA,uniqueEdgesA,facesA,indicesA, + &sepAxis,&dmin); + + if (!sepB) + { + hasSeparatingAxis = 0; + } else + { + bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + DeltaC2, + verticesA,uniqueEdgesA,facesA,indicesA, + vertices,uniqueEdges,faces,indices, + &sepAxis,&dmin); + + if (!sepEE) + { + hasSeparatingAxis = 0; + } else + { + hasSeparatingAxis = 1; + } + } + } + + if (hasSeparatingAxis) + { + sepAxis.w = dmin; + concaveSeparatingNormalsOut[pairIdx]=sepAxis; + concaveHasSeparatingNormals[i]=1; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + + + findClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1GPU, + worldNormalsAGPU, + worldVertsB1GPU, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + + + } else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } + } + else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } + + concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts +} + + + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl new file mode 100644 index 0000000000..f433971741 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -0,0 +1,1888 @@ + +#define 
TRIANGLE_NUM_CONVEX_FACES 5 + + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile __global int* +#endif + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + +#define max2 max +#define min2 min + +typedef unsigned int u32; + + + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" + + + +#define GET_NPOINTS(x) (x).m_worldNormalOnB.w + + + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + +/* fastDiv and fastDiv4: thin wrappers that forward to native_divide for a scalar and a float4 respectively. */ +__inline +float fastDiv(float numerator, float denominator) +{ + return native_divide(numerator, denominator); +// return numerator/denominator; +} + +__inline +float4 fastDiv4(float4 
numerator, float4 denominator) +{ + return native_divide(numerator, denominator); +} + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} + +//#define dot3F4 dot +/* dot3F4: dot product of the xyz parts only; w of both inputs is replaced by 0 before calling dot. */ +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); +/* qtMul: quaternion product of a and b, with the vector part stored in xyz and the scalar part in w. */ + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} +/* qtInvert: negates xyz and keeps w (the conjugate); no normalization is applied here. */ +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} +/* transform: rotates *p by *orientation with qtRotate, then adds *translation. */ +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + +/* lerp3: component-wise a + (b - a) * t on x, y and z; w of the result is 0. */ +__inline float4 lerp3(const float4 a,const float4 b, float t) +{ + return make_float4( a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); +} + + + +// Clips a 
face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; + //double-check next test + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; +//double-check next test + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + 
// Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + +int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, + const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]]; + const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = 
qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;i<numVertsIn;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + //resultOut.addContactPoint(separatingNormal,point,depth); + contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + } + + return numContactsOut; +} + + + +int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, + const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + const float4* verticesA, + const b3GpuFace_t* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float 
dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; + const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int 
i=0;i<numVertsIn;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + //resultOut.addContactPoint(separatingNormal,point,depth); + contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + } + + return numContactsOut; +} + +int clipHullAgainstHull(const float4 separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + float4* localContactsOut, + int localContactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, + faces, + indices,localContactsOut,localContactCapacity); + } + + 
return numContactsOut; +} + + +int clipHullAgainstHullLocalA(const float4 separatingNormal, + const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const b3GpuFace_t* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + float4* localContactsOut, + int localContactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, + verticesA,facesA,indicesA, + verticesB,facesB,indicesB, + localContactsOut,localContactCapacity); + } + + return numContactsOut; +} + +#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; +#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} +#define REDUCE_MAX(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = 
(v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } + +int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = make_float4(0.f); + { + + for (int i=0;i<nPoints;i++) + center += p[i]; + center /= (float)nPoints; + } + + + + // sample 4 directions + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + + //keep point with deepest penetration + float minW= FLT_MAX; + + int minIndex=-1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for(int ie = 0; ie<nPoints; ie++ ) + { + if (p[ie].w<minW) + { + minW = p[ie].w; + minIndex=ie; + } + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + if (f<maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4( -u, r ); + if (f<maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + + f = dot3F4( v, r ); + if (f<maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4( -v, r ); + if (f<maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; + +} + + +int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx) +{ + contactIdx[0] = 0; + contactIdx[1] = 1; + contactIdx[2] = 2; + contactIdx[3] = 3; + + if( nPoints == 0 ) return 0; + + nPoints = min2( 
nPoints, 4 ); + return nPoints; + +} + + + +int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx) +{ + if( nPoints == 0 ) return 0; + + nPoints = min2( nPoints, 64 ); + + float4 center = make_float4(0.f); + { + float4 v[64]; + for (int i=0;i<nPoints;i++) + v[i] = p[i]; + //memcpy( v, p, nPoints*sizeof(float4) ); + PARALLEL_SUM( v, nPoints ); + center = v[0]/(float)nPoints; + } + + + + { // sample 4 directions + if( nPoints < 4 ) + { + for(int i=0; i<nPoints; i++) + contactIdx[i] = i; + return nPoints; + } + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + int idx[4]; + + float2 max00 = make_float2(0,FLT_MAX); + { + // idx, distance + { + { + int4 a[64]; + for(int ie = 0; ie<nPoints; ie++ ) + { + + + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( -u, r ); + a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( v, r ); + a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( -v, r ); + a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + } + + for(int ie=0; ie<nPoints; ie++) + { + a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x; + a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; + a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; + a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w; + } + + idx[0] = (int)a[0].x & 0xff; + idx[1] = (int)a[0].y & 0xff; + idx[2] = (int)a[0].z & 0xff; + idx[3] = (int)a[0].w & 0xff; + } + } + + { + float2 h[64]; + PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); + REDUCE_MIN( h, nPoints ); + max00 = h[0]; + } + } + + contactIdx[0] = idx[0]; + contactIdx[1] = idx[1]; + contactIdx[2] = idx[2]; + contactIdx[3] = idx[3]; + + + return 4; + } +} + + + +__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const float4* closestPointsWorld, + __global const float4* separatingNormalsWorld, + __global const int* contactCounts, + __global const int* contactOffsets, + __global struct b3Contact4Data* restrict contactsOut, + counter32_t nContactsOut, + int contactCapacity, + int numPairs, + int pairIndex + ) +{ + int idx = get_global_id(0); + + if (idx<numPairs) + { + float4 normal = separatingNormalsWorld[idx]; + int nPoints = contactCounts[idx]; + __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]]; + float4 localPoints[64]; + for (int i=0;i<nPoints;i++) + { + localPoints[i] = pointsIn[i]; + } + + int contactIdx[4];// = {-1,-1,-1,-1}; + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + if (dstIdx<contactCapacity) + { + __global struct b3Contact4Data* c = contactsOut + dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = idx; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? 
-bodyB:bodyB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + for (int i=0;i<nContacts;i++) + { + c->m_worldPosB[i] = localPoints[contactIdx[i]]; + } + GET_NPOINTS(*c) = nContacts; + } + } +} + + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + +void trMul(float4 translationA, Quaternion orientationA, + float4 translationB, Quaternion orientationB, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtMul(orientationA,orientationB); + *translationOut = transform(&translationB,&translationA,&orientationA); +} + + + + +__kernel void clipHullHullKernel( __global int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numPairs, + int contactCapacity) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + if (hasSeparatingAxis[i]) + { + + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + + int numLocalContactsOut = 
clipHullAgainstHull(separatingNormals[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, + rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + + int mprContactIndex = pairs[pairIndex].z; + + int dstIdx = mprContactIndex; + if (dstIdx<0) + { + AppendInc( nGlobalContactsOut, dstIdx ); + } + + if (dstIdx<contactCapacity) + { + pairs[pairIndex].z = dstIdx; + + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + for (int i=0;i<nReducedContacts;i++) + { + //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact + if (i>0||(mprContactIndex<0)) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + +__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const 
b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const b3GpuChildShape_t* gpuChildShapes, + __global const float4* gpuCompoundSepNormalsOut, + __global const int* gpuHasCompoundSepNormalsOut, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numCompoundPairs, int maxContactCapacity) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numCompoundPairs) + { + + if (gpuHasCompoundSepNormalsOut[i]) + { + + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = 
transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -gpuCompoundSepNormalsOut[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if ((dstIdx+nReducedContacts) < maxContactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = gpuCompoundPairs[pairIndex].x; + int bodyB = gpuCompoundPairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;i<nReducedContacts;i++) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (gpuHasCompoundSepNormalsOut[i]) + }// if (i<numCompoundPairs) + +} + + + +__kernel void sphereSphereCollisionKernel( __global const int4* pairs, + __global const 
b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int contactCapacity, + int numPairs) +{ + + int i = get_global_id(0); + int pairIndex = i; + + if (i<numPairs) + { + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) + { + //sphere-sphere + float radiusA = collidables[collidableIndexA].m_radius; + float radiusB = collidables[collidableIndexB].m_radius; + float4 posA = rigidBodies[bodyIndexA].m_pos; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + float4 diff = posA-posB; + float len = length(diff); + + ///iff distance positive, don't generate a new contact + if ( len <= (radiusA+radiusB)) + { + ///distance (negative means penetration) + float dist = len - (radiusA+radiusB); + float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); + if (len > 0.00001) + { + normalOnSurfaceB = diff / len; + } + float4 contactPosB = posB + normalOnSurfaceB*radiusB; + contactPosB.w = dist; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if (dstIdx < contactCapacity) + { + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normalOnSurfaceB; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_worldPosB[0] = contactPosB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + 
GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if ( len <= (radiusA+radiusB)) + }//SHAPE_SPHERE SHAPE_SPHERE + }//if (i<numPairs) +} + +__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const b3GpuChildShape_t* gpuChildShapes, + __global const float4* separatingNormals, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int contactCapacity, + int numConcavePairs) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numConcavePairs) + { + //negative value means that the pair is invalid + if (concavePairsIn[i].w<0) + return; + + int bodyIndexA = concavePairsIn[i].x; + int bodyIndexB = concavePairsIn[i].y; + int f = concavePairsIn[i].z; + int childShapeIndexA = f; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + /////////////////////////////////////////////////////////////// + + + bool overlap = false; + + b3ConvexPolyhedronData_t convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + float4 localCenter = make_float4(0.f,0.f,0.f,0.f); + + b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + + float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = 
indices[face.m_indexOffset+i]; + float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] = vert; + localCenter += vert; + } + + float dmin = FLT_MAX; + + int localCC=0; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot3F4(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i<numVertices;i++) + { + float4 v0 = verticesA[i]; + float4 v1 = verticesA[prevVertex]; + + float4 edgeNormal = normalize(cross(normal,v1-v0)); + float c = -dot3F4(edgeNormal,v0); + + facesA[fidx].m_numIndices = 2; + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[curUsedIndices++]=i; + indicesA[curUsedIndices++]=prevVertex; + + 
facesA[fidx].m_plane.x = edgeNormal.x; + facesA[fidx].m_plane.y = edgeNormal.y; + facesA[fidx].m_plane.z = edgeNormal.z; + facesA[fidx].m_plane.w = c; + fidx++; + prevVertex = i; + } + } + convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; + convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); + + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + + + float4 sepAxis = separatingNormals[i]; + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + int childShapeIndexB =-1; + if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + /////////////////// + ///compound shape support + + childShapeIndexB = concavePairsIn[pairIndex].w; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + shapeIndexB = collidables[childColIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + + } + + //////////////////////////////////////// + + + + int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis, + &convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + &verticesA,&facesA,&indicesA, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( 
nGlobalContactsOut, dstIdx ); + if (dstIdx<contactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = concavePairsIn[pairIndex].x; + int bodyB = concavePairsIn[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;i<nReducedContacts;i++) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (i<numPairs) +} + + + + + + +int findClippingFaces(const float4 separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int 
e0=0;e0<numVertices;e0++) + { + const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + +int clipFaces(__global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + __global float4* worldVertsB2, + int capacityWorldVertsB2, + const float minDist, float maxDist, + __global int4* clippingFaces, + int pairIndex) +{ + int numContactsOut = 0; + + int closestFaceA = clippingFaces[pairIndex].x; + int closestFaceB = clippingFaces[pairIndex].y; + int numVertsInA = clippingFaces[pairIndex].z; + int numVertsInB = clippingFaces[pairIndex].w; + + int numVertsOut = 0; + + if (closestFaceA<0) + return numContactsOut; + + __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; + __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; + + + + // clip polygon 
to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e0<numVertsInA;e0++) + { + const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; + const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; + const float4 WorldEdge0 = aw - bw; + float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = aw; + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); + __global float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsInB = numVertsOut; + numVertsOut = 0; + } + + //float4 planeNormalWS = worldNormalsA1[pairIndex]; + //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + + + /*for (int i=0;i<numVertsInB;i++) + { + pVtxOut[i] = pVtxIn[i]; + }*/ + + + + + //numVertsInB=0; + + float4 planeNormalWS = worldNormalsA1[pairIndex]; + float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + for (int i=0;i<numVertsInB;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + + clippingFaces[pairIndex].w =numContactsOut; + + + return numContactsOut; + +} + + + + +__kernel void findClippingFacesKernel( __global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const float4* separatingNormals, + 
__global const int* hasSeparatingAxis, + __global int4* clippingFacesOut, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + int numPairs + ) +{ + + int i = get_global_id(0); + int pairIndex = i; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + int numLocalContactsOut = findClippingFaces(separatingNormals[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, + rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, + worldVertsA1, + worldNormalsA1, + worldVertsB1,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + clippingFacesOut,i); + + + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + + + +__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global int4* clippingFacesOut, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + __global float4* worldVertsB2, + int vertexFaceCapacity, + int numPairs, + int debugMode + ) +{ + int i = get_global_id(0); + int pairIndex = i; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + +// int bodyIndexA = pairs[i].x; + // int bodyIndexB = pairs[i].y; + + int numLocalContactsOut = 0; + + int capacityWorldVertsB2 = vertexFaceCapacity; + + __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; + __global float4* pVtxOut = 
&worldVertsB2[pairIndex*capacityWorldVertsB2]; + + + { + __global int4* clippingFaces = clippingFacesOut; + + + int closestFaceA = clippingFaces[pairIndex].x; + int closestFaceB = clippingFaces[pairIndex].y; + int numVertsInA = clippingFaces[pairIndex].z; + int numVertsInB = clippingFaces[pairIndex].w; + + int numVertsOut = 0; + + if (closestFaceA>=0) + { + + + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e0<numVertsInA;e0++) + { + const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; + const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; + const float4 WorldEdge0 = aw - bw; + float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = aw; + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); + __global float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsInB = numVertsOut; + numVertsOut = 0; + } + + float4 planeNormalWS = worldNormalsA1[pairIndex]; + float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + for (int i=0;i<numVertsInB;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + + } + clippingFaces[pairIndex].w =numLocalContactsOut; + + + } + + for (int i=0;i<numLocalContactsOut;i++) + pVtxIn[i] = pVtxOut[i]; + + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + + + + +__kernel void newContactReductionKernel( __global int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const float4* separatingNormals, + 
__global const int* hasSeparatingAxis, + __global struct b3Contact4Data* globalContactsOut, + __global int4* clippingFaces, + __global float4* worldVertsB2, + volatile __global int* nGlobalContactsOut, + int vertexFaceCapacity, + int contactCapacity, + int numPairs + ) +{ + int i = get_global_id(0); + int pairIndex = i; + + int4 contactIdx; + contactIdx=make_int4(0,1,2,3); + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + + + + int nPoints = clippingFaces[pairIndex].w; + + if (nPoints>0) + { + + __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; + float4 normal = -separatingNormals[i]; + + int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); + + int mprContactIndex = pairs[pairIndex].z; + + int dstIdx = mprContactIndex; + + if (dstIdx<0) + { + AppendInc( nGlobalContactsOut, dstIdx ); + } +//#if 0 + + if (dstIdx < contactCapacity) + { + + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + + pairs[pairIndex].w = dstIdx; + + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA =-1; + c->m_childIndexB =-1; + + switch (nReducedContacts) + { + case 4: + c->m_worldPosB[3] = pointsIn[contactIdx.w]; + case 3: + c->m_worldPosB[2] = pointsIn[contactIdx.z]; + case 2: + c->m_worldPosB[1] = pointsIn[contactIdx.y]; + case 1: + if (mprContactIndex<0)//test + c->m_worldPosB[0] = pointsIn[contactIdx.x]; + default: + { + } + }; + + GET_NPOINTS(*c) = nReducedContacts; + + } + + +//#endif + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + + + +} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h 
b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h new file mode 100644 index 0000000000..f0ecfc7851 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h @@ -0,0 +1,2099 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* satClipKernelsCL= \ +"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" +"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" +"#ifdef cl_ext_atomic_counters_32\n" +"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" +"#else\n" +"#define counter32_t volatile __global int*\n" +"#endif\n" +"#define GET_GROUP_IDX get_group_id(0)\n" +"#define GET_LOCAL_IDX get_local_id(0)\n" +"#define GET_GLOBAL_IDX get_global_id(0)\n" +"#define GET_GROUP_SIZE get_local_size(0)\n" +"#define GET_NUM_GROUPS get_num_groups(0)\n" +"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" +"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" +"#define AtomInc(x) atom_inc(&(x))\n" +"#define AtomInc1(x, out) out = atom_inc(&(x))\n" +"#define AppendInc(x, out) out = atomic_inc(x)\n" +"#define AtomAdd(x, value) atom_add(&(x), value)\n" +"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" +"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" +"#define max2 max\n" +"#define min2 min\n" +"typedef unsigned int u32;\n" +"#ifndef B3_CONTACT4DATA_H\n" +"#define B3_CONTACT4DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"//keep 
B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" +"#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" +"#define B3_STATIC\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" +"#endif //B3_FLOAT4_H\n" +"typedef struct b3Contact4Data b3Contact4Data_t;\n" +"struct b3Contact4Data\n" +"{\n" +" b3Float4 m_worldPosB[4];\n" +"// b3Float4 m_localPosA[4];\n" +"// b3Float4 m_localPosB[4];\n" +" b3Float4 
m_worldNormalOnB; // w: m_nPoints\n" +" unsigned short m_restituitionCoeffCmp;\n" +" unsigned short m_frictionCoeffCmp;\n" +" int m_batchIdx;\n" +" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" +" int m_bodyBPtrAndSignBit;\n" +" int m_childIndexA;\n" +" int m_childIndexB;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" +"{\n" +" return (int)contact->m_worldNormalOnB.w;\n" +"};\n" +"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" +"{\n" +" contact->m_worldNormalOnB.w = (float)numPoints;\n" +"};\n" +"#endif //B3_CONTACT4DATA_H\n" +"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" +"#define B3_CONVEX_POLYHEDRON_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +"{\n" +" b3Quat q;\n" +" q=in;\n" +" //return 
b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"typedef struct b3GpuFace b3GpuFace_t;\n" +"struct b3GpuFace\n" +"{\n" +" b3Float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +" int m_unusedPadding1;\n" +" int m_unusedPadding2;\n" +"};\n" +"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" +"struct b3ConvexPolyhedronData\n" +"{\n" +" b3Float4 m_localCenter;\n" +" b3Float4 m_extents;\n" +" b3Float4 mC;\n" +" b3Float4 mE;\n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"};\n" +"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" +"#ifndef B3_COLLIDABLE_H\n" +"#define B3_COLLIDABLE_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"enum b3ShapeTypes\n" +"{\n" +" 
SHAPE_HEIGHT_FIELD=1,\n" +" SHAPE_CONVEX_HULL=3,\n" +" SHAPE_PLANE=4,\n" +" SHAPE_CONCAVE_TRIMESH=5,\n" +" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" +" SHAPE_SPHERE=7,\n" +" MAX_NUM_SHAPE_TYPES,\n" +"};\n" +"typedef struct b3Collidable b3Collidable_t;\n" +"struct b3Collidable\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" union\n" +" {\n" +" float m_radius;\n" +" int m_compoundBvhIndex;\n" +" };\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +"};\n" +"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" +"struct b3GpuChildShape\n" +"{\n" +" b3Float4 m_childPosition;\n" +" b3Quat m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"};\n" +"struct b3CompoundOverlappingPair\n" +"{\n" +" int m_bodyIndexA;\n" +" int m_bodyIndexB;\n" +"// int m_pairType;\n" +" int m_childShapeIndexA;\n" +" int m_childShapeIndexB;\n" +"};\n" +"#endif //B3_COLLIDABLE_H\n" +"#ifndef B3_RIGIDBODY_DATA_H\n" +"#define B3_RIGIDBODY_DATA_H\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" 
out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = 
b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" +"struct b3RigidBodyData\n" +"{\n" +" b3Float4 m_pos;\n" +" b3Quat m_quat;\n" +" b3Float4 m_linVel;\n" +" b3Float4 m_angVel;\n" +" int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"};\n" +"typedef struct b3InertiaData b3InertiaData_t;\n" +"struct b3InertiaData\n" +"{\n" +" b3Mat3x3 m_invInertiaWorld;\n" +" b3Mat3x3 m_initInvInertia;\n" +"};\n" +"#endif //B3_RIGIDBODY_DATA_H\n" +" \n" +"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" +"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" +"#define make_float4 (float4)\n" +"#define make_float2 (float2)\n" +"#define make_uint4 (uint4)\n" +"#define make_int4 (int4)\n" +"#define make_uint2 (uint2)\n" +"#define make_int2 (int2)\n" +"__inline\n" +"float fastDiv(float numerator, float denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"// return numerator/denominator; \n" +"}\n" +"__inline\n" +"float4 
fastDiv4(float4 numerator, float4 denominator)\n" +"{\n" +" return native_divide(numerator, denominator); \n" +"}\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +"}\n" +"//#define dot3F4 dot\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" return fast_normalize(v);\n" +"}\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"typedef float4 Quaternion;\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fastNormalize4(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" 
+"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"}\n" +"__inline float4 lerp3(const float4 a,const float4 b, float t)\n" +"{\n" +" return make_float4( a.x + (b.x - a.x) * t,\n" +" a.y + (b.y - a.y) * t,\n" +" a.z + (b.z - a.z) * t,\n" +" 0.f);\n" +"}\n" +"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" +"{\n" +" \n" +" int ve;\n" +" float ds, de;\n" +" int numVertsOut = 0;\n" +" //double-check next test\n" +" if (numVertsIn < 2)\n" +" return 0;\n" +" \n" +" float4 firstVertex=pVtxIn[numVertsIn-1];\n" +" float4 endVertex = pVtxIn[0];\n" +" \n" +" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +" \n" +" for (ve = 0; ve < numVertsIn; ve++)\n" +" {\n" +" endVertex=pVtxIn[ve];\n" +" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +" if (ds<0)\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start < 0, end < 0, so output endVertex\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" else\n" +" {\n" +" // Start < 0, end >= 0, so output intersection\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start >= 0, end < 0 so output intersection and end\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" }\n" +" firstVertex = endVertex;\n" +" ds = de;\n" +" }\n" +" return numVertsOut;\n" +"}\n" +"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" +"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" +"{\n" +" \n" +" int ve;\n" +" float ds, de;\n" +" int numVertsOut = 0;\n" +"//double-check next test\n" +" if (numVertsIn < 2)\n" +" return 0;\n" +" float4 
firstVertex=pVtxIn[numVertsIn-1];\n" +" float4 endVertex = pVtxIn[0];\n" +" \n" +" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" +" for (ve = 0; ve < numVertsIn; ve++)\n" +" {\n" +" endVertex=pVtxIn[ve];\n" +" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" +" if (ds<0)\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start < 0, end < 0, so output endVertex\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" else\n" +" {\n" +" // Start < 0, end >= 0, so output intersection\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if (de<0)\n" +" {\n" +" // Start >= 0, end < 0 so output intersection and end\n" +" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" +" ppVtxOut[numVertsOut++] = endVertex;\n" +" }\n" +" }\n" +" firstVertex = endVertex;\n" +" ds = de;\n" +" }\n" +" return numVertsOut;\n" +"}\n" +"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, \n" +" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +" float4* worldVertsB2, int capacityWorldVertsB2,\n" +" const float minDist, float maxDist,\n" +" __global const float4* vertices,\n" +" __global const b3GpuFace_t* faces,\n" +" __global const int* indices,\n" +" float4* contactsOut,\n" +" int contactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +" float4* pVtxIn = worldVertsB1;\n" +" float4* pVtxOut = worldVertsB2;\n" +" \n" +" int numVertsIn = numWorldVertsB1;\n" +" int numVertsOut = 0;\n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;face<hullA->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" faces[hullA->m_faceOffset+face].m_plane.x, \n" +" faces[hullA->m_faceOffset+face].m_plane.y, \n" +" faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = 
dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" }\n" +" }\n" +" }\n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +" b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" int numVerticesA = polyA.m_numIndices;\n" +" for(int e0=0;e0<numVerticesA;e0++)\n" +" {\n" +" const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" +" const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +" const float4 edge0 = a - b;\n" +" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = transform(&a,&posA,&ornA);\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" \n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" \n" +" //clip face\n" +" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +" //btSwap(pVtxIn,pVtxOut);\n" +" float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsIn = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +" \n" +" // only keep points that are behind the witness face\n" +" {\n" +" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float localPlaneEq = polyA.m_plane.w;\n" +" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +" for (int i=0;i<numVertsIn;i++)\n" +" {\n" +" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +" if (depth <=minDist)\n" +" {\n" +" depth = minDist;\n" 
+" }\n" +" if (depth <=maxDist)\n" +" {\n" +" float4 pointInWorld = pVtxIn[i];\n" +" //resultOut.addContactPoint(separatingNormal,point,depth);\n" +" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +" }\n" +" }\n" +" }\n" +" return numContactsOut;\n" +"}\n" +"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, \n" +" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" +" float4* worldVertsB2, int capacityWorldVertsB2,\n" +" const float minDist, float maxDist,\n" +" const float4* verticesA,\n" +" const b3GpuFace_t* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB,\n" +" __global const b3GpuFace_t* facesB,\n" +" __global const int* indicesB,\n" +" float4* contactsOut,\n" +" int contactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +" float4* pVtxIn = worldVertsB1;\n" +" float4* pVtxOut = worldVertsB2;\n" +" \n" +" int numVertsIn = numWorldVertsB1;\n" +" int numVertsOut = 0;\n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;face<hullA->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x, \n" +" facesA[hullA->m_faceOffset+face].m_plane.y, \n" +" facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" }\n" +" }\n" +" }\n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +" b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" int numVerticesA = polyA.m_numIndices;\n" +" for(int e0=0;e0<numVerticesA;e0++)\n" +" {\n" +" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" +" 
const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" +" const float4 edge0 = a - b;\n" +" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" +" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = transform(&a,&posA,&ornA);\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" \n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" \n" +" //clip face\n" +" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" +" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" +" //btSwap(pVtxIn,pVtxOut);\n" +" float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsIn = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +" \n" +" // only keep points that are behind the witness face\n" +" {\n" +" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" +" float localPlaneEq = polyA.m_plane.w;\n" +" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" +" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" +" for (int i=0;i<numVertsIn;i++)\n" +" {\n" +" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +" if (depth <=minDist)\n" +" {\n" +" depth = minDist;\n" +" }\n" +" if (depth <=maxDist)\n" +" {\n" +" float4 pointInWorld = pVtxIn[i];\n" +" //resultOut.addContactPoint(separatingNormal,point,depth);\n" +" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +" }\n" +" }\n" +" }\n" +" return numContactsOut;\n" +"}\n" +"int clipHullAgainstHull(const float4 separatingNormal,\n" +" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +" const float4 posA, const Quaternion ornA,const float4 posB, 
const Quaternion ornB, \n" +" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" __global const float4* vertices,\n" +" __global const b3GpuFace_t* faces,\n" +" __global const int* indices,\n" +" float4* localContactsOut,\n" +" int localContactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" int closestFaceB=-1;\n" +" float dmax = -FLT_MAX;\n" +" {\n" +" for(int face=0;face<hullB->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" +" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" {\n" +" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0<numVertices;e0++)\n" +" {\n" +" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" if (closestFaceB>=0)\n" +" {\n" +" numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" +" posA,ornA,\n" +" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" +" faces,\n" +" indices,localContactsOut,localContactCapacity);\n" +" }\n" +" return numContactsOut;\n" +"}\n" +"int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" +" const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" +" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" +" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" const float4* verticesA,\n" +" const b3GpuFace_t* 
facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB,\n" +" __global const b3GpuFace_t* facesB,\n" +" __global const int* indicesB,\n" +" float4* localContactsOut,\n" +" int localContactCapacity)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" int closestFaceB=-1;\n" +" float dmax = -FLT_MAX;\n" +" {\n" +" for(int face=0;face<hullB->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" +" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" {\n" +" const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0<numVertices;e0++)\n" +" {\n" +" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" if (closestFaceB>=0)\n" +" {\n" +" numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" +" posA,ornA,\n" +" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" +" verticesA,facesA,indicesA,\n" +" verticesB,facesB,indicesB,\n" +" localContactsOut,localContactCapacity);\n" +" }\n" +" return numContactsOut;\n" +"}\n" +"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" +"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" +"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" +"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? 
v[i]: v[i+offset]; }\n"
/* extractManifoldSequentialGlobal: selects up to four of the nPoints contact points
   in p and stores their indices in the four lanes of contactIdx[0].
   Behaviour as written:
     - nPoints == 0 returns 0. nPoints <= 4 returns nPoints. In both cases contactIdx
       is not written.
     - nPoints is capped at 64.
     - center is the average of the points. u and v are two normalised directions
       built from nearNormal and p[0] - center with cross3.
     - For each of the directions u, -u, v, -v the index of the point with the
       smallest dot product seen so far is recorded in lanes x, y, z, w.
     - minIndex is the point with the smallest w lane. If it is not among the four
       recorded indices it replaces lane x.
   Returns 4 on this path.
   NOTE(review): the accumulator is named maxDots but is seeded with FLT_MIN and
   updated with a less-than test, so each lane tracks a minimum. FLT_MIN is the
   smallest positive normalised float, not the most negative value. Confirm whether
   -FLT_MAX with a greater-than test was intended.
   NOTE(review): when a comparison never succeeds, for example with NaN input, the
   matching lane of contactIdx[0] keeps whatever the caller put there. */
+"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n"
+"{\n"
+" if( nPoints == 0 )\n"
+" return 0;\n"
+" \n"
+" if (nPoints <=4)\n"
+" return nPoints;\n"
+" \n"
+" \n"
+" if (nPoints >64)\n"
+" nPoints = 64;\n"
+" \n"
+" float4 center = make_float4(0.f);\n"
+" {\n"
+" \n"
+" for (int i=0;i<nPoints;i++)\n"
+" center += p[i];\n"
+" center /= (float)nPoints;\n"
+" }\n"
+" \n"
+" \n"
+" \n"
+" // sample 4 directions\n"
+" \n"
+" float4 aVector = p[0] - center;\n"
+" float4 u = cross3( nearNormal, aVector );\n"
+" float4 v = cross3( nearNormal, u );\n"
+" u = normalize3( u );\n"
+" v = normalize3( v );\n"
+" \n"
+" \n"
+" //keep point with deepest penetration\n"
+" float minW= FLT_MAX;\n"
+" \n"
+" int minIndex=-1;\n"
+" \n"
+" float4 maxDots;\n"
+" maxDots.x = FLT_MIN;\n"
+" maxDots.y = FLT_MIN;\n"
+" maxDots.z = FLT_MIN;\n"
+" maxDots.w = FLT_MIN;\n"
+" \n"
+" // idx, distance\n"
+" for(int ie = 0; ie<nPoints; ie++ )\n"
+" {\n"
+" if (p[ie].w<minW)\n"
+" {\n"
+" minW = p[ie].w;\n"
+" minIndex=ie;\n"
+" }\n"
+" float f;\n"
+" float4 r = p[ie]-center;\n"
+" f = dot3F4( u, r );\n"
+" if (f<maxDots.x)\n"
+" {\n"
+" maxDots.x = f;\n"
+" contactIdx[0].x = ie;\n"
+" }\n"
+" \n"
+" f = dot3F4( -u, r );\n"
+" if (f<maxDots.y)\n"
+" {\n"
+" maxDots.y = f;\n"
+" contactIdx[0].y = ie;\n"
+" }\n"
+" \n"
+" \n"
+" f = dot3F4( v, r );\n"
+" if (f<maxDots.z)\n"
+" {\n"
+" maxDots.z = f;\n"
+" contactIdx[0].z = ie;\n"
+" }\n"
+" \n"
+" f = dot3F4( -v, r );\n"
+" if (f<maxDots.w)\n"
+" {\n"
+" maxDots.w = f;\n"
+" contactIdx[0].w = ie;\n"
+" }\n"
+" \n"
+" }\n"
+" \n"
+" if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n"
+" {\n"
+" //replace the first contact with minimum (todo: replace contact with least penetration)\n"
+" contactIdx[0].x = minIndex;\n"
+" }\n"
+" \n"
+" return 4;\n"
+" \n"
+"}\n"
+"int 
extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"
/* extractManifoldSequentialGlobalFake (signature on the line above): placeholder
   reduction that ignores the point data. It always writes 0, 1, 2, 3 into
   contactIdx[0 .. 3], then returns 0 when nPoints is 0 and min2(nPoints, 4)
   otherwise. p and nearNormal are not read. */
+"{\n"
+" contactIdx[0] = 0;\n"
+" contactIdx[1] = 1;\n"
+" contactIdx[2] = 2;\n"
+" contactIdx[3] = 3;\n"
+" \n"
+" if( nPoints == 0 ) return 0;\n"
+" \n"
+" nPoints = min2( nPoints, 4 );\n"
+" return nPoints;\n"
+" \n"
+"}\n"
/* extractManifoldSequential: reduces the nPoints contact points in p to at most four
   indices written to contactIdx[0 .. 3].
     - nPoints == 0 returns 0 without touching contactIdx.
     - nPoints is limited to 64 with min2. The scratch arrays v and a below hold 64
       entries.
     - center is the average of the points, computed through a private copy and
       PARALLEL_SUM.
     - With fewer than 4 points the indices 0 .. nPoints-1 are written and nPoints is
       returned. Exactly 4 points still go through the reduction path.
     - Otherwise u and v are built from nearNormal and p[0] - center, and for every
       point the dot products with u, -u, v and -v are stored in a[ie] as the raw float
       bit pattern with the low 8 bits replaced by the point index, so that a single
       integer maximum yields both the extreme value and its index. */
+"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n"
+"{\n"
+" if( nPoints == 0 ) return 0;\n"
+" nPoints = min2( nPoints, 64 );\n"
+" float4 center = make_float4(0.f);\n"
+" {\n"
+" float4 v[64];\n"
+" for (int i=0;i<nPoints;i++)\n"
+" v[i] = p[i];\n"
+" //memcpy( v, p, nPoints*sizeof(float4) );\n"
+" PARALLEL_SUM( v, nPoints );\n"
+" center = v[0]/(float)nPoints;\n"
+" }\n"
+" \n"
+" { // sample 4 directions\n"
+" if( nPoints < 4 )\n"
+" {\n"
+" for(int i=0; i<nPoints; i++) \n"
+" contactIdx[i] = i;\n"
+" return nPoints;\n"
+" }\n"
+" float4 aVector = p[0] - center;\n"
+" float4 u = cross3( nearNormal, aVector );\n"
+" float4 v = cross3( nearNormal, u );\n"
+" u = normalize3( u );\n"
+" v = normalize3( v );\n"
+" int idx[4];\n"
+" float2 max00 = make_float2(0,FLT_MAX);\n"
+" {\n"
+" // idx, distance\n"
+" {\n"
+" {\n"
+" int4 a[64];\n"
+" for(int ie = 0; ie<nPoints; ie++ )\n"
+" {\n"
+" \n"
+" \n"
+" float f;\n"
+" float4 r = p[ie]-center;\n"
+" f = dot3F4( u, r );\n"
+" a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
+" f = dot3F4( -u, r );\n"
+" a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
+" f = dot3F4( v, r );\n"
+" a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
+" f = dot3F4( -v, r );\n"
+" a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n"
+" }\n"
/* Fold the per-direction maxima into a[0]. */
+" for(int ie=0; ie<nPoints; ie++)\n"
+" {\n"
+" a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n"
+" a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n"
+" a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n"
+" a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w;\n" +" }\n" +" idx[0] = (int)a[0].x & 0xff;\n" +" idx[1] = (int)a[0].y & 0xff;\n" +" idx[2] = (int)a[0].z & 0xff;\n" +" idx[3] = (int)a[0].w & 0xff;\n" +" }\n" +" }\n" +" {\n" +" float2 h[64];\n" +" PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" +" REDUCE_MIN( h, nPoints );\n" +" max00 = h[0];\n" +" }\n" +" }\n" +" contactIdx[0] = idx[0];\n" +" contactIdx[1] = idx[1];\n" +" contactIdx[2] = idx[2];\n" +" contactIdx[3] = idx[3];\n" +" return 4;\n" +" }\n" +"}\n" +"__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n" +" __global const b3RigidBodyData_t* rigidBodies, \n" +" __global const float4* closestPointsWorld,\n" +" __global const float4* separatingNormalsWorld,\n" +" __global const int* contactCounts,\n" +" __global const int* contactOffsets,\n" +" __global struct b3Contact4Data* restrict contactsOut,\n" +" counter32_t nContactsOut,\n" +" int contactCapacity,\n" +" int numPairs,\n" +" int pairIndex\n" +" )\n" +"{\n" +" int idx = get_global_id(0);\n" +" \n" +" if (idx<numPairs)\n" +" {\n" +" float4 normal = separatingNormalsWorld[idx];\n" +" int nPoints = contactCounts[idx];\n" +" __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" +" float4 localPoints[64];\n" +" for (int i=0;i<nPoints;i++)\n" +" {\n" +" localPoints[i] = pointsIn[i];\n" +" }\n" +" int contactIdx[4];// = {-1,-1,-1,-1};\n" +" contactIdx[0] = -1;\n" +" contactIdx[1] = -1;\n" +" contactIdx[2] = -1;\n" +" contactIdx[3] = -1;\n" +" int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" +" int dstIdx;\n" +" AppendInc( nContactsOut, dstIdx );\n" +" if (dstIdx<contactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = contactsOut + dstIdx;\n" +" c->m_worldNormalOnB = -normal;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = idx;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" 
c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" +" c->m_childIndexA = -1;\n" +" c->m_childIndexB = -1;\n" +" for (int i=0;i<nContacts;i++)\n" +" {\n" +" c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nContacts;\n" +" }\n" +" }\n" +"}\n" +"void trInverse(float4 translationIn, Quaternion orientationIn,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtInvert(orientationIn);\n" +" *translationOut = qtRotate(*orientationOut, -translationIn);\n" +"}\n" +"void trMul(float4 translationA, Quaternion orientationA,\n" +" float4 translationB, Quaternion orientationB,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtMul(orientationA,orientationB);\n" +" *translationOut = transform(&translationB,&translationA,&orientationA);\n" +"}\n" +"__kernel void clipHullHullKernel( __global int4* pairs, \n" +" __global const b3RigidBodyData_t* rigidBodies, \n" +" __global const b3Collidable_t* collidables,\n" +" __global const b3ConvexPolyhedronData_t* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const b3GpuFace_t* faces,\n" +" __global const int* indices,\n" +" __global const float4* separatingNormals,\n" +" __global const int* hasSeparatingAxis,\n" +" __global struct b3Contact4Data* restrict globalContactsOut,\n" +" counter32_t nGlobalContactsOut,\n" +" int numPairs,\n" +" int contactCapacity)\n" +"{\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" float4 worldVertsB1[64];\n" +" float4 worldVertsB2[64];\n" +" int capacityWorldVerts = 64; \n" +" float4 localContactsOut[64];\n" +" int localContactCapacity=64;\n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" if (i<numPairs)\n" +" {\n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" \n" 
+" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
+" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
/* Remainder of clipHullHullKernel. Only pairs flagged in hasSeparatingAxis are
   processed. The two hulls are clipped with clipHullAgainstHull using the body
   positions and orientations directly. */
+" if (hasSeparatingAxis[i])\n"
+" {\n"
+" \n"
+" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
+" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
+" \n"
+" \n"
+" int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n"
+" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
+" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n"
+" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n"
+" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
+" minDist, maxDist,\n"
+" vertices,faces,indices,\n"
+" localContactsOut,localContactCapacity);\n"
+" \n"
+" if (numLocalContactsOut>0)\n"
+" {\n"
/* normal is the negated separating normal. It is negated again when stored below, so
   m_worldNormalOnB receives separatingNormals[i]. */
+" float4 normal = -separatingNormals[i];\n"
+" int nPoints = numLocalContactsOut;\n"
+" float4* pointsIn = localContactsOut;\n"
+" int contactIdx[4];// = {-1,-1,-1,-1};\n"
+" contactIdx[0] = -1;\n"
+" contactIdx[1] = -1;\n"
+" contactIdx[2] = -1;\n"
+" contactIdx[3] = -1;\n"
+" \n"
+" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
+" \n"
+" \n"
/* pairs[pairIndex].z holds a contact slot that may already have been assigned to this
   pair. A negative value means none yet, and a new slot is reserved with AppendInc.
   The chosen slot is written back to pairs[pairIndex].z below. */
+" int mprContactIndex = pairs[pairIndex].z;\n"
+" int dstIdx = mprContactIndex;\n"
+" if (dstIdx<0)\n"
+" {\n"
+" AppendInc( nGlobalContactsOut, dstIdx );\n"
+" }\n"
+" if (dstIdx<contactCapacity)\n"
+" {\n"
+" pairs[pairIndex].z = dstIdx;\n"
+" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
+" c->m_worldNormalOnB = -normal;\n"
+" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
+" c->m_batchIdx = pairIndex;\n"
+" int bodyA = pairs[pairIndex].x;\n"
+" int bodyB = pairs[pairIndex].y;\n"
/* Body indices are stored negated when the body has m_invMass == 0. */
+" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
+" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
+" c->m_childIndexA = -1;\n"
+" c->m_childIndexB = -1;\n"
+" for (int 
i=0;i<nReducedContacts;i++)\n"
+" {\n"
/* The loop variable i declared here shadows the work item index i of the kernel. */
+" //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n"
+" if (i>0||(mprContactIndex<0))\n"
+" {\n"
+" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
+" }\n"
+" }\n"
+" GET_NPOINTS(*c) = nReducedContacts;\n"
+" }\n"
+" \n"
+" }// if (numContactsOut>0)\n"
+" }// if (hasSeparatingAxis[i])\n"
+" }// if (i<numPairs)\n"
+"}\n"
/* clipCompoundsHullHullKernel: one work item per compound pair i < numCompoundPairs.
   Each gpuCompoundPairs entry carries the two body indices in x and y and the two child
   shape indices in z and w. A negative child index selects the collidable of the body
   itself. Scratch buffers are private arrays of 64 float4 each. */
+"__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n"
+" __global const b3RigidBodyData_t* rigidBodies, \n"
+" __global const b3Collidable_t* collidables,\n"
+" __global const b3ConvexPolyhedronData_t* convexShapes, \n"
+" __global const float4* vertices,\n"
+" __global const float4* uniqueEdges,\n"
+" __global const b3GpuFace_t* faces,\n"
+" __global const int* indices,\n"
+" __global const b3GpuChildShape_t* gpuChildShapes,\n"
+" __global const float4* gpuCompoundSepNormalsOut,\n"
+" __global const int* gpuHasCompoundSepNormalsOut,\n"
+" __global struct b3Contact4Data* restrict globalContactsOut,\n"
+" counter32_t nGlobalContactsOut,\n"
+" int numCompoundPairs, int maxContactCapacity)\n"
+"{\n"
+" int i = get_global_id(0);\n"
+" int pairIndex = i;\n"
+" \n"
+" float4 worldVertsB1[64];\n"
+" float4 worldVertsB2[64];\n"
+" int capacityWorldVerts = 64; \n"
+" float4 localContactsOut[64];\n"
+" int localContactCapacity=64;\n"
+" \n"
+" float minDist = -1e30f;\n"
+" float maxDist = 0.02f;\n"
+" if (i<numCompoundPairs)\n"
+" {\n"
+" if (gpuHasCompoundSepNormalsOut[i])\n"
+" {\n"
+" int bodyIndexA = gpuCompoundPairs[i].x;\n"
+" int bodyIndexB = gpuCompoundPairs[i].y;\n"
+" \n"
+" int childShapeIndexA = gpuCompoundPairs[i].z;\n"
+" int childShapeIndexB = gpuCompoundPairs[i].w;\n"
+" \n"
+" int collidableIndexA = -1;\n"
+" int collidableIndexB = -1;\n"
+" \n"
+" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
+" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
+" \n"
+" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n"
+" float4 posB 
= rigidBodies[bodyIndexB].m_pos;\n"
+" \n"
/* Remainder of clipCompoundsHullHullKernel. When a child shape is selected, its local
   position and orientation are composed with the body transform, so posA and ornA
   (and posB and ornB) describe the child in world space. */
+" if (childShapeIndexA >= 0)\n"
+" {\n"
+" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n"
+" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n"
+" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n"
+" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n"
+" float4 newOrnA = qtMul(ornA,childOrnA);\n"
+" posA = newPosA;\n"
+" ornA = newOrnA;\n"
+" } else\n"
+" {\n"
+" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
+" }\n"
+" \n"
+" if (childShapeIndexB>=0)\n"
+" {\n"
+" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
+" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
+" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
+" float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
+" float4 newOrnB = qtMul(ornB,childOrnB);\n"
+" posB = newPosB;\n"
+" ornB = newOrnB;\n"
+" } else\n"
+" {\n"
+" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n"
+" }\n"
+" \n"
+" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
+" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
+" \n"
+" int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n"
+" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n"
+" posA,ornA,\n"
+" posB,ornB,\n"
+" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
+" minDist, maxDist,\n"
+" vertices,faces,indices,\n"
+" localContactsOut,localContactCapacity);\n"
+" \n"
+" if (numLocalContactsOut>0)\n"
+" {\n"
+" float4 normal = -gpuCompoundSepNormalsOut[i];\n"
+" int nPoints = numLocalContactsOut;\n"
+" float4* pointsIn = localContactsOut;\n"
+" int contactIdx[4];// = {-1,-1,-1,-1};\n"
+" contactIdx[0] = -1;\n"
+" contactIdx[1] = -1;\n"
+" contactIdx[2] = -1;\n"
+" contactIdx[3] = -1;\n"
+" \n"
+" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n"
+" \n"
+" int 
dstIdx;\n"
+" AppendInc( nGlobalContactsOut, dstIdx );\n"
/* NOTE(review): a single b3Contact4Data record is written at dstIdx, yet the bound adds
   nReducedContacts to the index. The other kernels in this file compare dstIdx alone
   with the capacity. Confirm which bound is intended. */
+" if ((dstIdx+nReducedContacts) < maxContactCapacity)\n"
+" {\n"
+" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n"
+" c->m_worldNormalOnB = -normal;\n"
+" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
+" c->m_batchIdx = pairIndex;\n"
+" int bodyA = gpuCompoundPairs[pairIndex].x;\n"
+" int bodyB = gpuCompoundPairs[pairIndex].y;\n"
+" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
+" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
+" c->m_childIndexA = childShapeIndexA;\n"
+" c->m_childIndexB = childShapeIndexB;\n"
+" for (int i=0;i<nReducedContacts;i++)\n"
+" {\n"
+" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n"
+" }\n"
+" GET_NPOINTS(*c) = nReducedContacts;\n"
+" }\n"
+" \n"
+" }// if (numContactsOut>0)\n"
+" }// if (gpuHasCompoundSepNormalsOut[i])\n"
+" }// if (i<numCompoundPairs)\n"
+"}\n"
/* sphereSphereCollisionKernel: one work item per pair i < numPairs. A contact is only
   produced when both collidables have m_shapeType == SHAPE_SPHERE. separatingNormals
   and hasSeparatingAxis are part of the signature but are not read in the kernel
   body. */
+"__kernel void sphereSphereCollisionKernel( __global const int4* pairs, \n"
+" __global const b3RigidBodyData_t* rigidBodies, \n"
+" __global const b3Collidable_t* collidables,\n"
+" __global const float4* separatingNormals,\n"
+" __global const int* hasSeparatingAxis,\n"
+" __global struct b3Contact4Data* restrict globalContactsOut,\n"
+" counter32_t nGlobalContactsOut,\n"
+" int contactCapacity,\n"
+" int numPairs)\n"
+"{\n"
+" int i = get_global_id(0);\n"
+" int pairIndex = i;\n"
+" \n"
+" if (i<numPairs)\n"
+" {\n"
+" int bodyIndexA = pairs[i].x;\n"
+" int bodyIndexB = pairs[i].y;\n"
+" \n"
+" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
+" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
+" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n"
+" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n"
+" {\n"
+" //sphere-sphere\n"
+" float radiusA = collidables[collidableIndexA].m_radius;\n"
+" float radiusB = 
collidables[collidableIndexB].m_radius;\n"
+" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
+" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
+" float4 diff = posA-posB;\n"
/* NOTE(review): length() is applied to the whole float4, so the w lane of diff is part
   of the distance. The concave kernel in this file zeroes the w lane of both positions
   before using them, while this kernel does not. Confirm that m_pos.w is equal for
   both bodies. */
+" float len = length(diff);\n"
+" \n"
+" ///iff distance positive, don't generate a new contact\n"
+" if ( len <= (radiusA+radiusB))\n"
+" {\n"
+" ///distance (negative means penetration)\n"
+" float dist = len - (radiusA+radiusB);\n"
/* Fallback normal along +x, used when the two centres are closer than 0.00001. */
+" float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n"
+" if (len > 0.00001)\n"
+" {\n"
+" normalOnSurfaceB = diff / len;\n"
+" }\n"
/* The contact point lies on the surface of sphere B. Its w lane carries the signed
   distance. */
+" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n"
+" contactPosB.w = dist;\n"
+" \n"
+" int dstIdx;\n"
+" AppendInc( nGlobalContactsOut, dstIdx );\n"
+" if (dstIdx < contactCapacity)\n"
+" {\n"
+" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n"
+" c->m_worldNormalOnB = -normalOnSurfaceB;\n"
+" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n"
+" c->m_batchIdx = pairIndex;\n"
+" int bodyA = pairs[pairIndex].x;\n"
+" int bodyB = pairs[pairIndex].y;\n"
+" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n"
+" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n"
+" c->m_worldPosB[0] = contactPosB;\n"
+" c->m_childIndexA = -1;\n"
+" c->m_childIndexB = -1;\n"
+" GET_NPOINTS(*c) = 1;\n"
+" }//if (dstIdx < numPairs)\n"
+" }//if ( len <= (radiusA+radiusB))\n"
+" }//SHAPE_SPHERE SHAPE_SPHERE\n"
+" }//if (i<numPairs)\n"
+"} \n"
/* clipHullHullConcaveConvexKernel: one work item per concave pair. Each concavePairsIn
   entry carries the two body indices in x and y, a face index of shape A in z, and in
   w either a negative value that marks the pair as invalid or the child shape index
   used when B is a compound of convex hulls. */
+"__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n"
+" __global const b3RigidBodyData_t* rigidBodies, \n"
+" __global const b3Collidable_t* collidables,\n"
+" __global const b3ConvexPolyhedronData_t* convexShapes, \n"
+" __global const float4* vertices,\n"
+" __global const float4* uniqueEdges,\n"
+" __global const b3GpuFace_t* faces,\n"
+" __global const int* indices,\n"
+" __global const b3GpuChildShape_t* gpuChildShapes,\n"
+" __global const float4* separatingNormals,\n"
+" __global struct b3Contact4Data* restrict globalContactsOut,\n"
+" counter32_t nGlobalContactsOut,\n"
+" int contactCapacity,\n"
+" int numConcavePairs)\n"
+"{\n"
+" int i = get_global_id(0);\n"
+" int pairIndex = i;\n"
+" \n"
+" float4 worldVertsB1[64];\n"
+" float4 worldVertsB2[64];\n"
+" int capacityWorldVerts = 64; \n"
+" float4 localContactsOut[64];\n"
+" int localContactCapacity=64;\n"
+" \n"
+" float minDist = -1e30f;\n"
+" float maxDist = 0.02f;\n"
+" if (i<numConcavePairs)\n"
+" {\n"
+" //negative value means that the pair is invalid\n"
+" if (concavePairsIn[i].w<0)\n"
+" return;\n"
+" int bodyIndexA = concavePairsIn[i].x;\n"
+" int bodyIndexB = concavePairsIn[i].y;\n"
/* f is the face index inside shape A. It is also reported as the child index of A in
   the output contact. */
+" int f = concavePairsIn[i].z;\n"
+" int childShapeIndexA = f;\n"
+" \n"
+" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n"
+" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n"
+" \n"
+" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n"
+" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n"
+" \n"
+" ///////////////////////////////////////////////////////////////\n"
+" \n"
+" \n"
/* From here a temporary convex polyhedron is assembled in private memory for the single
   triangle f of shape A: three vertices, a front face, a back face and one plane per
   edge. overlap, dmin, localCC and uniqueEdgesA are set up but not read again in the
   part of the kernel visible in this file. */
+" bool overlap = false;\n"
+" \n"
+" b3ConvexPolyhedronData_t convexPolyhedronA;\n"
+" //add 3 vertices of the triangle\n"
+" convexPolyhedronA.m_numVertices = 3;\n"
+" convexPolyhedronA.m_vertexOffset = 0;\n"
+" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n"
+" b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n"
+" \n"
+" float4 verticesA[3];\n"
+" for (int i=0;i<3;i++)\n"
+" {\n"
+" int index = indices[face.m_indexOffset+i];\n"
+" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n"
+" verticesA[i] = vert;\n"
+" localCenter += vert;\n"
+" }\n"
+" float dmin = FLT_MAX;\n"
+" int localCC=0;\n"
+" //a triangle has 3 unique edges\n"
+" convexPolyhedronA.m_numUniqueEdges = 3;\n"
+" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n"
+" float4 uniqueEdgesA[3];\n"
+" \n"
+" uniqueEdgesA[0] = 
(verticesA[1]-verticesA[0]);\n"
+" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n"
+" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n"
+" convexPolyhedronA.m_faceOffset = 0;\n"
+" \n"
+" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n"
+" \n"
+" b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n"
/* Index storage: 3 for the front face, 3 for the back face and 2 for each of the three
   edge faces. */
+" int indicesA[3+3+2+2+2];\n"
+" int curUsedIndices=0;\n"
+" int fidx=0;\n"
+" //front size of triangle\n"
+" {\n"
+" facesA[fidx].m_indexOffset=curUsedIndices;\n"
+" indicesA[0] = 0;\n"
+" indicesA[1] = 1;\n"
+" indicesA[2] = 2;\n"
+" curUsedIndices+=3;\n"
+" float c = face.m_plane.w;\n"
+" facesA[fidx].m_plane.x = normal.x;\n"
+" facesA[fidx].m_plane.y = normal.y;\n"
+" facesA[fidx].m_plane.z = normal.z;\n"
+" facesA[fidx].m_plane.w = c;\n"
+" facesA[fidx].m_numIndices=3;\n"
+" }\n"
+" fidx++;\n"
+" //back size of triangle\n"
+" {\n"
+" facesA[fidx].m_indexOffset=curUsedIndices;\n"
+" indicesA[3]=2;\n"
+" indicesA[4]=1;\n"
+" indicesA[5]=0;\n"
+" curUsedIndices+=3;\n"
/* The back face uses the negated normal with the vertex order reversed. Its plane
   constant is taken from c. c1 is computed but never used. */
+" float c = dot3F4(normal,verticesA[0]);\n"
+" float c1 = -face.m_plane.w;\n"
+" facesA[fidx].m_plane.x = -normal.x;\n"
+" facesA[fidx].m_plane.y = -normal.y;\n"
+" facesA[fidx].m_plane.z = -normal.z;\n"
+" facesA[fidx].m_plane.w = c;\n"
+" facesA[fidx].m_numIndices=3;\n"
+" }\n"
+" fidx++;\n"
+" bool addEdgePlanes = true;\n"
+" if (addEdgePlanes)\n"
+" {\n"
+" int numVertices=3;\n"
+" int prevVertex = numVertices-1;\n"
+" for (int i=0;i<numVertices;i++)\n"
+" {\n"
+" float4 v0 = verticesA[i];\n"
+" float4 v1 = verticesA[prevVertex];\n"
+" \n"
/* One two-vertex face per triangle edge. Its plane normal is the normalised cross
   product of the triangle normal and the edge, and the plane passes through v0. */
+" float4 edgeNormal = normalize(cross(normal,v1-v0));\n"
+" float c = -dot3F4(edgeNormal,v0);\n"
+" facesA[fidx].m_numIndices = 2;\n"
+" facesA[fidx].m_indexOffset=curUsedIndices;\n"
+" indicesA[curUsedIndices++]=i;\n"
+" indicesA[curUsedIndices++]=prevVertex;\n"
+" \n"
+" facesA[fidx].m_plane.x = edgeNormal.x;\n"
+" facesA[fidx].m_plane.y = edgeNormal.y;\n"
+" facesA[fidx].m_plane.z = edgeNormal.z;\n"
+" facesA[fidx].m_plane.w = 
c;\n"
+" fidx++;\n"
+" prevVertex = i;\n"
+" }\n"
+" }\n"
+" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n"
+" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n"
+" float4 posA = rigidBodies[bodyIndexA].m_pos;\n"
+" posA.w = 0.f;\n"
+" float4 posB = rigidBodies[bodyIndexB].m_pos;\n"
+" posB.w = 0.f;\n"
+" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n"
+" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n"
+" float4 sepAxis = separatingNormals[i];\n"
+" \n"
+" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n"
+" int childShapeIndexB =-1;\n"
/* For a compound B the child shape index comes from the w lane of the pair, and the
   child transform is composed with the transform of body B. */
+" if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n"
+" {\n"
+" ///////////////////\n"
+" ///compound shape support\n"
+" \n"
+" childShapeIndexB = concavePairsIn[pairIndex].w;\n"
+" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n"
+" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n"
+" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n"
+" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n"
+" float4 newPosB = transform(&childPosB,&posB,&ornB);\n"
+" float4 newOrnB = qtMul(ornB,childOrnB);\n"
+" posB = newPosB;\n"
+" ornB = newOrnB;\n"
+" \n"
+" }\n"
+" \n"
+" ////////////////////////////////////////\n"
+" \n"
+" \n"
+" \n"
/* NOTE(review): verticesA, facesA and indicesA are passed with an address-of operator,
   which yields pointers to whole arrays, while clipHullAgainstHullLocalA declares plain
   element pointers for these parameters. Confirm that the target compilers accept
   this. */
+" int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n"
+" &convexPolyhedronA, &convexShapes[shapeIndexB],\n"
+" posA,ornA,\n"
+" posB,ornB,\n"
+" worldVertsB1,worldVertsB2,capacityWorldVerts,\n"
+" minDist, maxDist,\n"
+" &verticesA,&facesA,&indicesA,\n"
+" vertices,faces,indices,\n"
+" localContactsOut,localContactCapacity);\n"
+" \n"
+" if (numLocalContactsOut>0)\n"
+" {\n"
+" float4 normal = -separatingNormals[i];\n"
+" int nPoints = numLocalContactsOut;\n"
+" float4* pointsIn = localContactsOut;\n"
+" int contactIdx[4];// = {-1,-1,-1,-1};\n"
+" contactIdx[0] = -1;\n"
+" contactIdx[1] = -1;\n"
+" contactIdx[2] = -1;\n"
+" contactIdx[3] = -1;\n"
+" \n"
+" int nReducedContacts = 
extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" +" \n" +" int dstIdx;\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" if (dstIdx<contactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" +" c->m_worldNormalOnB = -normal;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = concavePairsIn[pairIndex].x;\n" +" int bodyB = concavePairsIn[pairIndex].y;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" c->m_childIndexA = childShapeIndexA;\n" +" c->m_childIndexB = childShapeIndexB;\n" +" for (int i=0;i<nReducedContacts;i++)\n" +" {\n" +" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" +" }\n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" }\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (i<numPairs)\n" +"}\n" +"int findClippingFaces(const float4 separatingNormal,\n" +" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" +" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +" __global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" __global const float4* vertices,\n" +" __global const b3GpuFace_t* faces,\n" +" __global const int* indices,\n" +" __global int4* clippingFaces, int pairIndex)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" \n" +" \n" +" int closestFaceB=-1;\n" +" float dmax = -FLT_MAX;\n" +" \n" +" {\n" +" for(int face=0;face<hullB->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" +" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = 
qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" +" const int numVertices = polyB.m_numIndices;\n" +" for(int e0=0;e0<numVertices;e0++)\n" +" {\n" +" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=-1;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;face<hullA->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" faces[hullA->m_faceOffset+face].m_plane.x,\n" +" faces[hullA->m_faceOffset+face].m_plane.y,\n" +" faces[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" for(int e0=0;e0<numVerticesA;e0++)\n" +" {\n" +" const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" +"int clipFaces(__global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" __global float4* worldVertsB2, \n" +" int capacityWorldVertsB2,\n" +" const float minDist, float maxDist,\n" 
+" __global int4* clippingFaces,\n" +" int pairIndex)\n" +"{\n" +" int numContactsOut = 0;\n" +" \n" +" int closestFaceA = clippingFaces[pairIndex].x;\n" +" int closestFaceB = clippingFaces[pairIndex].y;\n" +" int numVertsInA = clippingFaces[pairIndex].z;\n" +" int numVertsInB = clippingFaces[pairIndex].w;\n" +" \n" +" int numVertsOut = 0;\n" +" \n" +" if (closestFaceA<0)\n" +" return numContactsOut;\n" +" \n" +" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +" \n" +" \n" +" \n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" \n" +" for(int e0=0;e0<numVertsInA;e0++)\n" +" {\n" +" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +" const float4 WorldEdge0 = aw - bw;\n" +" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = aw;\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +" __global float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsInB = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +" \n" +" //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +" //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +" \n" +" /*for (int i=0;i<numVertsInB;i++)\n" +" {\n" +" pVtxOut[i] = pVtxIn[i];\n" +" }*/\n" +" \n" +" \n" +" \n" +" \n" +" //numVertsInB=0;\n" +" \n" +" float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +" for (int i=0;i<numVertsInB;i++)\n" +" {\n" +" float 
depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +" if (depth <=minDist)\n" +" {\n" +" depth = minDist;\n" +" }\n" +" \n" +" if (depth <=maxDist)\n" +" {\n" +" float4 pointInWorld = pVtxIn[i];\n" +" pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +" }\n" +" }\n" +" \n" +" clippingFaces[pairIndex].w =numContactsOut;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" +"__kernel void findClippingFacesKernel( __global const int4* pairs,\n" +" __global const b3RigidBodyData_t* rigidBodies,\n" +" __global const b3Collidable_t* collidables,\n" +" __global const b3ConvexPolyhedronData_t* convexShapes,\n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const b3GpuFace_t* faces,\n" +" __global const int* indices,\n" +" __global const float4* separatingNormals,\n" +" __global const int* hasSeparatingAxis,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" int capacityWorldVerts,\n" +" int numPairs\n" +" )\n" +"{\n" +" \n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" \n" +" if (hasSeparatingAxis[i])\n" +" {\n" +" \n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" \n" +" int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" +" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" +" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" +" 
rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" +" worldVertsA1,\n" +" worldNormalsA1,\n" +" worldVertsB1,capacityWorldVerts,\n" +" minDist, maxDist,\n" +" vertices,faces,indices,\n" +" clippingFacesOut,i);\n" +" \n" +" \n" +" }// if (hasSeparatingAxis[i])\n" +" }// if (i<numPairs)\n" +" \n" +"}\n" +"__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,\n" +" __global const int* hasSeparatingAxis,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" __global float4* worldVertsB2,\n" +" int vertexFaceCapacity,\n" +" int numPairs,\n" +" int debugMode\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" \n" +" if (hasSeparatingAxis[i])\n" +" {\n" +" \n" +"// int bodyIndexA = pairs[i].x;\n" +" // int bodyIndexB = pairs[i].y;\n" +" \n" +" int numLocalContactsOut = 0;\n" +" int capacityWorldVertsB2 = vertexFaceCapacity;\n" +" \n" +" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" +" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" +" \n" +" {\n" +" __global int4* clippingFaces = clippingFacesOut;\n" +" \n" +" \n" +" int closestFaceA = clippingFaces[pairIndex].x;\n" +" int closestFaceB = clippingFaces[pairIndex].y;\n" +" int numVertsInA = clippingFaces[pairIndex].z;\n" +" int numVertsInB = clippingFaces[pairIndex].w;\n" +" \n" +" int numVertsOut = 0;\n" +" \n" +" if (closestFaceA>=0)\n" +" {\n" +" \n" +" \n" +" \n" +" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" +" \n" +" for(int e0=0;e0<numVertsInA;e0++)\n" +" {\n" +" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" +" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" +" 
const float4 WorldEdge0 = aw - bw;\n" +" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" +" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" +" float4 worldA1 = aw;\n" +" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" +" float4 planeNormalWS = planeNormalWS1;\n" +" float planeEqWS=planeEqWS1;\n" +" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" +" __global float4* tmp = pVtxOut;\n" +" pVtxOut = pVtxIn;\n" +" pVtxIn = tmp;\n" +" numVertsInB = numVertsOut;\n" +" numVertsOut = 0;\n" +" }\n" +" \n" +" float4 planeNormalWS = worldNormalsA1[pairIndex];\n" +" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" +" \n" +" for (int i=0;i<numVertsInB;i++)\n" +" {\n" +" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" +" if (depth <=minDist)\n" +" {\n" +" depth = minDist;\n" +" }\n" +" \n" +" if (depth <=maxDist)\n" +" {\n" +" float4 pointInWorld = pVtxIn[i];\n" +" pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" +" }\n" +" }\n" +" \n" +" }\n" +" clippingFaces[pairIndex].w =numLocalContactsOut;\n" +" \n" +" }\n" +" \n" +" for (int i=0;i<numLocalContactsOut;i++)\n" +" pVtxIn[i] = pVtxOut[i];\n" +" \n" +" }// if (hasSeparatingAxis[i])\n" +" }// if (i<numPairs)\n" +" \n" +"}\n" +"__kernel void newContactReductionKernel( __global int4* pairs,\n" +" __global const b3RigidBodyData_t* rigidBodies,\n" +" __global const float4* separatingNormals,\n" +" __global const int* hasSeparatingAxis,\n" +" __global struct b3Contact4Data* globalContactsOut,\n" +" __global int4* clippingFaces,\n" +" __global float4* worldVertsB2,\n" +" volatile __global int* nGlobalContactsOut,\n" +" int vertexFaceCapacity,\n" +" int contactCapacity,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" int pairIndex = i;\n" +" \n" +" int4 contactIdx;\n" +" contactIdx=make_int4(0,1,2,3);\n" +" \n" +" if (i<numPairs)\n" +" 
{\n" +" \n" +" if (hasSeparatingAxis[i])\n" +" {\n" +" \n" +" \n" +" \n" +" \n" +" int nPoints = clippingFaces[pairIndex].w;\n" +" \n" +" if (nPoints>0)\n" +" {\n" +" __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" +" float4 normal = -separatingNormals[i];\n" +" \n" +" int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" +" \n" +" int mprContactIndex = pairs[pairIndex].z;\n" +" int dstIdx = mprContactIndex;\n" +" if (dstIdx<0)\n" +" {\n" +" AppendInc( nGlobalContactsOut, dstIdx );\n" +" }\n" +"//#if 0\n" +" \n" +" if (dstIdx < contactCapacity)\n" +" {\n" +" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" +" c->m_worldNormalOnB = -normal;\n" +" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" +" c->m_batchIdx = pairIndex;\n" +" int bodyA = pairs[pairIndex].x;\n" +" int bodyB = pairs[pairIndex].y;\n" +" pairs[pairIndex].w = dstIdx;\n" +" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" +" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" +" c->m_childIndexA =-1;\n" +" c->m_childIndexB =-1;\n" +" switch (nReducedContacts)\n" +" {\n" +" case 4:\n" +" c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" +" case 3:\n" +" c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" +" case 2:\n" +" c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" +" case 1:\n" +" if (mprContactIndex<0)//test\n" +" c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" +" default:\n" +" {\n" +" }\n" +" };\n" +" \n" +" GET_NPOINTS(*c) = nReducedContacts;\n" +" \n" +" }\n" +" \n" +" \n" +"//#endif\n" +" \n" +" }// if (numContactsOut>0)\n" +" }// if (hasSeparatingAxis[i])\n" +" }// if (i<numPairs)\n" +" \n" +" \n" +"}\n" +; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl new file mode 100644 index 0000000000..31ca43b8cd --- 
/dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl @@ -0,0 +1,1220 @@ + +//keep this enum in sync with the CPU version (in btCollidable.h) +//written by Erwin Coumans + + +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_CONCAVE_TRIMESH 5 +#define TRIANGLE_NUM_CONVEX_FACES 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 + +#define B3_MAX_STACK_DEPTH 256 + + +typedef unsigned int u32; + +///keep this in sync with btCollidable.h +typedef struct +{ + union { + int m_numChildShapes; + int m_bvhIndex; + }; + union + { + float m_radius; + int m_compoundBvhIndex; + }; + + int m_shapeType; + int m_shapeIndex; + +} btCollidableGpu; + +#define MAX_NUM_PARTS_IN_BITS 10 + +///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. +///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes + int m_escapeIndexOrTriangleIndex; +} b3QuantizedBvhNode; + +typedef struct +{ + float4 m_aabbMin; + float4 m_aabbMax; + float4 m_quantization; + int m_numNodes; + int m_numSubTrees; + int m_nodeOffset; + int m_subTreeOffset; + +} b3BvhInfo; + + +int getTriangleIndex(const b3QuantizedBvhNode* rootNode) +{ + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); +} + +int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + unsigned int x=0; + unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); + // Get only the lower bits where the triangle index is stored + return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); +} + +int isLeafNode(const b3QuantizedBvhNode* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0; +} + +int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + //skipindex is negative (internal node), triangleindex >=0 (leafnode) + return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; +} + +int getEscapeIndex(const b3QuantizedBvhNode* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + +int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) +{ + return -rootNode->m_escapeIndexOrTriangleIndex; +} + + +typedef struct +{ + //12 bytes + unsigned short int m_quantizedAabbMin[3]; + unsigned short int m_quantizedAabbMax[3]; + //4 bytes, points to the root of the subtree + int m_rootNodeIndex; + //4 bytes + int m_subtreeSize; + int m_padding[3]; +} b3BvhSubtreeInfo; + + + + + + + +typedef struct +{ + float4 m_childPosition; + float4 m_childOrientation; + int m_shapeIndex; + int m_unused0; + int m_unused1; + int m_unused2; +} btGpuChildShape; + + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + u32 m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} BodyData; + + +typedef struct +{ + float4 m_localCenter; + float4 m_extents; + float4 mC; + float4 mE; + + float m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; +} ConvexPolyhedronCL; + +typedef struct +{ + union + { + float4 m_min; + float m_minElems[4]; + int m_minIndices[4]; + }; + union + { + float4 m_max; + float m_maxElems[4]; + int m_maxIndices[4]; + }; +} btAabbCL; + +#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" +#include "Bullet3Common/shared/b3Int2.h" + + + +typedef struct +{ + float4 m_plane; + int m_indexOffset; + int m_numIndices; +} btGpuFace; + +#define make_float4 (float4) + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); + + +// float4 a1 = make_float4(a.xyz,0.f); +// float4 b1 = make_float4(b.xyz,0.f); + +// 
return cross(a1,b1); + +//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); + + // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); + + //return c; +} + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + v = make_float4(v.xyz,0.f); + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + +inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, const float4* vertices, float* 
min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;i<numVerts;i++) + { + float dp = dot(vertices[hull->m_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, +const float4* dir, __global const float4* vertices, float* min, float* max) +{ + min[0] = FLT_MAX; + max[0] = -FLT_MAX; + int numVerts = hull->m_numVertices; + + const float4 localDir = qtInvRotate(orn,*dir); + float offset = dot(pos,*dir); + for(int i=0;i<numVerts;i++) + { + float dp = dot(vertices[hull->m_vertexOffset+i],localDir); + if(dp < min[0]) + min[0] = dp; + if(dp > max[0]) + max[0] = dp; + } + if(min[0]>max[0]) + { + float tmp = min[0]; + min[0] = max[0]; + max[0] = tmp; + } + min[0] += offset; + max[0] += offset; +} + +inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA,const float4 ornA, + const float4 posB,const float4 ornB, + float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) +{ + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); + project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + return false; + + float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + *depth = d0<d1 ? 
d0:d1; + return true; +} + + + + +inline bool IsAlmostZero(const float4 v) +{ + if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) + return false; + return true; +} + + + +bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = hullA->m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS*=-1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + +bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + __global const float4* verticesA, + __global const float4* uniqueEdgesA, + __global const btGpuFace* facesA, + __global const int* indicesA, + const float4* verticesB, + const float4* uniqueEdgesB, + const btGpuFace* facesB, + const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + int curPlaneTests=0; + { + int numFacesA = 
hullA->m_numFaces; + // Test normals from hullA + for(int i=0;i<numFacesA;i++) + { + const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; + float4 faceANormalWS = qtRotate(ornA,normal); + if (dot3F4(DeltaC2,faceANormalWS)<0) + faceANormalWS *= -1.f; + curPlaneTests++; + float d; + if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) + return false; + if(d<*dmin) + { + *dmin = d; + *sep = faceANormalWS; + } + } + } + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + +bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, + const float4 posA1, + const float4 ornA, + const float4 posB1, + const float4 ornB, + const float4 DeltaC2, + const float4* verticesA, + const float4* uniqueEdgesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const float4* uniqueEdgesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + float4* sep, + float* dmin) +{ + + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = qtRotate(ornA,edge0); + + for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = qtRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje *= -1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); + project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); + + if(Max0<Min1 || Max1<Min0) + result = false; + + 
float d0 = Max0 - Min1; + float d1 = Max1 - Min0; + dist = d0<d1 ? d0:d1; + result = true; + + } + + + if(dist<*dmin) + { + *dmin = dist; + *sep = crossje; + } + } + } + + } + + + if((dot3F4(-DeltaC2,*sep))>0.0f) + { + *sep = -(*sep); + } + return true; +} + + + +inline int findClippingFaces(const float4 separatingNormal, + const ConvexPolyhedronCL* hullA, + __global const ConvexPolyhedronCL* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const btGpuFace* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const btGpuFace* facesB, + __global const int* indicesB, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=0; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; + int numVertices = polyB.m_numIndices; + if (numVertices>capacityWorldVerts) + numVertices = capacityWorldVerts; + if (numVertices<0) + numVertices = 0; + + for(int e0=0;e0<numVertices;e0++) + { + if (e0<capacityWorldVerts) + { + const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + } + + int closestFaceA=0; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const 
float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; + if (numVerticesA>capacityWorldVerts) + numVerticesA = capacityWorldVerts; + if (numVerticesA<0) + numVerticesA=0; + + for(int e0=0;e0<numVerticesA;e0++) + { + if (e0<capacityWorldVerts) + { + const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + + +// work-in-progress +__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global const btGpuChildShape* gpuChildShapes, + __global btAabbCL* aabbs, + __global float4* concaveSeparatingNormalsOut, + __global int* concaveHasSeparatingNormals, + __global int4* clippingFacesOut, + __global float4* worldVertsA1GPU, + __global float4* worldNormalsAGPU, + __global float4* worldVertsB1GPU, + __global float* dmins, + int vertexFaceCapacity, + int numConcavePairs + ) +{ + + int i = get_global_id(0); + if (i>=numConcavePairs) + return; + + concaveHasSeparatingNormals[i] = 0; + + int pairIdx 
= i; + + int bodyIndexA = concavePairs[i].x; + int bodyIndexB = concavePairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& + collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + concavePairs[pairIdx].w = -1; + return; + } + + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + int numActualConcaveConvexTests = 0; + + int f = concavePairs[i].z; + + bool overlap = false; + + ConvexPolyhedronCL convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + float4 localCenter = make_float4(0.f,0.f,0.f,0.f); + + btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + float4 triMinAabb, triMaxAabb; + btAabbCL triAabb; + triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); + triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); + + float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = indices[face.m_indexOffset+i]; + float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] = vert; + localCenter += vert; + + triAabb.m_min = min(triAabb.m_min,vert); + triAabb.m_max = max(triAabb.m_max,vert); + + } + + overlap = true; + overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; + overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; + overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap; + + if (overlap) + { + float dmin = FLT_MAX; + int hasSeparatingAxis=5; + float4 sepAxis=make_float4(1,2,3,4); + + int localCC=0; + numActualConcaveConvexTests++; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i<numVertices;i++) + { + float4 v0 = verticesA[i]; + float4 v1 = verticesA[prevVertex]; + + float4 edgeNormal = normalize(cross(normal,v1-v0)); + float c = -dot(edgeNormal,v0); + + facesA[fidx].m_numIndices = 2; + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[curUsedIndices++]=i; + indicesA[curUsedIndices++]=prevVertex; + + facesA[fidx].m_plane.x = edgeNormal.x; + 
facesA[fidx].m_plane.y = edgeNormal.y; + facesA[fidx].m_plane.z = edgeNormal.z; + facesA[fidx].m_plane.w = c; + fidx++; + prevVertex = i; + } + } + convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; + convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); + + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + + + + + /////////////////// + ///compound shape support + + if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + int compoundChild = concavePairs[pairIdx].w; + int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + shapeIndexB = collidables[childColIndexB].m_shapeIndex; + } + ////////////////// + + float4 c0local = convexPolyhedronA.m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + + + bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + DeltaC2, + verticesA,uniqueEdgesA,facesA,indicesA, + vertices,uniqueEdges,faces,indices, + &sepAxis,&dmin); + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, + posB,ornB, + posA,ornA, + DeltaC2, + vertices,uniqueEdges,faces,indices, + verticesA,uniqueEdgesA,facesA,indicesA, + &sepAxis,&dmin); + + if 
(!sepB) + { + hasSeparatingAxis = 0; + } else + { + hasSeparatingAxis = 1; + } + } + + if (hasSeparatingAxis) + { + dmins[i] = dmin; + concaveSeparatingNormalsOut[pairIdx]=sepAxis; + concaveHasSeparatingNormals[i]=1; + + } else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } + } + else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } +} + + + + +// work-in-progress +__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs, + __global const BodyData* rigidBodies, + __global const btCollidableGpu* collidables, + __global const ConvexPolyhedronCL* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const btGpuFace* faces, + __global const int* indices, + __global const btGpuChildShape* gpuChildShapes, + __global btAabbCL* aabbs, + __global float4* concaveSeparatingNormalsOut, + __global int* concaveHasSeparatingNormals, + __global int4* clippingFacesOut, + __global float4* worldVertsA1GPU, + __global float4* worldNormalsAGPU, + __global float4* worldVertsB1GPU, + __global float* dmins, + int vertexFaceCapacity, + int numConcavePairs + ) +{ + + int i = get_global_id(0); + if (i>=numConcavePairs) + return; + + if (!concaveHasSeparatingNormals[i]) + return; + + int pairIdx = i; + + int bodyIndexA = concavePairs[i].x; + int bodyIndexB = concavePairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + int numFacesA = convexShapes[shapeIndexA].m_numFaces; + int numActualConcaveConvexTests = 0; + + int f = concavePairs[i].z; + + bool overlap = false; + + ConvexPolyhedronCL convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + float4 localCenter = 
make_float4(0.f,0.f,0.f,0.f); + + btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + float4 triMinAabb, triMaxAabb; + btAabbCL triAabb; + triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); + triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); + + float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = indices[face.m_indexOffset+i]; + float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] = vert; + localCenter += vert; + + triAabb.m_min = min(triAabb.m_min,vert); + triAabb.m_max = max(triAabb.m_max,vert); + + } + + overlap = true; + overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; + overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; + overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; + + if (overlap) + { + float dmin = dmins[i]; + int hasSeparatingAxis=5; + float4 sepAxis=make_float4(1,2,3,4); + sepAxis = concaveSeparatingNormalsOut[pairIdx]; + + int localCC=0; + numActualConcaveConvexTests++; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + 
facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i<numVertices;i++) + { + float4 v0 = verticesA[i]; + float4 v1 = verticesA[prevVertex]; + + float4 edgeNormal = normalize(cross(normal,v1-v0)); + float c = -dot(edgeNormal,v0); + + facesA[fidx].m_numIndices = 2; + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[curUsedIndices++]=i; + indicesA[curUsedIndices++]=prevVertex; + + facesA[fidx].m_plane.x = edgeNormal.x; + facesA[fidx].m_plane.y = edgeNormal.y; + facesA[fidx].m_plane.z = edgeNormal.z; + facesA[fidx].m_plane.w = c; + fidx++; + prevVertex = i; + } + } + convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; + convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); + + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + + + + + /////////////////// + ///compound shape support + + if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + int compoundChild = concavePairs[pairIdx].w; + int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = 
gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + shapeIndexB = collidables[childColIndexB].m_shapeIndex; + } + ////////////////// + + float4 c0local = convexPolyhedronA.m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + + + { + bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + DeltaC2, + verticesA,uniqueEdgesA,facesA,indicesA, + vertices,uniqueEdges,faces,indices, + &sepAxis,&dmin); + + if (!sepEE) + { + hasSeparatingAxis = 0; + } else + { + hasSeparatingAxis = 1; + } + } + + + if (hasSeparatingAxis) + { + sepAxis.w = dmin; + dmins[i] = dmin; + concaveSeparatingNormalsOut[pairIdx]=sepAxis; + concaveHasSeparatingNormals[i]=1; + + float minDist = -1e30f; + float maxDist = 0.02f; + + + findClippingFaces(sepAxis, + &convexPolyhedronA, + &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsA1GPU, + worldNormalsAGPU, + worldVertsB1GPU, + vertexFaceCapacity, + minDist, maxDist, + verticesA, + facesA, + indicesA, + vertices, + faces, + indices, + clippingFacesOut, pairIdx); + + + } else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } + } + else + { + //mark this pair as in-active + concavePairs[pairIdx].w = -1; + } + + concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points +} + diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h new file mode 100644 index 0000000000..611569cacf --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h @@ -0,0 +1,1457 @@ +//this file is 
autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* satConcaveKernelsCL= \ +"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +"//written by Erwin Coumans\n" +"#define SHAPE_CONVEX_HULL 3\n" +"#define SHAPE_CONCAVE_TRIMESH 5\n" +"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +"#define B3_MAX_STACK_DEPTH 256\n" +"typedef unsigned int u32;\n" +"///keep this in sync with btCollidable.h\n" +"typedef struct\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" union\n" +" {\n" +" float m_radius;\n" +" int m_compoundBvhIndex;\n" +" };\n" +" \n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +" \n" +"} btCollidableGpu;\n" +"#define MAX_NUM_PARTS_IN_BITS 10\n" +"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes\n" +" int m_escapeIndexOrTriangleIndex;\n" +"} b3QuantizedBvhNode;\n" +"typedef struct\n" +"{\n" +" float4 m_aabbMin;\n" +" float4 m_aabbMax;\n" +" float4 m_quantization;\n" +" int m_numNodes;\n" +" int m_numSubTrees;\n" +" int m_nodeOffset;\n" +" int m_subTreeOffset;\n" +"} b3BvhInfo;\n" +"int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return 
(rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +"}\n" +"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +"}\n" +" \n" +"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes, points to the root of the subtree\n" +" int m_rootNodeIndex;\n" +" //4 bytes\n" +" int m_subtreeSize;\n" +" int m_padding[3];\n" +"} b3BvhSubtreeInfo;\n" +"typedef struct\n" +"{\n" +" float4 m_childPosition;\n" +" float4 m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"} btGpuChildShape;\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +" u32 m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} BodyData;\n" +"typedef struct \n" +"{\n" +" float4 m_localCenter;\n" +" float4 m_extents;\n" +" float4 mC;\n" +" float4 mE;\n" +" \n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"} ConvexPolyhedronCL;\n" +"typedef struct \n" +"{\n" +" union\n" +" {\n" +" float4 m_min;\n" +" float m_minElems[4];\n" +" int m_minIndices[4];\n" +" 
};\n" +" union\n" +" {\n" +" float4 m_max;\n" +" float m_maxElems[4];\n" +" int m_maxIndices[4];\n" +" };\n" +"} btAabbCL;\n" +"#ifndef B3_AABB_H\n" +"#define B3_AABB_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" +"#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" +"#define B3_STATIC\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = 
i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +"{\n" +" b3Quat q;\n" +" q=in;\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" 
return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = 
(b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" 
+"typedef struct b3Aabb b3Aabb_t;\n" +"struct b3Aabb\n" +"{\n" +" union\n" +" {\n" +" float m_min[4];\n" +" b3Float4 m_minVec;\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float m_max[4];\n" +" b3Float4 m_maxVec;\n" +" int m_signedMaxIndices[4];\n" +" };\n" +"};\n" +"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +" b3Float4ConstArg pos,\n" +" b3QuatConstArg orn,\n" +" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +"{\n" +" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +" b3Mat3x3 m;\n" +" m = b3QuatGetRotationMatrix(orn);\n" +" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +" \n" +" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +" 0.f);\n" +" *aabbMinOut = center-extent;\n" +" *aabbMaxOut = center+extent;\n" +"}\n" +"/// conservative test for overlap between two aabbs\n" +"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +"{\n" +" bool overlap = true;\n" +" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? 
false : overlap;\n" +" return overlap;\n" +"}\n" +"#endif //B3_AABB_H\n" +"/*\n" +"Bullet Continuous Collision Detection and Physics Library\n" +"Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" +"This software is provided 'as-is', without any express or implied warranty.\n" +"In no event will the authors be held liable for any damages arising from the use of this software.\n" +"Permission is granted to anyone to use this software for any purpose,\n" +"including commercial applications, and to alter it and redistribute it freely,\n" +"subject to the following restrictions:\n" +"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +"3. This notice may not be removed or altered from any source distribution.\n" +"*/\n" +"#ifndef B3_INT2_H\n" +"#define B3_INT2_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3UnsignedInt2 uint2\n" +"#define b3Int2 int2\n" +"#define b3MakeInt2 (int2)\n" +"#endif //__cplusplus\n" +"#endif\n" +"typedef struct\n" +"{\n" +" float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +"} btGpuFace;\n" +"#define make_float4 (float4)\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +" \n" +"// float4 a1 = make_float4(a.xyz,0.f);\n" +"// float4 b1 = make_float4(b.xyz,0.f);\n" +"// return cross(a1,b1);\n" +"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +" \n" +" // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +" \n" +" //return c;\n" +"}\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, 
b1);\n" +"}\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" v = make_float4(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"typedef float4 Quaternion;\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fastNormalize4(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" +"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"}\n" +"inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = 
hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;i<numVerts;i++)\n" +" {\n" +" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;i<numVerts;i++)\n" +" {\n" +" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA,const float4 ornA,\n" +" const float4 posB,const float4 ornB,\n" +" float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +"{\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" *depth = d0<d1 ? 
d0:d1;\n" +" return true;\n" +"}\n" +"inline bool IsAlmostZero(const float4 v)\n" +"{\n" +" if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +" return false;\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" \n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;i<numFacesA;i++)\n" +" {\n" +" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS*=-1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* verticesA, \n" +" __global const float4* uniqueEdgesA, \n" +" __global const btGpuFace* facesA,\n" +" __global const int* indicesA,\n" +" const 
float4* verticesB,\n" +" const float4* uniqueEdgesB, \n" +" const btGpuFace* facesB,\n" +" const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;i<numFacesA;i++)\n" +" {\n" +" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS *= -1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test edges\n" +" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +" {\n" +" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +" float4 edge0World = qtRotate(ornA,edge0);\n" +" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +" {\n" +" const float4 edge1 = 
uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +" float4 edge1World = qtRotate(ornB,edge1);\n" +" float4 crossje = cross3(edge0World,edge1World);\n" +" curEdgeEdge++;\n" +" if(!IsAlmostZero(crossje))\n" +" {\n" +" crossje = normalize3(crossje);\n" +" if (dot3F4(DeltaC2,crossje)<0)\n" +" crossje *= -1.f;\n" +" float dist;\n" +" {\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +" \n" +" // disjoint projections: crossje separates the hulls, so bail out\n" +" // exactly like the face-normal tests do when TestSepAxisLocalA fails\n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" \n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" dist = d0<d1 ? d0:d1;\n" +" }\n" +" \n" +" if(dist<*dmin)\n" +" {\n" +" *dmin = dist;\n" +" *sep = crossje;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" \n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"inline int findClippingFaces(const float4 separatingNormal,\n" +" const ConvexPolyhedronCL* hullA, \n" +" __global const ConvexPolyhedronCL* hullB,\n" +" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +" __global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" const float4* verticesA,\n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB,\n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" __global int4* clippingFaces, int pairIndex)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" \n" +" \n" +" int closestFaceB=0;\n" +" float dmax = -FLT_MAX;\n" +" \n" +" {\n" +" for(int face=0;face<hullB->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +" facesB[hullB->m_faceOffset+face].m_plane.y, 
facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" int numVertices = polyB.m_numIndices;\n" +" if (numVertices>capacityWorldVerts)\n" +" numVertices = capacityWorldVerts;\n" +" if (numVertices<0)\n" +" numVertices = 0;\n" +" \n" +" for(int e0=0;e0<numVertices;e0++)\n" +" {\n" +" if (e0<capacityWorldVerts)\n" +" {\n" +" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=0;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;face<hullA->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x,\n" +" facesA[hullA->m_faceOffset+face].m_plane.y,\n" +" facesA[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" if (numVerticesA>capacityWorldVerts)\n" +" numVerticesA = capacityWorldVerts;\n" +" if (numVerticesA<0)\n" +" numVerticesA=0;\n" +" \n" +" for(int e0=0;e0<numVerticesA;e0++)\n" +" {\n" +" if (e0<capacityWorldVerts)\n" +" {\n" +" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = 
closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" +"// work-in-progress\n" +"__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" +" __global const BodyData* rigidBodies,\n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global btAabbCL* aabbs,\n" +" __global float4* concaveSeparatingNormalsOut,\n" +" __global int* concaveHasSeparatingNormals,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1GPU,\n" +" __global float4* worldNormalsAGPU,\n" +" __global float4* worldVertsB1GPU,\n" +" __global float* dmins,\n" +" int vertexFaceCapacity,\n" +" int numConcavePairs\n" +" )\n" +"{\n" +" \n" +" int i = get_global_id(0);\n" +" if (i>=numConcavePairs)\n" +" return;\n" +" \n" +" concaveHasSeparatingNormals[i] = 0;\n" +" \n" +" int pairIdx = i;\n" +" \n" +" int bodyIndexA = concavePairs[i].x;\n" +" int bodyIndexB = concavePairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +" collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" concavePairs[pairIdx].w = -1;\n" +" return;\n" +" }\n" +" \n" +" \n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" int numActualConcaveConvexTests = 0;\n" +" \n" +" int f = 
concavePairs[i].z;\n" +" \n" +" bool overlap = false;\n" +" \n" +" ConvexPolyhedronCL convexPolyhedronA;\n" +" \n" +" //add 3 vertices of the triangle\n" +" convexPolyhedronA.m_numVertices = 3;\n" +" convexPolyhedronA.m_vertexOffset = 0;\n" +" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +" \n" +" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +" float4 triMinAabb, triMaxAabb;\n" +" btAabbCL triAabb;\n" +" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +" \n" +" float4 verticesA[3];\n" +" for (int i=0;i<3;i++)\n" +" {\n" +" int index = indices[face.m_indexOffset+i];\n" +" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +" verticesA[i] = vert;\n" +" localCenter += vert;\n" +" \n" +" triAabb.m_min = min(triAabb.m_min,vert);\n" +" triAabb.m_max = max(triAabb.m_max,vert);\n" +" \n" +" }\n" +" \n" +" overlap = true;\n" +" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" +" \n" +" if (overlap)\n" +" {\n" +" float dmin = FLT_MAX;\n" +" int hasSeparatingAxis=5;\n" +" float4 sepAxis=make_float4(1,2,3,4);\n" +" \n" +" int localCC=0;\n" +" numActualConcaveConvexTests++;\n" +" \n" +" //a triangle has 3 unique edges\n" +" convexPolyhedronA.m_numUniqueEdges = 3;\n" +" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +" float4 uniqueEdgesA[3];\n" +" \n" +" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +" \n" +" \n" +" convexPolyhedronA.m_faceOffset = 0;\n" +" \n" +" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +" \n" +" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +" int indicesA[3+3+2+2+2];\n" +" int curUsedIndices=0;\n" +" int fidx=0;\n" +" \n" +" //front size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[0] = 0;\n" +" indicesA[1] = 1;\n" +" indicesA[2] = 2;\n" +" curUsedIndices+=3;\n" +" float c = face.m_plane.w;\n" +" facesA[fidx].m_plane.x = normal.x;\n" +" facesA[fidx].m_plane.y = normal.y;\n" +" facesA[fidx].m_plane.z = normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" //back size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[3]=2;\n" +" indicesA[4]=1;\n" +" indicesA[5]=0;\n" +" curUsedIndices+=3;\n" +" float c = dot(normal,verticesA[0]);\n" +" float c1 = -face.m_plane.w;\n" +" facesA[fidx].m_plane.x = -normal.x;\n" +" facesA[fidx].m_plane.y = -normal.y;\n" +" facesA[fidx].m_plane.z = -normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" \n" +" bool addEdgePlanes = true;\n" +" if (addEdgePlanes)\n" +" {\n" +" int numVertices=3;\n" +" int prevVertex = numVertices-1;\n" +" for (int i=0;i<numVertices;i++)\n" +" {\n" +" float4 v0 = verticesA[i];\n" +" float4 v1 = verticesA[prevVertex];\n" 
+" \n" +" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +" float c = -dot(edgeNormal,v0);\n" +" \n" +" facesA[fidx].m_numIndices = 2;\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[curUsedIndices++]=i;\n" +" indicesA[curUsedIndices++]=prevVertex;\n" +" \n" +" facesA[fidx].m_plane.x = edgeNormal.x;\n" +" facesA[fidx].m_plane.y = edgeNormal.y;\n" +" facesA[fidx].m_plane.z = edgeNormal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" fidx++;\n" +" prevVertex = i;\n" +" }\n" +" }\n" +" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +" \n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" \n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" \n" +" \n" +" \n" +" \n" +" ///////////////////\n" +" ///compound shape support\n" +" \n" +" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" int compoundChild = concavePairs[pairIdx].w;\n" +" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +" }\n" +" //////////////////\n" +" \n" +" float4 c0local = convexPolyhedronA.m_localCenter;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" \n" +" \n" +" bool 
sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" vertices,uniqueEdges,faces,indices,\n" +" &sepAxis,&dmin);\n" +" hasSeparatingAxis = 4;\n" +" if (!sepA)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" +" posB,ornB,\n" +" posA,ornA,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,indices,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" &sepAxis,&dmin);\n" +" \n" +" if (!sepB)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis = 1;\n" +" }\n" +" } \n" +" \n" +" if (hasSeparatingAxis)\n" +" {\n" +" dmins[i] = dmin;\n" +" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +" concaveHasSeparatingNormals[i]=1;\n" +" \n" +" } else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +" }\n" +" else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +"}\n" +"// work-in-progress\n" +"__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" +" __global const BodyData* rigidBodies,\n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global btAabbCL* aabbs,\n" +" __global float4* concaveSeparatingNormalsOut,\n" +" __global int* concaveHasSeparatingNormals,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1GPU,\n" +" __global float4* worldNormalsAGPU,\n" +" __global float4* worldVertsB1GPU,\n" +" __global float* dmins,\n" +" int vertexFaceCapacity,\n" +" int numConcavePairs\n" +" )\n" +"{\n" +" \n" +" int i = 
get_global_id(0);\n" +" if (i>=numConcavePairs)\n" +" return;\n" +" \n" +" if (!concaveHasSeparatingNormals[i])\n" +" return;\n" +" \n" +" int pairIdx = i;\n" +" \n" +" int bodyIndexA = concavePairs[i].x;\n" +" int bodyIndexB = concavePairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" int numActualConcaveConvexTests = 0;\n" +" \n" +" int f = concavePairs[i].z;\n" +" \n" +" bool overlap = false;\n" +" \n" +" ConvexPolyhedronCL convexPolyhedronA;\n" +" \n" +" //add 3 vertices of the triangle\n" +" convexPolyhedronA.m_numVertices = 3;\n" +" convexPolyhedronA.m_vertexOffset = 0;\n" +" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +" \n" +" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +" float4 triMinAabb, triMaxAabb;\n" +" btAabbCL triAabb;\n" +" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +" \n" +" float4 verticesA[3];\n" +" for (int i=0;i<3;i++)\n" +" {\n" +" int index = indices[face.m_indexOffset+i];\n" +" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +" verticesA[i] = vert;\n" +" localCenter += vert;\n" +" \n" +" triAabb.m_min = min(triAabb.m_min,vert);\n" +" triAabb.m_max = max(triAabb.m_max,vert);\n" +" \n" +" }\n" +" \n" +" overlap = true;\n" +" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" +" \n" +" if (overlap)\n" +" {\n" +" float dmin = dmins[i];\n" +" int hasSeparatingAxis=5;\n" +" float4 sepAxis=make_float4(1,2,3,4);\n" +" sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" +" \n" +" int localCC=0;\n" +" numActualConcaveConvexTests++;\n" +" \n" +" //a triangle has 3 unique edges\n" +" convexPolyhedronA.m_numUniqueEdges = 3;\n" +" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +" float4 uniqueEdgesA[3];\n" +" \n" +" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +" \n" +" \n" +" convexPolyhedronA.m_faceOffset = 0;\n" +" \n" +" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +" \n" +" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +" int indicesA[3+3+2+2+2];\n" +" int curUsedIndices=0;\n" +" int fidx=0;\n" +" \n" +" //front size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[0] = 0;\n" +" indicesA[1] = 1;\n" +" indicesA[2] = 2;\n" +" curUsedIndices+=3;\n" +" float c = face.m_plane.w;\n" +" facesA[fidx].m_plane.x = normal.x;\n" +" facesA[fidx].m_plane.y = normal.y;\n" +" facesA[fidx].m_plane.z = normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" //back size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[3]=2;\n" +" indicesA[4]=1;\n" +" indicesA[5]=0;\n" +" curUsedIndices+=3;\n" +" float c = dot(normal,verticesA[0]);\n" +" float c1 = -face.m_plane.w;\n" +" facesA[fidx].m_plane.x = -normal.x;\n" +" facesA[fidx].m_plane.y = -normal.y;\n" +" facesA[fidx].m_plane.z = -normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" \n" +" bool addEdgePlanes = true;\n" +" if (addEdgePlanes)\n" +" {\n" +" int numVertices=3;\n" +" int prevVertex = numVertices-1;\n" +" for (int i=0;i<numVertices;i++)\n" +" {\n" +" float4 v0 = 
verticesA[i];\n" +" float4 v1 = verticesA[prevVertex];\n" +" \n" +" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" +" float c = -dot(edgeNormal,v0);\n" +" \n" +" facesA[fidx].m_numIndices = 2;\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[curUsedIndices++]=i;\n" +" indicesA[curUsedIndices++]=prevVertex;\n" +" \n" +" facesA[fidx].m_plane.x = edgeNormal.x;\n" +" facesA[fidx].m_plane.y = edgeNormal.y;\n" +" facesA[fidx].m_plane.z = edgeNormal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" fidx++;\n" +" prevVertex = i;\n" +" }\n" +" }\n" +" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +" \n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" \n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" \n" +" \n" +" \n" +" \n" +" ///////////////////\n" +" ///compound shape support\n" +" \n" +" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" int compoundChild = concavePairs[pairIdx].w;\n" +" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +" }\n" +" //////////////////\n" +" \n" +" float4 c0local = convexPolyhedronA.m_localCenter;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" 
const float4 DeltaC2 = c0 - c1;\n" +" \n" +" \n" +" {\n" +" bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" vertices,uniqueEdges,faces,indices,\n" +" &sepAxis,&dmin);\n" +" \n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis = 1;\n" +" }\n" +" }\n" +" \n" +" \n" +" if (hasSeparatingAxis)\n" +" {\n" +" sepAxis.w = dmin;\n" +" dmins[i] = dmin;\n" +" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +" concaveHasSeparatingNormals[i]=1;\n" +" \n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" findClippingFaces(sepAxis,\n" +" &convexPolyhedronA,\n" +" &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" worldVertsA1GPU,\n" +" worldNormalsAGPU,\n" +" worldVertsB1GPU,\n" +" vertexFaceCapacity,\n" +" minDist, maxDist,\n" +" verticesA,\n" +" facesA,\n" +" indicesA,\n" +" vertices,\n" +" faces,\n" +" indices,\n" +" clippingFacesOut, pairIdx);\n" +" \n" +" \n" +" } else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +" }\n" +" else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +" \n" +" concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" +"}\n" +; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h new file mode 100644 index 0000000000..6f8b0a90db --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h @@ -0,0 +1,2104 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* satKernelsCL= \ +"//keep this enum in sync with the CPU version (in btCollidable.h)\n" +"//written by Erwin Coumans\n" 
+"#define SHAPE_CONVEX_HULL 3\n" +"#define SHAPE_CONCAVE_TRIMESH 5\n" +"#define TRIANGLE_NUM_CONVEX_FACES 5\n" +"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +"#define B3_MAX_STACK_DEPTH 256\n" +"typedef unsigned int u32;\n" +"///keep this in sync with btCollidable.h\n" +"typedef struct\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" union\n" +" {\n" +" float m_radius;\n" +" int m_compoundBvhIndex;\n" +" };\n" +" \n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +" \n" +"} btCollidableGpu;\n" +"#define MAX_NUM_PARTS_IN_BITS 10\n" +"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" +"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes\n" +" int m_escapeIndexOrTriangleIndex;\n" +"} b3QuantizedBvhNode;\n" +"typedef struct\n" +"{\n" +" float4 m_aabbMin;\n" +" float4 m_aabbMax;\n" +" float4 m_quantization;\n" +" int m_numNodes;\n" +" int m_numSubTrees;\n" +" int m_nodeOffset;\n" +" int m_subTreeOffset;\n" +"} b3BvhInfo;\n" +"int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" unsigned int x=0;\n" +" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" +" // Get only the lower bits where the triangle index is stored\n" +" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" +"}\n" +"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0;\n" +"}\n" +"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" +" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" +"}\n" +" \n" +"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" +"{\n" +" return -rootNode->m_escapeIndexOrTriangleIndex;\n" +"}\n" +"typedef struct\n" +"{\n" +" //12 bytes\n" +" unsigned short int m_quantizedAabbMin[3];\n" +" unsigned short int m_quantizedAabbMax[3];\n" +" //4 bytes, points to the root of the subtree\n" +" int m_rootNodeIndex;\n" +" //4 bytes\n" +" int m_subtreeSize;\n" +" int m_padding[3];\n" +"} b3BvhSubtreeInfo;\n" +"typedef struct\n" +"{\n" +" float4 m_childPosition;\n" +" float4 m_childOrientation;\n" +" int m_shapeIndex;\n" +" int m_unused0;\n" +" int m_unused1;\n" +" int m_unused2;\n" +"} btGpuChildShape;\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +" u32 m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} BodyData;\n" +"typedef struct \n" +"{\n" +" float4 m_localCenter;\n" +" float4 m_extents;\n" +" float4 mC;\n" +" float4 mE;\n" +" \n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"} ConvexPolyhedronCL;\n" +"typedef struct \n" +"{\n" +" union\n" +" {\n" +" float4 m_min;\n" +" float m_minElems[4];\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float4 m_max;\n" +" float m_maxElems[4];\n" +" int m_maxIndices[4];\n" +" };\n" +"} btAabbCL;\n" +"#ifndef B3_AABB_H\n" +"#define B3_AABB_H\n" +"#ifndef B3_FLOAT4_H\n" +"#define B3_FLOAT4_H\n" +"#ifndef 
B3_PLATFORM_DEFINITIONS_H\n" +"#define B3_PLATFORM_DEFINITIONS_H\n" +"struct MyTest\n" +"{\n" +" int bla;\n" +"};\n" +"#ifdef __cplusplus\n" +"#else\n" +"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" +"#define B3_LARGE_FLOAT 1e18f\n" +"#define B3_INFINITY 1e18f\n" +"#define b3Assert(a)\n" +"#define b3ConstArray(a) __global const a*\n" +"#define b3AtomicInc atomic_inc\n" +"#define b3AtomicAdd atomic_add\n" +"#define b3Fabs fabs\n" +"#define b3Sqrt native_sqrt\n" +"#define b3Sin native_sin\n" +"#define b3Cos native_cos\n" +"#define B3_STATIC\n" +"#endif\n" +"#endif\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Float4;\n" +" #define b3Float4ConstArg const b3Float4\n" +" #define b3MakeFloat4 (float4)\n" +" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return dot(a1, b1);\n" +" }\n" +" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" +" {\n" +" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" +" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" +" return cross(a1, b1);\n" +" }\n" +" #define b3MinFloat4 min\n" +" #define b3MaxFloat4 max\n" +" #define b3Normalized(a) normalize(a)\n" +"#endif \n" +" \n" +"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" +"{\n" +" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" +" return false;\n" +" return true;\n" +"}\n" +"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" +"{\n" +" float maxDot = -B3_INFINITY;\n" +" int i = 0;\n" +" int ptIndex = -1;\n" +" for( i = 0; i < vecLen; i++ )\n" +" {\n" +" float dot = b3Dot3F4(vecArray[i],vec);\n" +" \n" +" if( dot > maxDot )\n" +" {\n" +" maxDot = dot;\n" +" ptIndex = i;\n" +" }\n" +" }\n" +" b3Assert(ptIndex>=0);\n" +" if (ptIndex<0)\n" +" {\n" +" ptIndex = 0;\n" +" }\n" +" *dotOut = maxDot;\n" +" return ptIndex;\n" +"}\n" +"#endif //B3_FLOAT4_H\n" +"#ifndef B3_MAT3x3_H\n" +"#define 
B3_MAT3x3_H\n" +"#ifndef B3_QUAT_H\n" +"#define B3_QUAT_H\n" +"#ifndef B3_PLATFORM_DEFINITIONS_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif\n" +"#endif\n" +"#ifndef B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#endif \n" +"#endif //B3_FLOAT4_H\n" +"#ifdef __cplusplus\n" +"#else\n" +" typedef float4 b3Quat;\n" +" #define b3QuatConstArg const b3Quat\n" +" \n" +" \n" +"inline float4 b3FastNormalize4(float4 v)\n" +"{\n" +" v = (float4)(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +" \n" +"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" +"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" +"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" +"{\n" +" b3Quat ans;\n" +" ans = b3Cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" +"{\n" +" b3Quat q;\n" +" q=in;\n" +" //return b3FastNormalize4(in);\n" +" float len = native_sqrt(dot(q, q));\n" +" if(len > 0.f)\n" +" {\n" +" q *= 1.f / len;\n" +" }\n" +" else\n" +" {\n" +" q.x = q.y = q.z = 0.f;\n" +" q.w = 1.f;\n" +" }\n" +" return q;\n" +"}\n" +"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" b3Quat qInv = b3QuatInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" +"{\n" +" return (b3Quat)(-q.xyz, q.w);\n" +"}\n" +"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" +"{\n" +" return b3QuatRotate( b3QuatInvert( q ), vec );\n" +"}\n" +"inline b3Float4 
b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" +"{\n" +" return b3QuatRotate( orientation, point ) + (translation);\n" +"}\n" +" \n" +"#endif \n" +"#endif //B3_QUAT_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"typedef struct\n" +"{\n" +" b3Float4 m_row[3];\n" +"}b3Mat3x3;\n" +"#define b3Mat3x3ConstArg const b3Mat3x3\n" +"#define b3GetRow(m,row) (m.m_row[row])\n" +"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" +"{\n" +" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" +" b3Mat3x3 out;\n" +" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" +" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" +" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" +" out.m_row[0].w = 0.f;\n" +" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" +" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" +" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" +" out.m_row[1].w = 0.f;\n" +" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" +" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" +" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" +" out.m_row[2].w = 0.f;\n" +" return out;\n" +"}\n" +"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = fabs(matIn.m_row[0]);\n" +" out.m_row[1] = fabs(matIn.m_row[1]);\n" +" out.m_row[2] = fabs(matIn.m_row[2]);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtZero();\n" +"__inline\n" +"b3Mat3x3 mtIdentity();\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" +"__inline\n" +"b3Mat3x3 mtZero()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = (b3Float4)(0.f);\n" +" m.m_row[1] = (b3Float4)(0.f);\n" +" m.m_row[2] = (b3Float4)(0.f);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtIdentity()\n" +"{\n" +" b3Mat3x3 m;\n" +" m.m_row[0] = 
(b3Float4)(1,0,0,0);\n" +" m.m_row[1] = (b3Float4)(0,1,0,0);\n" +" m.m_row[2] = (b3Float4)(0,0,1,0);\n" +" return m;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" +"{\n" +" b3Mat3x3 out;\n" +" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" +" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" +" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" +" return out;\n" +"}\n" +"__inline\n" +"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" +"{\n" +" b3Mat3x3 transB;\n" +" transB = mtTranspose( b );\n" +" b3Mat3x3 ans;\n" +" // why this doesn't run when 0ing in the for{}\n" +" a.m_row[0].w = 0.f;\n" +" a.m_row[1].w = 0.f;\n" +" a.m_row[2].w = 0.f;\n" +" for(int i=0; i<3; i++)\n" +" {\n" +"// a.m_row[i].w = 0.f;\n" +" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" +" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" +" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" +" ans.m_row[i].w = 0.f;\n" +" }\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" +"{\n" +" b3Float4 ans;\n" +" ans.x = b3Dot3F4( a.m_row[0], b );\n" +" ans.y = b3Dot3F4( a.m_row[1], b );\n" +" ans.z = b3Dot3F4( a.m_row[2], b );\n" +" ans.w = 0.f;\n" +" return ans;\n" +"}\n" +"__inline\n" +"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" +"{\n" +" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" +" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" +" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" +" b3Float4 ans = (b3Float4)(0.f); // zero-init so the returned .w is defined (0), as in mtMul1\n" +" ans.x = b3Dot3F4( a, colx );\n" +" ans.y = b3Dot3F4( a, coly );\n" +" ans.z = b3Dot3F4( a, colz );\n" +" return ans;\n" +"}\n" +"#endif\n" +"#endif //B3_MAT3x3_H\n" +"typedef struct b3Aabb b3Aabb_t;\n" +"struct b3Aabb\n" +"{\n" +" union\n" +" {\n" +" float m_min[4];\n" +" b3Float4 m_minVec;\n" +" int m_minIndices[4];\n" +" };\n" +" union\n" +" {\n" +" float 
m_max[4];\n" +" b3Float4 m_maxVec;\n" +" int m_signedMaxIndices[4];\n" +" };\n" +"};\n" +"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" +" b3Float4ConstArg pos,\n" +" b3QuatConstArg orn,\n" +" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" +"{\n" +" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" +" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" +" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" +" b3Mat3x3 m;\n" +" m = b3QuatGetRotationMatrix(orn);\n" +" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" +" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" +" \n" +" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" +" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" +" 0.f);\n" +" *aabbMinOut = center-extent;\n" +" *aabbMaxOut = center+extent;\n" +"}\n" +"/// conservative test for overlap between two aabbs\n" +"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" +" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" +"{\n" +" bool overlap = true;\n" +" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" +" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" +" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? 
false : overlap;\n" +" return overlap;\n" +"}\n" +"#endif //B3_AABB_H\n" +"/*\n" +"Bullet Continuous Collision Detection and Physics Library\n" +"Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" +"This software is provided 'as-is', without any express or implied warranty.\n" +"In no event will the authors be held liable for any damages arising from the use of this software.\n" +"Permission is granted to anyone to use this software for any purpose,\n" +"including commercial applications, and to alter it and redistribute it freely,\n" +"subject to the following restrictions:\n" +"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" +"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" +"3. This notice may not be removed or altered from any source distribution.\n" +"*/\n" +"#ifndef B3_INT2_H\n" +"#define B3_INT2_H\n" +"#ifdef __cplusplus\n" +"#else\n" +"#define b3UnsignedInt2 uint2\n" +"#define b3Int2 int2\n" +"#define b3MakeInt2 (int2)\n" +"#endif //__cplusplus\n" +"#endif\n" +"typedef struct\n" +"{\n" +" float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +"} btGpuFace;\n" +"#define make_float4 (float4)\n" +"__inline\n" +"float4 cross3(float4 a, float4 b)\n" +"{\n" +" return cross(a,b);\n" +" \n" +"// float4 a1 = make_float4(a.xyz,0.f);\n" +"// float4 b1 = make_float4(b.xyz,0.f);\n" +"// return cross(a1,b1);\n" +"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" +" \n" +" // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" +" \n" +" //return c;\n" +"}\n" +"__inline\n" +"float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = make_float4(a.xyz,0.f);\n" +" float4 b1 = make_float4(b.xyz,0.f);\n" +" return dot(a1, 
b1);\n" +"}\n" +"__inline\n" +"float4 fastNormalize4(float4 v)\n" +"{\n" +" v = make_float4(v.xyz,0.f);\n" +" return fast_normalize(v);\n" +"}\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"typedef float4 Quaternion;\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b);\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in);\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec);\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q);\n" +"__inline\n" +"Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross3( a, b );\n" +" ans += a.w*b+b.w*a;\n" +"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"// Returns the quaternion scaled to unit length over all four components (x,y,z,w).\n" +"__inline\n" +"Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" // Do not route through fastNormalize4(): it overwrites w with 0 before normalizing,\n" +" // which would discard the scalar part of the quaternion.\n" +" return fast_normalize(in);\n" +"// in /= length( in );\n" +"// return in;\n" +"}\n" +"__inline\n" +"float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +"Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +"float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"__inline\n" +"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" +"{\n" +" return qtRotate( *orientation, *p ) + (*translation);\n" +"}\n" +"__inline\n" +"float4 normalize3(const float4 a)\n" +"{\n" +" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" +" return fastNormalize4( n );\n" +"}\n" +"inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = 
hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;i<numVerts;i++)\n" +" {\n" +" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" +"const float4* dir, __global const float4* vertices, float* min, float* max)\n" +"{\n" +" min[0] = FLT_MAX;\n" +" max[0] = -FLT_MAX;\n" +" int numVerts = hull->m_numVertices;\n" +" const float4 localDir = qtInvRotate(orn,*dir);\n" +" float offset = dot(pos,*dir);\n" +" for(int i=0;i<numVerts;i++)\n" +" {\n" +" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" +" if(dp < min[0]) \n" +" min[0] = dp;\n" +" if(dp > max[0]) \n" +" max[0] = dp;\n" +" }\n" +" if(min[0]>max[0])\n" +" {\n" +" float tmp = min[0];\n" +" min[0] = max[0];\n" +" max[0] = tmp;\n" +" }\n" +" min[0] += offset;\n" +" max[0] += offset;\n" +"}\n" +"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA,const float4 ornA,\n" +" const float4 posB,const float4 ornB,\n" +" float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" +"{\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" *depth = d0<d1 ? 
d0:d1;\n" +" return true;\n" +"}\n" +"inline bool IsAlmostZero(const float4 v)\n" +"{\n" +" if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" +" return false;\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" \n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;i<numFacesA;i++)\n" +" {\n" +" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS*=-1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* verticesA, \n" +" __global const float4* uniqueEdgesA, \n" +" __global const btGpuFace* facesA,\n" +" __global const int* indicesA,\n" +" const 
float4* verticesB,\n" +" const float4* uniqueEdgesB, \n" +" const btGpuFace* facesB,\n" +" const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;i<numFacesA;i++)\n" +" {\n" +" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS *= -1.f;\n" +" curPlaneTests++;\n" +" float d;\n" +" if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" +" return false;\n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" const float4* verticesA, \n" +" const float4* uniqueEdgesA, \n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB, \n" +" __global const float4* uniqueEdgesB, \n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test edges\n" +" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +" {\n" +" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" +" float4 edge0World = qtRotate(ornA,edge0);\n" +" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +" {\n" +" const float4 edge1 = 
uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" +" float4 edge1World = qtRotate(ornB,edge1);\n" +" float4 crossje = cross3(edge0World,edge1World);\n" +" curEdgeEdge++;\n" +" if(!IsAlmostZero(crossje))\n" +" {\n" +" crossje = normalize3(crossje);\n" +" if (dot3F4(DeltaC2,crossje)<0)\n" +" crossje *= -1.f;\n" +" float dist;\n" +" bool result = true;\n" +" {\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" +" \n" +" // Disjoint projections mean this edge-edge axis separates the hulls: report it.\n" +" // (Previously only a flag was set and then overwritten, so separation was never returned;\n" +" // findSeparatingAxisEdgeEdge returns false at this same point.)\n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" \n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" dist = d0<d1 ? d0:d1;\n" +" result = true;\n" +" }\n" +" \n" +" if(dist<*dmin)\n" +" {\n" +" *dmin = dist;\n" +" *sep = crossje;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" \n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA,const float4 ornA,\n" +" const float4 posB,const float4 ornB,\n" +" float4* sep_axis, __global const float4* vertices,float* depth)\n" +"{\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" *depth = d0<d1 ? 
d0:d1;\n" +" return true;\n" +"}\n" +"bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices, \n" +" __global const float4* uniqueEdges, \n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" \n" +" int curPlaneTests=0;\n" +" {\n" +" int numFacesA = hullA->m_numFaces;\n" +" // Test normals from hullA\n" +" for(int i=0;i<numFacesA;i++)\n" +" {\n" +" const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" +" float4 faceANormalWS = qtRotate(ornA,normal);\n" +" \n" +" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" +" faceANormalWS*=-1.f;\n" +" \n" +" curPlaneTests++;\n" +" \n" +" float d;\n" +" if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" +" return false;\n" +" \n" +" if(d<*dmin)\n" +" {\n" +" *dmin = d;\n" +" *sep = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" \n" +" return true;\n" +"}\n" +"bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices,\n" +" __global const float4* unitSphereDirections,\n" +" int numUnitSphereDirections,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test unit sphere directions\n" +" for (int i=0;i<numUnitSphereDirections;i++)\n" +" {\n" +" float4 
crossje;\n" +" crossje = unitSphereDirections[i]; \n" +" if (dot3F4(DeltaC2,crossje)>0)\n" +" crossje *= -1.f;\n" +" {\n" +" float dist;\n" +" bool result = true;\n" +" float Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +" \n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" \n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" dist = d0<d1 ? d0:d1;\n" +" result = true;\n" +" \n" +" if(dist<*dmin)\n" +" {\n" +" *dmin = dist;\n" +" *sep = crossje;\n" +" }\n" +" }\n" +" }\n" +" \n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" +" const float4 posA1,\n" +" const float4 ornA,\n" +" const float4 posB1,\n" +" const float4 ornB,\n" +" const float4 DeltaC2,\n" +" __global const float4* vertices, \n" +" __global const float4* uniqueEdges, \n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" float4* sep,\n" +" float* dmin)\n" +"{\n" +" \n" +" float4 posA = posA1;\n" +" posA.w = 0.f;\n" +" float4 posB = posB1;\n" +" posB.w = 0.f;\n" +" int curPlaneTests=0;\n" +" int curEdgeEdge = 0;\n" +" // Test edges\n" +" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" +" {\n" +" const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" +" float4 edge0World = qtRotate(ornA,edge0);\n" +" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" +" {\n" +" const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" +" float4 edge1World = qtRotate(ornB,edge1);\n" +" float4 crossje = cross3(edge0World,edge1World);\n" +" curEdgeEdge++;\n" +" if(!IsAlmostZero(crossje))\n" +" {\n" +" crossje = normalize3(crossje);\n" +" if (dot3F4(DeltaC2,crossje)<0)\n" +" crossje*=-1.f;\n" +" \n" +" float dist;\n" +" bool result = true;\n" +" {\n" +" float 
Min0,Max0;\n" +" float Min1,Max1;\n" +" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" +" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" +" \n" +" if(Max0<Min1 || Max1<Min0)\n" +" return false;\n" +" \n" +" float d0 = Max0 - Min1;\n" +" float d1 = Max1 - Min0;\n" +" dist = d0<d1 ? d0:d1;\n" +" result = true;\n" +" }\n" +" \n" +" if(dist<*dmin)\n" +" {\n" +" *dmin = dist;\n" +" *sep = crossje;\n" +" }\n" +" }\n" +" }\n" +" }\n" +" \n" +" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" +" {\n" +" *sep = -(*sep);\n" +" }\n" +" return true;\n" +"}\n" +"// work-in-progress\n" +"__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global volatile float4* gpuCompoundSepNormalsOut,\n" +" __global volatile int* gpuHasCompoundSepNormalsOut,\n" +" int numCompoundPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i<numCompoundPairs)\n" +" {\n" +" int bodyIndexA = gpuCompoundPairs[i].x;\n" +" int bodyIndexB = gpuCompoundPairs[i].y;\n" +" int childShapeIndexA = gpuCompoundPairs[i].z;\n" +" int childShapeIndexB = gpuCompoundPairs[i].w;\n" +" \n" +" int collidableIndexA = -1;\n" +" int collidableIndexB = -1;\n" +" \n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" \n" +" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" \n" +" if (childShapeIndexA >= 0)\n" +" {\n" +" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +" float4 
childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +" float4 newOrnA = qtMul(ornA,childOrnA);\n" +" posA = newPosA;\n" +" ornA = newOrnA;\n" +" } else\n" +" {\n" +" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" }\n" +" \n" +" if (childShapeIndexB>=0)\n" +" {\n" +" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" } else\n" +" {\n" +" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" +" }\n" +" \n" +" gpuHasCompoundSepNormalsOut[i] = 0;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" +" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" +" \n" +" if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" +" {\n" +" return;\n" +" }\n" +" int hasSeparatingAxis = 5;\n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" posA.w = 0.f;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal = make_float4(1,0,0,0);\n" +" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +" hasSeparatingAxis = 4;\n" +" if (!sepA)\n" +" {\n" +" 
hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +" if (!sepB)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else//(!sepB)\n" +" {\n" +" bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" +" if (sepEE)\n" +" {\n" +" gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" +" gpuHasCompoundSepNormalsOut[i] = 1;\n" +" }//sepEE\n" +" }//(!sepB)\n" +" }//(!sepA)\n" +" \n" +" \n" +" }\n" +" \n" +"}\n" +"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +"{\n" +" b3Float4 vecOut;\n" +" vecOut = b3MakeFloat4(\n" +" (float)(vecIn[0]) / (quantization.x),\n" +" (float)(vecIn[1]) / (quantization.y),\n" +" (float)(vecIn[2]) / (quantization.z),\n" +" 0.f);\n" +" vecOut += bvhAabbMin;\n" +" return vecOut;\n" +"}\n" +"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" +"{\n" +" b3Float4 vecOut;\n" +" vecOut = b3MakeFloat4(\n" +" (float)(vecIn[0]) / (quantization.x),\n" +" (float)(vecIn[1]) / (quantization.y),\n" +" (float)(vecIn[2]) / (quantization.z),\n" +" 0.f);\n" +" vecOut += bvhAabbMin;\n" +" return vecOut;\n" +"}\n" +"// work-in-progress\n" +"__kernel void findCompoundPairsKernel( __global const int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global b3Aabb_t* aabbLocalSpace,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global volatile int4* 
gpuCompoundPairsOut,\n" +" __global volatile int* numCompoundPairsOut,\n" +" __global const b3BvhSubtreeInfo* subtrees,\n" +" __global const b3QuantizedBvhNode* quantizedNodes,\n" +" __global const b3BvhInfo* bvhInfos,\n" +" int numPairs,\n" +" int maxNumCompoundPairsCapacity\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i<numPairs)\n" +" {\n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" //once the broadphase avoids static-static pairs, we can remove this test\n" +" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +" {\n" +" return;\n" +" }\n" +" if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +" {\n" +" int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" +" int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" +" int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" +" int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" +" int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" +" int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" +" b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" \n" +" for (int p=0;p<numSubTreesA;p++)\n" +" {\n" +" b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" +" //bvhInfos[bvhA].m_quantization\n" +" b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +" b3Float4 treeAmaxLocal = 
MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +" b3Float4 aabbAMinOut,aabbAMaxOut;\n" +" float margin=0.f;\n" +" b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +" \n" +" for (int q=0;q<numSubTreesB;q++)\n" +" {\n" +" b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" +" b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +" b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +" b3Float4 aabbBMinOut,aabbBMaxOut;\n" +" float margin=0.f;\n" +" b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +" \n" +" \n" +" bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +" if (aabbOverlap)\n" +" {\n" +" \n" +" int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" +" int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" +" int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" +" int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" +" b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" +" b3Int2 node0;\n" +" node0.x = startNodeIndexA;\n" +" node0.y = startNodeIndexB;\n" +" int maxStackDepth = B3_MAX_STACK_DEPTH;\n" +" int depth=0;\n" +" nodeStack[depth++]=node0;\n" +" do\n" +" {\n" +" b3Int2 node = nodeStack[--depth];\n" +" b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +" b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" +" b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +" b3Float4 bMaxLocal = 
MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" +" float margin=0.f;\n" +" b3Float4 aabbAMinOut,aabbAMaxOut;\n" +" b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" +" b3Float4 aabbBMinOut,aabbBMaxOut;\n" +" b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" +" \n" +" bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" +" if (nodeOverlap)\n" +" {\n" +" bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" +" bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" +" bool isInternalA = !isLeafA;\n" +" bool isInternalB = !isLeafB;\n" +" //fail, even though it might hit two leaf nodes\n" +" if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" +" {\n" +" //printf(\"Error: traversal exceeded maxStackDepth\");\n" +" continue;\n" +" }\n" +" if(isInternalA)\n" +" {\n" +" int nodeAleftChild = node.x+1;\n" +" bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" +" int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" +" if(isInternalB)\n" +" { \n" +" int nodeBleftChild = node.y+1;\n" +" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +" int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" +" }\n" +" else\n" +" {\n" +" nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" +" nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" +" }\n" +" }\n" +" else\n" +" {\n" +" if(isInternalB)\n" +" {\n" +" int nodeBleftChild = node.y+1;\n" +" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" +" int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" +" nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" +" nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" +" }\n" +" else\n" +" {\n" +" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" {\n" +" int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" +" int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" +" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +" }\n" +" }\n" +" }\n" +" }\n" +" } while (depth);\n" +" }\n" +" }\n" +" }\n" +" \n" +" return;\n" +" }\n" +" if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +" {\n" +" if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" +" {\n" +" int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" +" for (int c=0;c<numChildrenA;c++)\n" +" {\n" +" int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" +" int childColIndexA = 
gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" +" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" +" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" +" float4 newOrnA = qtMul(ornA,childOrnA);\n" +" int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" +" b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" +" float margin = 0.f;\n" +" \n" +" b3Float4 aabbAMinWS;\n" +" b3Float4 aabbAMaxWS;\n" +" \n" +" b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" +" newPosA,\n" +" newOrnA,\n" +" &aabbAMinWS,&aabbAMaxWS);\n" +" \n" +" \n" +" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +" for (int b=0;b<numChildrenB;b++)\n" +" {\n" +" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +" b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" +" \n" +" b3Float4 aabbBMinWS;\n" +" b3Float4 aabbBMaxWS;\n" +" \n" +" b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" +" newPosB,\n" +" newOrnB,\n" +" &aabbBMinWS,&aabbBMaxWS);\n" +" \n" +" \n" +" \n" +" bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" +" if (aabbOverlap)\n" +" {\n" +" int numFacesA = 
convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" float4 posA = newPosA;\n" +" posA.w = 0.f;\n" +" float4 posB = newPosB;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = newOrnA;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB =newOrnB;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" {//\n" +" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" {\n" +" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" +" }\n" +" }//\n" +" }//fi(1)\n" +" } //for (int b=0\n" +" }//if (collidables[collidableIndexB].\n" +" else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" if (1)\n" +" {\n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" float4 posA = newPosA;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = newOrnA;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" {\n" +" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" {\n" +" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" +" }//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" }//\n" +" }//fi (1)\n" +" }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" }//for (int b=0;b<numChildrenB;b++) \n" +" return;\n" +" }//if 
(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" +" && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +" {\n" +" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" +" for (int b=0;b<numChildrenB;b++)\n" +" {\n" +" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" +" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +" //////////////////////////////////////\n" +" if (1)\n" +" {\n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = newPosB;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB =newOrnB;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" {//\n" +" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" +" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" {\n" +" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" +" }//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" +" }//\n" +" }//fi (1) \n" +" }//for (int b=0;b<numChildrenB;b++)\n" +" return;\n" +" }//if 
(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" return;\n" +" }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" +" }//i<numPairs\n" +"}\n" +"// work-in-progress\n" +"__kernel void findSeparatingAxisKernel( __global const int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global volatile float4* separatingNormals,\n" +" __global volatile int* hasSeparatingAxis,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" \n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" //once the broadphase avoids static-static pairs, we can remove this test\n" +" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" return;\n" +" }\n" +" \n" +" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" return;\n" +" }\n" +" \n" +" if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" return;\n" +" }\n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" float4 posA = 
rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal;\n" +" \n" +" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" hasSeparatingAxis[i] = 4;\n" +" if (!sepA)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +" posA,ornA,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" if (!sepB)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis[i] = 1;\n" +" separatingNormals[i] = sepNormal;\n" +" }\n" +" }\n" +" }\n" +" \n" +" }\n" +"}\n" +"__kernel void findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global volatile float4* 
separatingNormals,\n" +" __global volatile int* hasSeparatingAxis,\n" +" __global float* dmins,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" \n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" hasSeparatingAxis[i] = 0; \n" +" \n" +" //once the broadphase avoids static-static pairs, we can remove this test\n" +" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" +" {\n" +" return;\n" +" }\n" +" \n" +" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" +" {\n" +" return;\n" +" }\n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" float dmin = FLT_MAX;\n" +" dmins[i] = dmin;\n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal;\n" +" \n" +" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" hasSeparatingAxis[i] = 4;\n" +" if (!sepA)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" bool sepB = 
findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" +" posA,ornA,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" if (sepB)\n" +" {\n" +" dmins[i] = dmin;\n" +" hasSeparatingAxis[i] = 1;\n" +" separatingNormals[i] = sepNormal;\n" +" }\n" +" }\n" +" \n" +" }\n" +"}\n" +"__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" +" __global const BodyData* rigidBodies, \n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global btAabbCL* aabbs,\n" +" __global float4* separatingNormals,\n" +" __global int* hasSeparatingAxis,\n" +" __global float* dmins,\n" +" __global const float4* unitSphereDirections,\n" +" int numUnitSphereDirections,\n" +" int numPairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" \n" +" if (i<numPairs)\n" +" {\n" +" if (hasSeparatingAxis[i])\n" +" {\n" +" \n" +" int bodyIndexA = pairs[i].x;\n" +" int bodyIndexB = pairs[i].y;\n" +" \n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" \n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" \n" +" \n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" \n" +" float dmin = dmins[i];\n" +" \n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 ornB 
=rigidBodies[bodyIndexB].m_quat;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" float4 sepNormal = separatingNormals[i];\n" +" \n" +" \n" +" \n" +" bool sepEE = false;\n" +" int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" +" if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" +" {\n" +" sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,\n" +" indices,&sepNormal,&dmin);\n" +" \n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis[i] = 1;\n" +" separatingNormals[i] = sepNormal;\n" +" }\n" +" }\n" +" /*\n" +" ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" +" else\n" +" {\n" +" sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" vertices,unitSphereDirections,numUnitSphereDirections,\n" +" &sepNormal,&dmin);\n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis[i] = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis[i] = 1;\n" +" separatingNormals[i] = sepNormal;\n" +" }\n" +" }\n" +" */\n" +" } //if (hasSeparatingAxis[i])\n" +" }//(i<numPairs)\n" +"}\n" +"inline int findClippingFaces(const float4 separatingNormal,\n" +" const ConvexPolyhedronCL* hullA, \n" +" __global const ConvexPolyhedronCL* hullB,\n" +" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" +" __global float4* worldVertsA1,\n" +" __global float4* worldNormalsA1,\n" +" __global float4* worldVertsB1,\n" +" int capacityWorldVerts,\n" +" const float minDist, float maxDist,\n" +" const float4* verticesA,\n" +" const btGpuFace* facesA,\n" +" const int* indicesA,\n" +" __global const float4* verticesB,\n" +" __global const btGpuFace* facesB,\n" +" __global const int* indicesB,\n" +" 
__global int4* clippingFaces, int pairIndex)\n" +"{\n" +" int numContactsOut = 0;\n" +" int numWorldVertsB1= 0;\n" +" \n" +" \n" +" int closestFaceB=0;\n" +" float dmax = -FLT_MAX;\n" +" \n" +" {\n" +" for(int face=0;face<hullB->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" +" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" +" const float4 WorldNormal = qtRotate(ornB, Normal);\n" +" float d = dot3F4(WorldNormal,separatingNormal);\n" +" if (d > dmax)\n" +" {\n" +" dmax = d;\n" +" closestFaceB = face;\n" +" }\n" +" }\n" +" }\n" +" \n" +" {\n" +" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" +" int numVertices = polyB.m_numIndices;\n" +" if (numVertices>capacityWorldVerts)\n" +" numVertices = capacityWorldVerts;\n" +" \n" +" for(int e0=0;e0<numVertices;e0++)\n" +" {\n" +" if (e0<capacityWorldVerts)\n" +" {\n" +" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" +" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" +" }\n" +" }\n" +" }\n" +" \n" +" int closestFaceA=0;\n" +" {\n" +" float dmin = FLT_MAX;\n" +" for(int face=0;face<hullA->m_numFaces;face++)\n" +" {\n" +" const float4 Normal = make_float4(\n" +" facesA[hullA->m_faceOffset+face].m_plane.x,\n" +" facesA[hullA->m_faceOffset+face].m_plane.y,\n" +" facesA[hullA->m_faceOffset+face].m_plane.z,\n" +" 0.f);\n" +" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" +" \n" +" float d = dot3F4(faceANormalWS,separatingNormal);\n" +" if (d < dmin)\n" +" {\n" +" dmin = d;\n" +" closestFaceA = face;\n" +" worldNormalsA1[pairIndex] = faceANormalWS;\n" +" }\n" +" }\n" +" }\n" +" \n" +" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" +" if (numVerticesA>capacityWorldVerts)\n" +" numVerticesA = capacityWorldVerts;\n" +" \n" +" for(int e0=0;e0<numVerticesA;e0++)\n" +" {\n" +" if 
(e0<capacityWorldVerts)\n" +" {\n" +" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" +" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" +" }\n" +" }\n" +" \n" +" clippingFaces[pairIndex].x = closestFaceA;\n" +" clippingFaces[pairIndex].y = closestFaceB;\n" +" clippingFaces[pairIndex].z = numVerticesA;\n" +" clippingFaces[pairIndex].w = numWorldVertsB1;\n" +" \n" +" \n" +" return numContactsOut;\n" +"}\n" +"// work-in-progress\n" +"__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" +" __global const BodyData* rigidBodies,\n" +" __global const btCollidableGpu* collidables,\n" +" __global const ConvexPolyhedronCL* convexShapes, \n" +" __global const float4* vertices,\n" +" __global const float4* uniqueEdges,\n" +" __global const btGpuFace* faces,\n" +" __global const int* indices,\n" +" __global const btGpuChildShape* gpuChildShapes,\n" +" __global btAabbCL* aabbs,\n" +" __global float4* concaveSeparatingNormalsOut,\n" +" __global int* concaveHasSeparatingNormals,\n" +" __global int4* clippingFacesOut,\n" +" __global float4* worldVertsA1GPU,\n" +" __global float4* worldNormalsAGPU,\n" +" __global float4* worldVertsB1GPU,\n" +" int vertexFaceCapacity,\n" +" int numConcavePairs\n" +" )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i>=numConcavePairs)\n" +" return;\n" +" concaveHasSeparatingNormals[i] = 0;\n" +" int pairIdx = i;\n" +" int bodyIndexA = concavePairs[i].x;\n" +" int bodyIndexB = concavePairs[i].y;\n" +" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" +" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" +" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" +" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" +" if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" +" collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" 
+" {\n" +" concavePairs[pairIdx].w = -1;\n" +" return;\n" +" }\n" +" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" +" int numActualConcaveConvexTests = 0;\n" +" \n" +" int f = concavePairs[i].z;\n" +" \n" +" bool overlap = false;\n" +" \n" +" ConvexPolyhedronCL convexPolyhedronA;\n" +" //add 3 vertices of the triangle\n" +" convexPolyhedronA.m_numVertices = 3;\n" +" convexPolyhedronA.m_vertexOffset = 0;\n" +" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" +" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" +" float4 triMinAabb, triMaxAabb;\n" +" btAabbCL triAabb;\n" +" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" +" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" +" \n" +" float4 verticesA[3];\n" +" for (int i=0;i<3;i++)\n" +" {\n" +" int index = indices[face.m_indexOffset+i];\n" +" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" +" verticesA[i] = vert;\n" +" localCenter += vert;\n" +" \n" +" triAabb.m_min = min(triAabb.m_min,vert); \n" +" triAabb.m_max = max(triAabb.m_max,vert); \n" +" }\n" +" overlap = true;\n" +" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" +" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" +" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" +" \n" +" if (overlap)\n" +" {\n" +" float dmin = FLT_MAX;\n" +" int hasSeparatingAxis=5;\n" +" float4 sepAxis=make_float4(1,2,3,4);\n" +" int localCC=0;\n" +" numActualConcaveConvexTests++;\n" +" //a triangle has 3 unique edges\n" +" convexPolyhedronA.m_numUniqueEdges = 3;\n" +" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" +" float4 uniqueEdgesA[3];\n" +" \n" +" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" +" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" +" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" +" convexPolyhedronA.m_faceOffset = 0;\n" +" \n" +" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" +" \n" +" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" +" int indicesA[3+3+2+2+2];\n" +" int curUsedIndices=0;\n" +" int fidx=0;\n" +" //front size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[0] = 0;\n" +" indicesA[1] = 1;\n" +" indicesA[2] = 2;\n" +" curUsedIndices+=3;\n" +" float c = face.m_plane.w;\n" +" facesA[fidx].m_plane.x = normal.x;\n" +" facesA[fidx].m_plane.y = normal.y;\n" +" facesA[fidx].m_plane.z = normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" //back size of triangle\n" +" {\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[3]=2;\n" +" indicesA[4]=1;\n" +" indicesA[5]=0;\n" +" curUsedIndices+=3;\n" +" float c = dot(normal,verticesA[0]);\n" +" float c1 = -face.m_plane.w;\n" +" facesA[fidx].m_plane.x = -normal.x;\n" +" facesA[fidx].m_plane.y = -normal.y;\n" +" facesA[fidx].m_plane.z = -normal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" facesA[fidx].m_numIndices=3;\n" +" }\n" +" fidx++;\n" +" bool addEdgePlanes = true;\n" +" if (addEdgePlanes)\n" +" {\n" +" int numVertices=3;\n" +" int prevVertex = numVertices-1;\n" +" for (int i=0;i<numVertices;i++)\n" +" {\n" +" float4 v0 = verticesA[i];\n" +" float4 v1 = verticesA[prevVertex];\n" +" \n" +" float4 edgeNormal = 
normalize(cross(normal,v1-v0));\n" +" float c = -dot(edgeNormal,v0);\n" +" facesA[fidx].m_numIndices = 2;\n" +" facesA[fidx].m_indexOffset=curUsedIndices;\n" +" indicesA[curUsedIndices++]=i;\n" +" indicesA[curUsedIndices++]=prevVertex;\n" +" \n" +" facesA[fidx].m_plane.x = edgeNormal.x;\n" +" facesA[fidx].m_plane.y = edgeNormal.y;\n" +" facesA[fidx].m_plane.z = edgeNormal.z;\n" +" facesA[fidx].m_plane.w = c;\n" +" fidx++;\n" +" prevVertex = i;\n" +" }\n" +" }\n" +" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" +" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" +" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" +" posA.w = 0.f;\n" +" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" +" posB.w = 0.f;\n" +" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" +" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" +" \n" +" ///////////////////\n" +" ///compound shape support\n" +" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" +" {\n" +" int compoundChild = concavePairs[pairIdx].w;\n" +" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" +" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" +" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" +" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" +" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" +" float4 newOrnB = qtMul(ornB,childOrnB);\n" +" posB = newPosB;\n" +" ornB = newOrnB;\n" +" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" +" }\n" +" //////////////////\n" +" float4 c0local = convexPolyhedronA.m_localCenter;\n" +" float4 c0 = transform(&c0local, &posA, &ornA);\n" +" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" +" float4 c1 = transform(&c1local,&posB,&ornB);\n" +" const float4 DeltaC2 = c0 - c1;\n" +" bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" 
posB,ornB,\n" +" DeltaC2,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" vertices,uniqueEdges,faces,indices,\n" +" &sepAxis,&dmin);\n" +" hasSeparatingAxis = 4;\n" +" if (!sepA)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" +" posB,ornB,\n" +" posA,ornA,\n" +" DeltaC2,\n" +" vertices,uniqueEdges,faces,indices,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" &sepAxis,&dmin);\n" +" if (!sepB)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" DeltaC2,\n" +" verticesA,uniqueEdgesA,facesA,indicesA,\n" +" vertices,uniqueEdges,faces,indices,\n" +" &sepAxis,&dmin);\n" +" \n" +" if (!sepEE)\n" +" {\n" +" hasSeparatingAxis = 0;\n" +" } else\n" +" {\n" +" hasSeparatingAxis = 1;\n" +" }\n" +" }\n" +" } \n" +" \n" +" if (hasSeparatingAxis)\n" +" {\n" +" sepAxis.w = dmin;\n" +" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" +" concaveHasSeparatingNormals[i]=1;\n" +" float minDist = -1e30f;\n" +" float maxDist = 0.02f;\n" +" \n" +" findClippingFaces(sepAxis,\n" +" &convexPolyhedronA,\n" +" &convexShapes[shapeIndexB],\n" +" posA,ornA,\n" +" posB,ornB,\n" +" worldVertsA1GPU,\n" +" worldNormalsAGPU,\n" +" worldVertsB1GPU,\n" +" vertexFaceCapacity,\n" +" minDist, maxDist,\n" +" verticesA,\n" +" facesA,\n" +" indicesA,\n" +" vertices,\n" +" faces,\n" +" indices,\n" +" clippingFacesOut, pairIdx);\n" +" } else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +" }\n" +" else\n" +" { \n" +" //mark this pair as in-active\n" +" concavePairs[pairIdx].w = -1;\n" +" }\n" +" \n" +" concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" +"}\n" +; |