/* Bullet Continuous Collision Detection and Physics Library Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org This software is provided 'as-is', without any express or implied warranty. In no event will the authors be held liable for any damages arising from the use of this software. Permission is granted to anyone to use this software for any purpose, including commercial applications, and to alter it and redistribute it freely, subject to the following restrictions: 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. */ bool findSeparatingAxisOnGpu = true; bool splitSearchSepAxisConcave = false; bool splitSearchSepAxisConvex = true; bool useMprGpu = true;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... bool bvhTraversalKernelGPU = true; bool findConcaveSeparatingAxisKernelGPU = true; bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; bool clipConvexFacesAndFindContactsCPU = false;//false;//true; bool reduceConcaveContactsOnGPU = true;//false; bool reduceConvexContactsOnGPU = true;//false; bool findConvexClippingFacesGPU = true; bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false static int myframecount=0;///for testing ///This file was written by Erwin Coumans ///Separating axis rest based on work from Pierre Terdiman, see ///And contact clipping based on work from Simon Hobbs //#define B3_DEBUG_SAT_FACE //#define CHECK_ON_HOST #ifdef CHECK_ON_HOST //#define PERSISTENT_CONTACTS_HOST #endif int b3g_actualSATPairTests=0; #include "b3ConvexHullContact.h" #include //memcpy #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" #include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" #include "Bullet3Geometry/b3AabbUtil.h" typedef b3AlignedObjectArray b3VertexArray; #include //for FLT_MAX #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" //#include "AdlQuaternion.h" #include "kernels/satKernels.h" #include "kernels/mprKernels.h" #include "kernels/satConcaveKernels.h" #include "kernels/satClipHullContacts.h" #include "kernels/bvhTraversal.h" #include "kernels/primitiveContacts.h" #include "Bullet3Geometry/b3AabbUtil.h" #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" #define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" #ifndef __global #define __global #endif #ifndef __kernel #define __kernel #endif #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" #define dot3F4 b3Dot GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) :m_context(ctx), m_device(device), m_queue(q), m_findSeparatingAxisKernel(0), m_findSeparatingAxisVertexFaceKernel(0), m_findSeparatingAxisEdgeEdgeKernel(0), m_unitSphereDirections(m_context,m_queue), m_totalContactsOut(m_context, m_queue), m_sepNormals(m_context, m_queue), m_dmins(m_context,m_queue), m_hasSeparatingNormals(m_context, m_queue), m_concaveSepNormals(m_context, m_queue), m_concaveHasSeparatingNormals(m_context,m_queue), m_numConcavePairsOut(m_context, m_queue), m_gpuCompoundPairs(m_context, m_queue), m_gpuCompoundSepNormals(m_context, m_queue), m_gpuHasCompoundSepNormals(m_context, m_queue), m_numCompoundPairsOut(m_context, m_queue) { m_totalContactsOut.push_back(0); cl_int errNum=0; if (1) { const char* mprSrc = mprKernelsCL; const char* srcConcave = satConcaveKernelsCL; char flags[1024]={0}; //#ifdef CL_PLATFORM_INTEL // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); //#endif m_mprPenetrationKernel = 0; m_findSeparatingAxisUnitSphereKernel = 0; if (useMprGpu) { cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); b3Assert(errNum==CL_SUCCESS); m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); b3Assert(m_mprPenetrationKernel); b3Assert(errNum==CL_SUCCESS); m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); b3Assert(m_findSeparatingAxisUnitSphereKernel); b3Assert(errNum==CL_SUCCESS); int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); m_unitSphereDirections.resize(numDirections); m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); } cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); b3Assert(errNum==CL_SUCCESS); cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); b3Assert(errNum==CL_SUCCESS); m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); b3Assert(m_findSeparatingAxisKernel); b3Assert(errNum==CL_SUCCESS); m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); b3Assert(m_findSeparatingAxisVertexFaceKernel); m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); b3Assert(m_findSeparatingAxisVertexFaceKernel); m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); b3Assert(m_findConcaveSeparatingAxisKernel); b3Assert(errNum==CL_SUCCESS); m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); b3Assert(errNum==CL_SUCCESS); m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); b3Assert(errNum==CL_SUCCESS); m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); b3Assert(m_findCompoundPairsKernel); b3Assert(errNum==CL_SUCCESS); m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); b3Assert(m_processCompoundPairsKernel); b3Assert(errNum==CL_SUCCESS); } if (1) { const char* srcClip = satClipKernelsCL; char flags[1024]={0}; //#ifdef CL_PLATFORM_INTEL // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); //#endif cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); b3Assert(errNum==CL_SUCCESS); m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); // b3Assert(errNum==CL_SUCCESS); m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "newContactReductionKernel",&errNum,satClipContactsProg); b3Assert(errNum==CL_SUCCESS); } else { m_clipHullHullKernel=0; m_clipCompoundsHullHullKernel = 0; m_findClippingFacesKernel = 0; m_newContactReductionKernel=0; m_clipFacesAndFindContacts = 0; m_clipHullHullConcaveConvexKernel = 0; // m_extractManifoldAndAddContactKernel = 0; } if (1) { const char* srcBvh = bvhTraversalKernelCL; cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); b3Assert(errNum==CL_SUCCESS); m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); b3Assert(errNum==CL_SUCCESS); } { const char* primitiveContactsSrc = primitiveContactsKernelsCL; cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); b3Assert(errNum==CL_SUCCESS); m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); b3Assert(errNum==CL_SUCCESS); m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); b3Assert(errNum==CL_SUCCESS); b3Assert(m_findConcaveSphereContactsKernel); m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); b3Assert(errNum==CL_SUCCESS); b3Assert(m_processCompoundPairsPrimitivesKernel); } } GpuSatCollision::~GpuSatCollision() { if (m_findSeparatingAxisVertexFaceKernel) clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); if (m_findSeparatingAxisEdgeEdgeKernel) clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); if (m_findSeparatingAxisUnitSphereKernel) clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); if (m_mprPenetrationKernel) clReleaseKernel(m_mprPenetrationKernel); if (m_findSeparatingAxisKernel) clReleaseKernel(m_findSeparatingAxisKernel); if (m_findConcaveSeparatingAxisVertexFaceKernel) clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); if (m_findConcaveSeparatingAxisEdgeEdgeKernel) clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); if (m_findConcaveSeparatingAxisKernel) clReleaseKernel(m_findConcaveSeparatingAxisKernel); if (m_findCompoundPairsKernel) clReleaseKernel(m_findCompoundPairsKernel); if (m_processCompoundPairsKernel) clReleaseKernel(m_processCompoundPairsKernel); if (m_findClippingFacesKernel) clReleaseKernel(m_findClippingFacesKernel); if (m_clipFacesAndFindContacts) clReleaseKernel(m_clipFacesAndFindContacts); if (m_newContactReductionKernel) clReleaseKernel(m_newContactReductionKernel); if (m_primitiveContactsKernel) clReleaseKernel(m_primitiveContactsKernel); if (m_findConcaveSphereContactsKernel) clReleaseKernel(m_findConcaveSphereContactsKernel); if (m_processCompoundPairsPrimitivesKernel) clReleaseKernel(m_processCompoundPairsPrimitivesKernel); if (m_clipHullHullKernel) clReleaseKernel(m_clipHullHullKernel); if (m_clipCompoundsHullHullKernel) clReleaseKernel(m_clipCompoundsHullHullKernel); if (m_clipHullHullConcaveConvexKernel) clReleaseKernel(m_clipHullHullConcaveConvexKernel); // if (m_extractManifoldAndAddContactKernel) // clReleaseKernel(m_extractManifoldAndAddContactKernel); if (m_bvhTraversalKernel) clReleaseKernel(m_bvhTraversalKernel); } struct MyTriangleCallback : public b3NodeOverlapCallback { int m_bodyIndexA; int m_bodyIndexB; virtual void processNode(int subPart, int triangleIndex) { printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); printf("triangleIndex %d\n", triangleIndex); } }; #define float4 b3Vector3 #define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) { float4 n = planeEqn; n[3] = 0.f; float dist = dot3F4(n, point) + planeEqn[3]; *closestPointOnFace = point - dist * n; return dist; } #define cross3(a,b) (a.cross(b)) b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) { b3Transform tr; tr.setIdentity(); tr.setOrigin(*pos); tr.setRotation(*orn); b3Vector3 res = tr(*v); return res; } inline bool IsPointInPolygon(const float4& p, const b3GpuFace* face, const float4* baseVertex, const int* convexIndices, float4* out) { float4 a; float4 b; float4 ab; float4 ap; float4 v; float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); if (face->m_numIndices<2) return false; float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; b = v0; for(unsigned i=0; i != face->m_numIndices; ++i) { a = b; float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; b = vi; ab = b-a; ap = p-a; v = cross3(ab,plane); if (b3Dot(ap, v) > 0.f) { float ab_m2 = b3Dot(ab, ab); float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; if (rt <= 0.f) { *out = a; } else if (rt >= 1.f) { *out = b; } else { float s = 1.f - rt; out[0].x = s * a.x + rt * b.x; out[0].y = s * a.y + rt * b.y; out[0].z = s * a.z + rt * b.z; } return false; } } return true; } #define normalize3(a) (a.normalize()) int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) { if( nPoints == 0 ) return 0; if (nPoints <=4) return nPoints; if (nPoints >64) nPoints = 64; float4 center = b3MakeVector3(0,0,0,0); { for (int i=0;i& vertices, b3Scalar& min, b3Scalar& max) { min = FLT_MAX; max = -FLT_MAX; int numVerts = hull.m_numVertices; const float4 localDir = b3QuatRotate(orn.inverse(),dir); b3Scalar offset = dot3F4(pos,dir); for(int i=0;i max) max = dp; } if(min>max) { b3Scalar tmp = min; min = max; max = tmp; } min += offset; max += offset; } static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, const float4& posA,const b3Quaternion& ornA, const float4& posB,const b3Quaternion& ornB, const float4& sep_axis, const b3AlignedObjectArray& verticesA,const b3AlignedObjectArray& verticesB,b3Scalar& depth) { b3Scalar Min0,Max0; b3Scalar Min1,Max1; project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); if(Max0=0.0f); b3Scalar d1 = Max1 - Min0; assert(d1>=0.0f); depth = d01e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6) return false; return true; } static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, const float4& posA1, const b3Quaternion& ornA, const float4& posB1, const b3Quaternion& ornB, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& uniqueEdgesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& uniqueEdgesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, b3Vector3& sep) { B3_PROFILE("findSeparatingAxis"); b3g_actualSATPairTests++; float4 posA = posA1; posA.w = 0.f; float4 posB = posB1; posB.w = 0.f; //#ifdef TEST_INTERNAL_OBJECTS float4 c0local = (float4&)hullA.m_localCenter; float4 c0 = transform(&c0local, &posA, &ornA); float4 c1local = (float4&)hullB.m_localCenter; float4 c1 = transform(&c1local,&posB,&ornB); const float4 deltaC2 = c0 - c1; //#endif b3Scalar dmin = FLT_MAX; int curPlaneTests=0; int numFacesA = hullA.m_numFaces; // Test normals from hullA for(int i=0;i0.0f) sep = -sep; return true; } bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, const b3Float4& posA1, const b3Quat& ornA, const b3Float4& posB1, const b3Quat& ornB, const b3Float4& DeltaC2, __global const b3AlignedObjectArray& vertices, __global const b3AlignedObjectArray& uniqueEdges, __global const b3AlignedObjectArray& faces, __global const b3AlignedObjectArray& indices, float4* sep, float* dmin) { // int i = get_global_id(0); float4 posA = posA1; posA.w = 0.f; float4 posB = posB1; posB.w = 0.f; //int curPlaneTests=0; int curEdgeEdge = 0; // Test edges for(int e0=0;e0m_numUniqueEdges;e0++) { const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; float4 edge0World = b3QuatRotate(ornA,edge0); for(int e1=0;e1m_numUniqueEdges;e1++) { const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; float4 edge1World = b3QuatRotate(ornB,edge1); float4 crossje = cross3(edge0World,edge1World); curEdgeEdge++; if(!IsAlmostZero(crossje)) { crossje = normalize3(crossje); if (dot3F4(DeltaC2,crossje)<0) crossje*=-1.f; float dist; bool result = true; { float Min0,Max0; float Min1,Max1; project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); if(Max00.0f) { *sep = -(*sep); } return true; } __inline float4 lerp3(const float4& a,const float4& b, float t) { return b3MakeVector3( a.x + (b.x - a.x) * t, a.y + (b.y - a.y) * t, a.z + (b.z - a.z) * t, 0.f); } // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) { int ve; float ds, de; int numVertsOut = 0; if (numVertsIn < 2) return 0; float4 firstVertex=pVtxIn[numVertsIn-1]; float4 endVertex = pVtxIn[0]; ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; for (ve = 0; ve < numVertsIn; ve++) { endVertex=pVtxIn[ve]; de = dot3F4(planeNormalWS,endVertex)+planeEqWS; if (ds<0) { if (de<0) { // Start < 0, end < 0, so output endVertex ppVtxOut[numVertsOut++] = endVertex; } else { // Start < 0, end >= 0, so output intersection ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); } } else { if (de<0) { // Start >= 0, end < 0 so output intersection and end ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); ppVtxOut[numVertsOut++] = endVertex; } } firstVertex = endVertex; ds = de; } return numVertsOut; } int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, float4* worldVertsB2, int capacityWorldVertsB2, const float minDist, float maxDist, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, float4* contactsOut, int contactCapacity) { int numContactsOut = 0; float4* pVtxIn = worldVertsB1; float4* pVtxOut = worldVertsB2; int numVertsIn = numWorldVertsB1; int numVertsOut = 0; int closestFaceA=-1; { float dmin = FLT_MAX; for(int face=0;facem_numFaces;face++) { const float4 Normal = b3MakeVector3( facesA[hullA->m_faceOffset+face].m_plane.x, facesA[hullA->m_faceOffset+face].m_plane.y, facesA[hullA->m_faceOffset+face].m_plane.z,0.f); const float4 faceANormalWS = b3QuatRotate(ornA,Normal); float d = dot3F4(faceANormalWS,separatingNormal); if (d < dmin) { dmin = d; closestFaceA = face; } } } if (closestFaceA<0) return numContactsOut; b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; // clip polygon to back of planes of all faces of hull A that are adjacent to witness face // int numContacts = numWorldVertsB1; int numVerticesA = polyA.m_numIndices; for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; const float4 edge0 = a - b; const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); float4 worldA1 = transform(&a,&posA,&ornA); float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); float4 planeNormalWS = planeNormalWS1; float planeEqWS=planeEqWS1; //clip face //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); //btSwap(pVtxIn,pVtxOut); float4* tmp = pVtxOut; pVtxOut = pVtxIn; pVtxIn = tmp; numVertsIn = numVertsOut; numVertsOut = 0; } // only keep points that are behind the witness face { float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); float localPlaneEq = polyA.m_plane.w; float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); for (int i=0;i& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, float4* contactsOut, int contactCapacity) { int numContactsOut = 0; int numWorldVertsB1= 0; B3_PROFILE("clipHullAgainstHull"); // float curMaxDist=maxDist; int closestFaceB=-1; float dmax = -FLT_MAX; { //B3_PROFILE("closestFaceB"); if (hullB.m_numFaces!=1) { //printf("wtf\n"); } static bool once = true; //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); for(int face=0;facem_numIndices;i++) { float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); } } #endif //BT_DEBUG_SAT_FACE //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) { const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); const float4 WorldNormal = b3QuatRotate(ornB, Normal); #ifdef BT_DEBUG_SAT_FACE if (once) printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); #endif float d = dot3F4(WorldNormal,separatingNormal); if (d > dmax) { dmax = d; closestFaceB = face; } } } once = false; } b3Assert(closestFaceB>=0); { //B3_PROFILE("worldVertsB1"); const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; const int numVertices = polyB.m_numIndices; for(int e0=0;e0=0) { //B3_PROFILE("clipFaceAgainstHull"); numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, posA,ornA, worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, verticesA, facesA, indicesA, contactsOut,contactCapacity); } return numContactsOut; } #define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; } #define REDUCE_MIN(v, n) {int i=0;\ for(int offset=0; offset64) nPoints = 64; float4 center = make_float4(0,0,0,0); { for (int i=0;i* bodyBuf, b3AlignedObjectArray* globalContactOut, int& nContacts, const b3AlignedObjectArray& hostConvexDataA, const b3AlignedObjectArray& hostConvexDataB, const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& uniqueEdgesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& uniqueEdgesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, const b3AlignedObjectArray& hostCollidablesA, const b3AlignedObjectArray& hostCollidablesB, const b3Vector3& sepNormalWorldSpace, int maxContactCapacity ) { int contactIndex = -1; b3ConvexPolyhedronData hullA, hullB; b3Collidable colA = hostCollidablesA[collidableIndexA]; hullA = hostConvexDataA[colA.m_shapeIndex]; //printf("numvertsA = %d\n",hullA.m_numVertices); b3Collidable colB = hostCollidablesB[collidableIndexB]; hullB = hostConvexDataB[colB.m_shapeIndex]; //printf("numvertsB = %d\n",hullB.m_numVertices); float4 contactsOut[MAX_VERTS]; int localContactCapacity = MAX_VERTS; #ifdef _WIN32 b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); #endif { float4 worldVertsB1[MAX_VERTS]; float4 worldVertsB2[MAX_VERTS]; int capacityWorldVerts = MAX_VERTS; float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; b3Scalar minDist = -1; b3Scalar maxDist = 0.; b3Transform trA,trB; { //B3_PROFILE("transform computation"); //trA.setIdentity(); trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); //trB.setIdentity(); trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); } b3Quaternion trAorn = trA.getRotation(); b3Quaternion trBorn = trB.getRotation(); int numContactsOut = clipHullAgainstHull(hostNormal, hostConvexDataA.at(shapeA), hostConvexDataB.at(shapeB), (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, worldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, verticesA, facesA,indicesA, verticesB, facesB,indicesB, contactsOut,localContactCapacity); if (numContactsOut>0) { B3_PROFILE("overlap"); float4 normalOnSurfaceB = (float4&)hostNormal; b3Int4 contactIdx; contactIdx.x = 0; contactIdx.y = 1; contactIdx.z = 2; contactIdx.w = 3; int numPoints = 0; { // B3_PROFILE("extractManifold"); numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); } b3Assert(numPoints); if (nContactsexpand(); b3Contact4& contact = globalContactOut->at(nContacts); contact.m_batchIdx = 0;//i; contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; contact.m_frictionCoeffCmp = 45874; contact.m_restituitionCoeffCmp = 0; // float distance = 0.f; for (int p=0;pm_numVertices;i++) { b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; float curDot = vtx.dot(planeNormalInConvex); if (curDot>maxDot) { hitVertex=i; maxDot=curDot; hitVtx = vtx; //make sure the deepest points is always included if (numPoints==MAX_PLANE_CONVEX_POINTS) numPoints--; } if (numPoints4) { numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); } int dstIdx; // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); if (numReducedPoints>0) { if (nGlobalContactsOut < maxContactCapacity) { dstIdx=nGlobalContactsOut; nGlobalContactsOut++; b3Contact4* c = &globalContactsOut[dstIdx]; c->m_worldNormalOnB = -planeNormalWorld; c->setFrictionCoeff(0.7); c->setRestituitionCoeff(0.f); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; for (int i=0;im_worldPosB[i] = pOnB1; } c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } // printf("computeContactPlaneConvex\n"); } B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) { b3Vector3 vecOut; vecOut.setValue( (b3Scalar)(vecIn[0]) / (quantization.x), (b3Scalar)(vecIn[1]) / (quantization.y), (b3Scalar)(vecIn[2]) / (quantization.z)); vecOut += bvhAabbMin; return vecOut; } void traverseTreeTree() { } #include "Bullet3Common/shared/b3Mat3x3.h" int numAabbChecks = 0; int maxNumAabbChecks = 0; int maxDepth = 0; // work-in-progress __kernel void findCompoundPairsKernel( int pairIndex, int bodyIndexA, int bodyIndexB, int collidableIndexA, int collidableIndexB, __global const b3RigidBodyData* rigidBodies, __global const b3Collidable* collidables, __global const b3ConvexPolyhedronData* convexShapes, __global const b3AlignedObjectArray& vertices, __global const b3AlignedObjectArray& aabbsWorldSpace, __global const b3AlignedObjectArray& aabbsLocalSpace, __global const b3GpuChildShape* gpuChildShapes, __global b3Int4* gpuCompoundPairsOut, __global int* numCompoundPairsOut, int maxNumCompoundPairsCapacity, b3AlignedObjectArray& treeNodesCPU, b3AlignedObjectArray& subTreesCPU, b3AlignedObjectArray& bvhInfoCPU ) { numAabbChecks=0; maxNumAabbChecks=0; // int i = pairIndex; { int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; //once the broadphase avoids static-static pairs, we can remove this test if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) { return; } if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) { int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; float4 posA = rigidBodies[bodyIndexA].m_pos; b3Quat ornA = rigidBodies[bodyIndexA].m_quat; b3Transform transA; transA.setIdentity(); transA.setOrigin(posA); transA.setRotation(ornA); b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 posB = rigidBodies[bodyIndexB].m_pos; b3Transform transB; transB.setIdentity(); transB.setOrigin(posB); transB.setRotation(ornB); for (int p=0;p nodeStack; b3Int2 node0; node0.x = startNodeIndexA; node0.y = startNodeIndexB; int maxStackDepth = 1024; nodeStack.resize(maxStackDepth); int depth=0; nodeStack[depth++]=node0; do { if (depth > maxDepth) { maxDepth=depth; printf("maxDepth=%d\n",maxDepth); } b3Int2 node = nodeStack[--depth]; b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); float margin=0.f; b3Vector3 aabbAMinOut,aabbAMaxOut; b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); b3Vector3 aabbBMinOut,aabbBMaxOut; b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); numAabbChecks++; bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); if (nodeOverlap) { bool isLeafA = treeNodesCPU[node.x].isLeafNode(); bool isLeafB = treeNodesCPU[node.y].isLeafNode(); bool isInternalA = !isLeafA; bool isInternalB = !isLeafB; //fail, even though it might hit two leaf nodes if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) { b3Error("Error: traversal exceeded maxStackDepth\n"); continue; } if(isInternalA) { int nodeAleftChild = node.x+1; bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); if(isInternalB) { int nodeBleftChild = node.y+1; bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); } else { nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); } } else { if(isInternalB) { int nodeBleftChild = node.y+1; bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); } else { int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); if (compoundPairIdx& vertices, __global const b3AlignedObjectArray& uniqueEdges, __global const b3AlignedObjectArray& faces, __global const b3AlignedObjectArray& indices, __global b3Aabb* aabbs, __global const b3GpuChildShape* gpuChildShapes, __global b3AlignedObjectArray& gpuCompoundSepNormalsOut, __global b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, int numCompoundPairs, int i ) { // int i = get_global_id(0); if (i= 0) { collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; b3Quat newOrnA = b3QuatMul(ornA,childOrnA); posA = newPosA; ornA = newOrnA; } else { collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; } if (childShapeIndexB>=0) { collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; b3Quat newOrnB = b3QuatMul(ornB,childOrnB); posB = newPosB; ornB = newOrnB; } else { collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; } gpuHasCompoundSepNormalsOut[i] = 0; int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; int shapeTypeA = collidables[collidableIndexA].m_shapeType; int shapeTypeB = collidables[collidableIndexB].m_shapeType; if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) { return; } int hasSeparatingAxis = 5; // int numFacesA = convexShapes[shapeIndexA].m_numFaces; float dmin = FLT_MAX; posA.w = 0.f; posB.w = 0.f; float4 c0local = convexShapes[shapeIndexA].m_localCenter; float4 c0 = transform(&c0local, &posA, &ornA); float4 c1local = convexShapes[shapeIndexB].m_localCenter; float4 c1 = transform(&c1local,&posB,&ornB); const float4 DeltaC2 = c0 - c1; float4 sepNormal = make_float4(1,0,0,0); // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); hasSeparatingAxis = 4; if (!sepA) { hasSeparatingAxis = 0; } else { bool sepB = findSeparatingAxis( convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); if (!sepB) { hasSeparatingAxis = 0; } else//(!sepB) { bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); if (sepEE) { gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); gpuHasCompoundSepNormalsOut[i] = 1; }//sepEE }//(!sepB) }//(!sepA) } } __kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs, __global const b3RigidBodyData* rigidBodies, __global const b3Collidable* collidables, __global const b3ConvexPolyhedronData* convexShapes, __global const b3AlignedObjectArray& vertices, __global const b3AlignedObjectArray& uniqueEdges, __global const b3AlignedObjectArray& faces, __global const b3AlignedObjectArray& indices, __global const b3GpuChildShape* gpuChildShapes, __global const b3AlignedObjectArray& gpuCompoundSepNormalsOut, __global const b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, __global struct b3Contact4Data* globalContactsOut, int* nGlobalContactsOut, int numCompoundPairs, int maxContactCapacity, int i) { // int i = get_global_id(0); int pairIndex = i; float4 worldVertsB1[64]; float4 worldVertsB2[64]; int capacityWorldVerts = 64; float4 localContactsOut[64]; int localContactCapacity=64; float minDist = -1e30f; float maxDist = 0.0f; if (i= 0) { collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; b3Quat newOrnA = b3QuatMul(ornA,childOrnA); posA = newPosA; ornA = newOrnA; } else { collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; } if (childShapeIndexB>=0) { collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; b3Quat newOrnB = b3QuatMul(ornB,childOrnB); posB = newPosB; ornB = newOrnB; } else { collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; } int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA,ornA, posB,ornB, worldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, vertices,faces,indices, vertices,faces,indices, localContactsOut,localContactCapacity); if (numLocalContactsOut>0) { float4 normal = -gpuCompoundSepNormalsOut[i]; int nPoints = numLocalContactsOut; float4* pointsIn = localContactsOut; b3Int4 contactIdx;// = {-1,-1,-1,-1}; contactIdx.s[0] = 0; contactIdx.s[1] = 1; contactIdx.s[2] = 2; contactIdx.s[3] = 3; int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); int dstIdx; dstIdx = b3AtomicInc( nGlobalContactsOut); if ((dstIdx+nReducedContacts) < maxContactCapacity) { __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; c->m_worldNormalOnB = -normal; c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); c->m_batchIdx = pairIndex; int bodyA = gpuCompoundPairs[pairIndex].x; int bodyB = gpuCompoundPairs[pairIndex].y; c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; c->m_childIndexA = childShapeIndexA; c->m_childIndexB = childShapeIndexB; for (int i=0;im_worldPosB[i] = pointsIn[contactIdx.s[i]]; } b3Contact4Data_setNumPoints(c,nReducedContacts); } }// if (numContactsOut>0) }// if (gpuHasCompoundSepNormalsOut[i]) }// if (i& hostAabbsWorldSpace, const b3AlignedObjectArray& hostAabbsLocalSpace, const b3AlignedObjectArray& convexVertices, const b3AlignedObjectArray& hostUniqueEdges, const b3AlignedObjectArray& convexIndices, const b3AlignedObjectArray& faces, b3Contact4* globalContactsOut, int& nGlobalContactsOut, int maxContactCapacity, b3AlignedObjectArray& treeNodesCPU, b3AlignedObjectArray& subTreesCPU, b3AlignedObjectArray& bvhInfoCPU ) { int shapeTypeB = collidables[collidableIndexB].m_shapeType; b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); b3AlignedObjectArray cpuCompoundPairsOut; int numCompoundPairsOut=0; int maxNumCompoundPairsCapacity = 8192;//1024; cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); // work-in-progress findCompoundPairsKernel( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, rigidBodies, collidables, convexShapes, convexVertices, hostAabbsWorldSpace, hostAabbsLocalSpace, cpuChildShapes, &cpuCompoundPairsOut[0], &numCompoundPairsOut, maxNumCompoundPairsCapacity , treeNodesCPU, subTreesCPU, bvhInfoCPU ); printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); if (numCompoundPairsOut>maxNumCompoundPairsCapacity) { b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); numCompoundPairsOut=maxNumCompoundPairsCapacity; } b3AlignedObjectArray cpuCompoundSepNormalsOut; b3AlignedObjectArray cpuHasCompoundSepNormalsOut; cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); for (int i=0;im_numVertices;i++) { b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; float curDot = vtx.dot(planeNormalInConvex); if (curDot>maxDot) { hitVertex=i; maxDot=curDot; hitVtx = vtx; //make sure the deepest points is always included if (numPoints==MAX_PLANE_CONVEX_POINTS) numPoints--; } if (numPoints4) { numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); } int dstIdx; // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); if (numReducedPoints>0) { if (nGlobalContactsOut < maxContactCapacity) { dstIdx=nGlobalContactsOut; nGlobalContactsOut++; b3Contact4* c = &globalContactsOut[dstIdx]; c->m_worldNormalOnB = -planeNormalWorld; c->setFrictionCoeff(0.7); c->setRestituitionCoeff(0.f); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; for (int i=0;im_worldPosB[i] = pOnB1; } c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; }//if (dstIdx < numPairs) } } } void computeContactSphereConvex(int pairIndex, int bodyIndexA, int bodyIndexB, int collidableIndexA, int collidableIndexB, const b3RigidBodyData* rigidBodies, const b3Collidable* collidables, const b3ConvexPolyhedronData* convexShapes, const b3Vector3* convexVertices, const int* convexIndices, const b3GpuFace* faces, b3Contact4* globalContactsOut, int& nGlobalContactsOut, int maxContactCapacity) { float radius = collidables[collidableIndexA].m_radius; float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; float4 pos = rigidBodies[bodyIndexB].m_pos; b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; b3Transform tr; tr.setIdentity(); tr.setOrigin(pos); tr.setRotation(quat); b3Transform trInv = tr.inverse(); float4 spherePos = trInv(spherePos1); int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; int shapeIndex = collidables[collidableIndex].m_shapeIndex; int numFaces = convexShapes[shapeIndex].m_numFaces; float4 closestPnt = b3MakeVector3(0, 0, 0, 0); // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); float minDist = -1000000.f; // TODO: What is the largest/smallest float? bool bCollide = true; int region = -1; float4 localHitNormal; for ( int f = 0; f < numFaces; f++ ) { b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; float4 planeEqn; float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); planeEqn = n1; planeEqn[3] = face.m_plane.w; float4 pntReturn; float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); if ( dist > radius) { bCollide = false; break; } if ( dist > 0 ) { //might hit an edge or vertex b3Vector3 out; bool isInPoly = IsPointInPolygon(spherePos, &face, &convexVertices[convexShapes[shapeIndex].m_vertexOffset], convexIndices, &out); if (isInPoly) { if (dist>minDist) { minDist = dist; closestPnt = pntReturn; localHitNormal = planeEqn; region=1; } } else { b3Vector3 tmp = spherePos-out; b3Scalar l2 = tmp.length2(); if (l2minDist) { minDist = dist; closestPnt = out; localHitNormal = tmp/dist; region=2; } } else { bCollide = false; break; } } } else { if ( dist > minDist ) { minDist = dist; closestPnt = pntReturn; localHitNormal = planeEqn; region=3; } } } static int numChecks = 0; numChecks++; if (bCollide && minDist > -10000) { float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; float4 pOnB1 = tr(closestPnt); //printf("dist ,%f,",minDist); float actualDepth = minDist-radius; if (actualDepth<0) { //printf("actualDepth = ,%f,", actualDepth); //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); //printf("region=,%d,\n", region); pOnB1[3] = actualDepth; int dstIdx; // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); if (nGlobalContactsOut < maxContactCapacity) { dstIdx=nGlobalContactsOut; nGlobalContactsOut++; b3Contact4* c = &globalContactsOut[dstIdx]; c->m_worldNormalOnB = normalOnSurfaceB1; c->setFrictionCoeff(0.7); c->setRestituitionCoeff(0.f); c->m_batchIdx = pairIndex; c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; c->m_worldPosB[0] = pOnB1; int numPoints = 1; c->m_worldNormalOnB.w = (b3Scalar)numPoints; }//if (dstIdx < numPairs) } }//if (hasCollision) } int computeContactConvexConvex2( int pairIndex, int bodyIndexA, int bodyIndexB, int collidableIndexA, int collidableIndexB, const b3AlignedObjectArray& rigidBodies, const b3AlignedObjectArray& collidables, const b3AlignedObjectArray& convexShapes, const b3AlignedObjectArray& convexVertices, const b3AlignedObjectArray& uniqueEdges, const b3AlignedObjectArray& convexIndices, const b3AlignedObjectArray& faces, b3AlignedObjectArray& globalContactsOut, int& nGlobalContactsOut, int maxContactCapacity, const b3AlignedObjectArray& oldContacts ) { int contactIndex = -1; b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; b3ConvexPolyhedronData hullA, hullB; b3Vector3 sepNormalWorldSpace; b3Collidable colA = collidables[collidableIndexA]; hullA = convexShapes[colA.m_shapeIndex]; //printf("numvertsA = %d\n",hullA.m_numVertices); b3Collidable colB = collidables[collidableIndexB]; hullB = convexShapes[colB.m_shapeIndex]; //printf("numvertsB = %d\n",hullB.m_numVertices); // int contactCapacity = MAX_VERTS; //int numContactsOut=0; #ifdef _WIN32 b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); #endif bool foundSepAxis = findSeparatingAxis(hullA,hullB, posA, ornA, posB, ornB, convexVertices,uniqueEdges,faces,convexIndices, convexVertices,uniqueEdges,faces,convexIndices, sepNormalWorldSpace ); if (foundSepAxis) { contactIndex = clipHullHullSingle( bodyIndexA, bodyIndexB, posA,ornA, posB,ornB, collidableIndexA, collidableIndexB, &rigidBodies, &globalContactsOut, nGlobalContactsOut, convexShapes, convexShapes, convexVertices, uniqueEdges, faces, convexIndices, convexVertices, uniqueEdges, faces, convexIndices, collidables, collidables, sepNormalWorldSpace, maxContactCapacity); } return contactIndex; } void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* pairs, int nPairs, const b3OpenCLArray* bodyBuf, b3OpenCLArray* contactOut, int& nContacts, const b3OpenCLArray* oldContacts, int maxContactCapacity, int compoundPairCapacity, const b3OpenCLArray& convexData, const b3OpenCLArray& gpuVertices, const b3OpenCLArray& gpuUniqueEdges, const b3OpenCLArray& gpuFaces, const b3OpenCLArray& gpuIndices, const b3OpenCLArray& gpuCollidables, const b3OpenCLArray& gpuChildShapes, const b3OpenCLArray& clAabbsWorldSpace, const b3OpenCLArray& clAabbsLocalSpace, b3OpenCLArray& worldVertsB1GPU, b3OpenCLArray& clippingFacesOutGPU, b3OpenCLArray& worldNormalsAGPU, b3OpenCLArray& worldVertsA1GPU, b3OpenCLArray& worldVertsB2GPU, b3AlignedObjectArray& bvhDataUnused, b3OpenCLArray* treeNodesGPU, b3OpenCLArray* subTreesGPU, b3OpenCLArray* bvhInfo, int numObjects, int maxTriConvexPairCapacity, b3OpenCLArray& triangleConvexPairsOut, int& numTriConvexPairsOut ) { myframecount++; if (!nPairs) return; #ifdef CHECK_ON_HOST b3AlignedObjectArray treeNodesCPU; treeNodesGPU->copyToHost(treeNodesCPU); b3AlignedObjectArray subTreesCPU; subTreesGPU->copyToHost(subTreesCPU); b3AlignedObjectArray bvhInfoCPU; bvhInfo->copyToHost(bvhInfoCPU); b3AlignedObjectArray hostAabbsWorldSpace; clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); b3AlignedObjectArray hostAabbsLocalSpace; clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray hostConvexData; convexData.copyToHost(hostConvexData); b3AlignedObjectArray hostVertices; gpuVertices.copyToHost(hostVertices); b3AlignedObjectArray hostUniqueEdges; gpuUniqueEdges.copyToHost(hostUniqueEdges); b3AlignedObjectArray hostFaces; gpuFaces.copyToHost(hostFaces); b3AlignedObjectArray hostIndices; gpuIndices.copyToHost(hostIndices); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray cpuChildShapes; gpuChildShapes.copyToHost(cpuChildShapes); b3AlignedObjectArray hostTriangleConvexPairs; b3AlignedObjectArray hostContacts; if (nContacts) { contactOut->copyToHost(hostContacts); } b3AlignedObjectArray oldHostContacts; if (oldContacts->size()) { oldContacts->copyToHost(oldHostContacts); } hostContacts.resize(maxContactCapacity); for (int i=0;i=0) { // printf("convex convex contactIndex = %d\n",contactIndex); hostPairs[i].z = contactIndex; } // printf("plane-convex\n"); } } if (hostPairs.size()) { pairs->copyFromHost(hostPairs); } hostContacts.resize(nContacts); if (nContacts) { contactOut->copyFromHost(hostContacts); } else { contactOut->resize(0); } m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); //printf("(HOST) nContacts = %d\n",nContacts); #else { if (nPairs) { m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); B3_PROFILE("primitiveContactsKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst(maxContactCapacity); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); } } #endif//CHECK_ON_HOST B3_PROFILE("computeConvexConvexContactsGPUSAT"); // printf("nContacts = %d\n",nContacts); m_sepNormals.resize(nPairs); m_hasSeparatingNormals.resize(nPairs); int concaveCapacity=maxTriConvexPairCapacity; m_concaveSepNormals.resize(concaveCapacity); m_concaveHasSeparatingNormals.resize(concaveCapacity); m_numConcavePairsOut.resize(0); m_numConcavePairsOut.push_back(0); m_gpuCompoundPairs.resize(compoundPairCapacity); m_gpuCompoundSepNormals.resize(compoundPairCapacity); m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); m_numCompoundPairsOut.resize(0); m_numCompoundPairsOut.push_back(0); int numCompoundPairs = 0; int numConcavePairs =0; { clFinish(m_queue); if (findSeparatingAxisOnGpu) { m_dmins.resize(nPairs); if (splitSearchSepAxisConvex) { if (useMprGpu) { nContacts = m_totalContactsOut.at(0); { B3_PROFILE("mprPenetrationKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(maxContactCapacity); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); /* b3AlignedObjectArrayhostHasSepAxis; m_hasSeparatingNormals.copyToHost(hostHasSepAxis); b3AlignedObjectArrayhostSepAxis; m_sepNormals.copyToHost(hostSepAxis); */ nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); if (nContacts>maxContactCapacity) { b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } } } if (1) { if (1) { { B3_PROFILE("findSeparatingAxisVertexFaceKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( m_dmins.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); } int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); { B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( m_dmins.getBufferCL()), b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) }; b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numDirections); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); } } if (useMprGpu) { B3_PROFILE("findSeparatingAxisUnitSphereKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( m_dmins.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); launcher.setConst( numDirections); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); } } } else { B3_PROFILE("findSeparatingAxisKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); } } else { B3_PROFILE("findSeparatingAxisKernel CPU"); b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray cpuChildShapes; gpuChildShapes.copyToHost(cpuChildShapes); b3AlignedObjectArray hostConvexShapeData; convexData.copyToHost(hostConvexShapeData); b3AlignedObjectArray hostVertices; gpuVertices.copyToHost(hostVertices); b3AlignedObjectArray hostHasSepAxis; hostHasSepAxis.resize(nPairs); b3AlignedObjectArray hostSepAxis; hostSepAxis.resize(nPairs); b3AlignedObjectArray hostUniqueEdges; gpuUniqueEdges.copyToHost(hostUniqueEdges); b3AlignedObjectArray hostFaces; gpuFaces.copyToHost(hostFaces); b3AlignedObjectArray hostIndices; gpuIndices.copyToHost(hostIndices); b3AlignedObjectArray hostContacts; if (nContacts) { contactOut->copyToHost(hostContacts); } hostContacts.resize(maxContactCapacity); int nGlobalContactsOut = nContacts; for (int i=0;i dist) { float diff = depth - dist; static float maxdiff = 0.f; if (maxdiff < diff) { maxdiff = diff; printf("maxdiff = %20.10f\n",maxdiff); } } } if (depth > dmin) { b3Vector3 oldAxis = hostSepAxis[i]; depth = dmin; sepAxis2 = oldAxis; } if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) { if (depth > dist) { float diff = depth - dist; //printf("?diff = %f\n",diff ); static float maxdiff = 0.f; if (maxdiff < diff) { maxdiff = diff; printf("maxdiff = %20.10f\n",maxdiff); } } //this is used for SAT //hostHasSepAxis[i] = 1; //hostSepAxis[i] = sepAxis2; //add contact point //int contactIndex = nGlobalContactsOut; b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); nGlobalContactsOut++; newContact.m_batchIdx = 0;//i; newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; newContact.m_frictionCoeffCmp = 45874; newContact.m_restituitionCoeffCmp = 0; static float maxDepth = 0.f; if (depth > maxDepth) { maxDepth = depth; printf("MPR maxdepth = %f\n",maxDepth ); } resultPointOnBWorld.w = -depth; newContact.m_worldPosB[0] = resultPointOnBWorld; //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; newContact.m_worldNormalOnB = sepAxis2; newContact.m_worldNormalOnB.w = (b3Scalar)1; } else { printf("rejected\n"); } } } else { //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); b3AlignedObjectArray oldHostContacts; int result; result = computeContactConvexConvex2( //hostPairs, pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexShapeData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nGlobalContactsOut, maxContactCapacity, oldHostContacts //hostHasSepAxis, //hostSepAxis ); }//mpr }//hostHasSepAxis[i] = 1; } else { b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); b3Vector3 DeltaC2 = c0 - c1; b3Vector3 sepAxis; bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &sepAxis, &dmin); if (hasSepAxisA) { bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &sepAxis, &dmin); if (hasSepAxisB) { bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), &sepAxis, &dmin,true); if (hasEdgeEdge) { hostHasSepAxis[i] = 1; hostSepAxis[i] = sepAxis; } } } } } if (useGjkContacts)//nGlobalContactsOut>0) { //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); nContacts = nGlobalContactsOut; contactOut->copyFromHost(hostContacts); m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); } m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); m_sepNormals.copyFromHost(hostSepAxis); /* //double-check results from GPU (comment-out the 'else' so both paths are executed b3AlignedObjectArray checkHasSepAxis; m_hasSeparatingNormals.copyToHost(checkHasSepAxis); static int frameCount = 0; frameCount++; for (int i=0;igetBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), b3BufferInfoCL(subTreesGPU->getBufferCL()), b3BufferInfoCL(treeNodesGPU->getBufferCL()), b3BufferInfoCL(bvhInfo->getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst( compoundPairCapacity); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); numCompoundPairs = m_numCompoundPairsOut.at(0); //printf("numCompoundPairs =%d\n",numCompoundPairs ); if (numCompoundPairs) { //printf("numCompoundPairs=%d\n",numCompoundPairs); } } else { b3AlignedObjectArray treeNodesCPU; treeNodesGPU->copyToHost(treeNodesCPU); b3AlignedObjectArray subTreesCPU; subTreesGPU->copyToHost(subTreesCPU); b3AlignedObjectArray bvhInfoCPU; bvhInfo->copyToHost(bvhInfoCPU); b3AlignedObjectArray hostAabbsWorldSpace; clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); b3AlignedObjectArray hostAabbsLocalSpace; clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray cpuCompoundPairsOut; cpuCompoundPairsOut.resize(compoundPairCapacity); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray cpuChildShapes; gpuChildShapes.copyToHost(cpuChildShapes); b3AlignedObjectArray hostConvexData; convexData.copyToHost(hostConvexData); b3AlignedObjectArray hostVertices; gpuVertices.copyToHost(hostVertices); for (int pairIndex=0;pairIndex compoundPairCapacity) { b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); numCompoundPairs = compoundPairCapacity; } m_gpuCompoundPairs.resize(numCompoundPairs); m_gpuHasCompoundSepNormals.resize(numCompoundPairs); m_gpuCompoundSepNormals.resize(numCompoundPairs); if (numCompoundPairs) { B3_PROFILE("processCompoundPairsPrimitivesKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numCompoundPairs ); launcher.setConst(maxContactCapacity); int num = numCompoundPairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); if (nContacts>maxContactCapacity) { b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } } if (numCompoundPairs) { B3_PROFILE("processCompoundPairsKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numCompoundPairs ); int num = numCompoundPairs; launcher.launch1D( num); clFinish(m_queue); } //printf("numConcave = %d\n",numConcave); // printf("hostNormals.size()=%d\n",hostNormals.size()); //int numPairs = pairCount.at(0); } int vertexFaceCapacity = 64; { //now perform the tree query on GPU if (treeNodesGPU->size() && treeNodesGPU->size()) { if (bvhTraversalKernelGPU) { B3_PROFILE("m_bvhTraversalKernel"); numConcavePairs = m_numConcavePairsOut.at(0); b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); launcher.setBuffer( pairs->getBufferCL()); launcher.setBuffer( bodyBuf->getBufferCL()); launcher.setBuffer( gpuCollidables.getBufferCL()); launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); launcher.setBuffer( subTreesGPU->getBufferCL()); launcher.setBuffer( treeNodesGPU->getBufferCL()); launcher.setBuffer( bvhInfo->getBufferCL()); launcher.setConst( nPairs ); launcher.setConst( maxTriConvexPairCapacity); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); numConcavePairs = m_numConcavePairsOut.at(0); } else { b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray hostAabbsWorldSpace; clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); //int maxTriConvexPairCapacity, b3AlignedObjectArray triangleConvexPairsOutHost; triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); //int numTriConvexPairsOutHost=0; numConcavePairs = 0; //m_numConcavePairsOut b3AlignedObjectArray treeNodesCPU; treeNodesGPU->copyToHost(treeNodesCPU); b3AlignedObjectArray subTreesCPU; subTreesGPU->copyToHost(subTreesCPU); b3AlignedObjectArray bvhInfoCPU; bvhInfo->copyToHost(bvhInfoCPU); //compute it... volatile int hostNumConcavePairsOut=0; // for (int i=0;i maxTriConvexPairCapacity) { static int exceeded_maxTriConvexPairCapacity_count = 0; b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); numConcavePairs = maxTriConvexPairCapacity; } triangleConvexPairsOut.resize(numConcavePairs); if (numConcavePairs) { clippingFacesOutGPU.resize(numConcavePairs); worldNormalsAGPU.resize(numConcavePairs); worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); if (findConcaveSeparatingAxisKernelGPU) { /* m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); worldVertsA1GPU.copyFromHost(worldVertsA1CPU); worldNormalsAGPU.copyFromHost(worldNormalsACPU); worldVertsB1GPU.copyFromHost(worldVertsB1CPU); */ //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) if (splitSearchSepAxisConcave) { //printf("numConcavePairs = %d\n",numConcavePairs); m_dmins.resize(numConcavePairs); { B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), b3BufferInfoCL(m_dmins.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst( numConcavePairs ); int num = numConcavePairs; launcher.launch1D( num); clFinish(m_queue); } // numConcavePairs = 0; if (1) { B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), b3BufferInfoCL(m_dmins.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst( numConcavePairs ); int num = numConcavePairs; launcher.launch1D( num); clFinish(m_queue); } // numConcavePairs = 0; } else { B3_PROFILE("findConcaveSeparatingAxisKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst( numConcavePairs ); int num = numConcavePairs; launcher.launch1D( num); clFinish(m_queue); } } else { b3AlignedObjectArray clippingFacesOutCPU; b3AlignedObjectArray worldVertsA1CPU; b3AlignedObjectArray worldNormalsACPU; b3AlignedObjectArray worldVertsB1CPU; b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; b3AlignedObjectArray triangleConvexPairsOutHost; triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray hostAabbsWorldSpace; clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); b3AlignedObjectArray hostConvexData; convexData.copyToHost(hostConvexData); b3AlignedObjectArray hostVertices; gpuVertices.copyToHost(hostVertices); b3AlignedObjectArray hostUniqueEdges; gpuUniqueEdges.copyToHost(hostUniqueEdges); b3AlignedObjectArray hostFaces; gpuFaces.copyToHost(hostFaces); b3AlignedObjectArray hostIndices; gpuIndices.copyToHost(hostIndices); b3AlignedObjectArray cpuChildShapes; gpuChildShapes.copyToHost(cpuChildShapes); b3AlignedObjectArray concaveSepNormalsHost; m_concaveSepNormals.copyToHost(concaveSepNormalsHost); concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); b3GpuChildShape* childShapePointerCPU = 0; if (cpuChildShapes.size()) childShapePointerCPU = &cpuChildShapes.at(0); clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); worldVertsA1CPU.resize(worldVertsA1GPU.size()); worldNormalsACPU.resize(worldNormalsAGPU.size()); worldVertsB1CPU.resize(worldVertsB1GPU.size()); for (int i=0;i cpuCompoundSepNormals; // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); // b3AlignedObjectArray cpuConcavePairs; // triangleConvexPairsOut.copyToHost(cpuConcavePairs); } } } if (numConcavePairs) { if (numConcavePairs) { B3_PROFILE("findConcaveSphereContactsKernel"); nContacts = m_totalContactsOut.at(0); // printf("nContacts1 = %d\n",nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( numConcavePairs ); launcher.setConst(maxContactCapacity); int num = numConcavePairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); //printf("nContacts2 = %d\n",nContacts); if (nContacts >= maxContactCapacity) { b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } } } #ifdef __APPLE__ bool contactClippingOnGpu = true; #else bool contactClippingOnGpu = true; #endif if (contactClippingOnGpu) { m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); // printf("nContacts3 = %d\n",nContacts); //B3_PROFILE("clipHullHullKernel"); bool breakupConcaveConvexKernel = true; #ifdef __APPLE__ //actually, some Apple OpenCL platform/device combinations work fine... breakupConcaveConvexKernel = true; #endif //concave-convex contact clipping if (numConcavePairs) { // printf("numConcavePairs = %d\n", numConcavePairs); // nContacts = m_totalContactsOut.at(0); // printf("nContacts before = %d\n", nContacts); if (breakupConcaveConvexKernel) { worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); //clipFacesAndFindContacts if (clipConcaveFacesAndFindContactsCPU) { b3AlignedObjectArray clippingFacesOutCPU; b3AlignedObjectArray worldVertsA1CPU; b3AlignedObjectArray worldNormalsACPU; b3AlignedObjectArray worldVertsB1CPU; clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); worldVertsA1GPU.copyToHost(worldVertsA1CPU); worldNormalsAGPU.copyToHost(worldNormalsACPU); worldVertsB1GPU.copyToHost(worldVertsB1CPU); b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); b3AlignedObjectArray concaveSepNormalsHost; m_concaveSepNormals.copyToHost(concaveSepNormalsHost); b3AlignedObjectArray worldVertsB2CPU; worldVertsB2CPU.resize(worldVertsB2GPU.size()); for (int i=0;ireserve(newContactCapacity); if (reduceConcaveContactsOnGPU) { // printf("newReservation = %d\n",newReservation); { B3_PROFILE("newContactReductionKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst(newContactCapacity); launcher.setConst( numConcavePairs ); int num = numConcavePairs; launcher.launch1D( num); } nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); }else { volatile int nGlobalContactsOut = nContacts; b3AlignedObjectArray triangleConvexPairsOutHost; triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); b3AlignedObjectArray concaveSepNormalsHost; m_concaveSepNormals.copyToHost(concaveSepNormalsHost); b3AlignedObjectArray hostContacts; if (nContacts) { contactOut->copyToHost(hostContacts); } hostContacts.resize(newContactCapacity); b3AlignedObjectArray clippingFacesOutCPU; b3AlignedObjectArray worldVertsB2CPU; clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); worldVertsB2GPU.copyToHost(worldVertsB2CPU); for (int i=0;iresize(nContacts); hostContacts.resize(nContacts); //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); contactOut->copyFromHost(hostContacts); } } //re-use? } else { B3_PROFILE("clipHullHullConcaveConvexKernel"); nContacts = m_totalContactsOut.at(0); int newContactCapacity = contactOut->capacity(); //printf("contactOut5 = %d\n",nContacts); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(newContactCapacity); launcher.setConst( numConcavePairs ); int num = numConcavePairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); //printf("contactOut6 = %d\n",nContacts); b3AlignedObjectArray cpuContacts; contactOut->copyToHost(cpuContacts); } // printf("nContacts after = %d\n", nContacts); }//numConcavePairs //convex-convex contact clipping bool breakupKernel = false; #ifdef __APPLE__ breakupKernel = true; #endif #ifdef CHECK_ON_HOST bool computeConvexConvex = false; #else bool computeConvexConvex = true; #endif//CHECK_ON_HOST if (computeConvexConvex) { B3_PROFILE("clipHullHullKernel"); if (breakupKernel) { worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); clippingFacesOutGPU.resize(nPairs); worldNormalsAGPU.resize(nPairs); worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); if (findConvexClippingFacesGPU) { B3_PROFILE("findClippingFacesKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( vertexFaceCapacity); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); } else { float minDist = -1e30f; float maxDist = 0.02f; b3AlignedObjectArray hostConvexData; convexData.copyToHost(hostConvexData); b3AlignedObjectArray hostCollidables; gpuCollidables.copyToHost(hostCollidables); b3AlignedObjectArray hostHasSepNormals; m_hasSeparatingNormals.copyToHost(hostHasSepNormals); b3AlignedObjectArray cpuSepNormals; m_sepNormals.copyToHost(cpuSepNormals); b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); b3AlignedObjectArray worldVertsB1CPU; worldVertsB1GPU.copyToHost(worldVertsB1CPU); b3AlignedObjectArray clippingFacesOutCPU; clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); b3AlignedObjectArray worldNormalsACPU; worldNormalsACPU.resize(nPairs); b3AlignedObjectArray worldVertsA1CPU; worldVertsA1CPU.resize(worldVertsA1GPU.size()); b3AlignedObjectArray hostVertices; gpuVertices.copyToHost(hostVertices); b3AlignedObjectArray hostFaces; gpuFaces.copyToHost(hostFaces); b3AlignedObjectArray hostIndices; gpuIndices.copyToHost(hostIndices); for (int i=0;i hostPairs; //pairs->copyToHost(hostPairs); b3AlignedObjectArray hostSepNormals; m_sepNormals.copyToHost(hostSepNormals); b3AlignedObjectArray hostHasSepAxis; m_hasSeparatingNormals.copyToHost(hostHasSepAxis); b3AlignedObjectArray hostClippingFaces; clippingFacesOutGPU.copyToHost(hostClippingFaces); b3AlignedObjectArray worldVertsB2CPU; worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); b3AlignedObjectArrayworldVertsA1CPU; worldVertsA1GPU.copyToHost(worldVertsA1CPU); b3AlignedObjectArray worldNormalsACPU; worldNormalsAGPU.copyToHost(worldNormalsACPU); b3AlignedObjectArray worldVertsB1CPU; worldVertsB1GPU.copyToHost(worldVertsB1CPU); /* __global const b3Float4* separatingNormals, __global const int* hasSeparatingAxis, __global b3Int4* clippingFacesOut, __global b3Float4* worldVertsA1, __global b3Float4* worldNormalsA1, __global b3Float4* worldVertsB1, __global b3Float4* worldVertsB2, int vertexFaceCapacity, int pairIndex */ for (int i=0;ireserve(newContactCapacity); if (reduceConvexContactsOnGPU) { { B3_PROFILE("newContactReductionKernel"); b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst(vertexFaceCapacity); launcher.setConst(newContactCapacity); launcher.setConst( nPairs ); int num = nPairs; launcher.launch1D( num); } nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); } else { volatile int nGlobalContactsOut = nContacts; b3AlignedObjectArray hostPairs; pairs->copyToHost(hostPairs); b3AlignedObjectArray hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); b3AlignedObjectArray hostSepNormals; m_sepNormals.copyToHost(hostSepNormals); b3AlignedObjectArray hostHasSepAxis; m_hasSeparatingNormals.copyToHost(hostHasSepAxis); b3AlignedObjectArray hostContactsOut; contactOut->copyToHost(hostContactsOut); hostContactsOut.resize(newContactCapacity); b3AlignedObjectArray hostClippingFaces; clippingFacesOutGPU.copyToHost(hostClippingFaces); b3AlignedObjectArray worldVertsB2CPU; worldVertsB2GPU.copyToHost(worldVertsB2CPU); for (int i=0;icopyFromHost(hostContactsOut); } // b3Contact4 pt = contactOut->at(0); // printf("nContacts = %d\n",nContacts); } } } else//breakupKernel { if (nPairs) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( pairs->getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( m_sepNormals.getBufferCL()), b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nPairs ); launcher.setConst(maxContactCapacity); int num = nPairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); if (nContacts >= maxContactCapacity) { b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); nContacts = maxContactCapacity; } contactOut->resize(nContacts); } } int nCompoundsPairs = m_gpuCompoundPairs.size(); if (nCompoundsPairs) { b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), b3BufferInfoCL( bodyBuf->getBufferCL(),true), b3BufferInfoCL( gpuCollidables.getBufferCL(),true), b3BufferInfoCL( convexData.getBufferCL(),true), b3BufferInfoCL( gpuVertices.getBufferCL(),true), b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), b3BufferInfoCL( gpuFaces.getBufferCL(),true), b3BufferInfoCL( gpuIndices.getBufferCL(),true), b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), b3BufferInfoCL( contactOut->getBufferCL()), b3BufferInfoCL( m_totalContactsOut.getBufferCL()) }; b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); launcher.setConst( nCompoundsPairs ); launcher.setConst(maxContactCapacity); int num = nCompoundsPairs; launcher.launch1D( num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); if (nContacts>maxContactCapacity) { b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } contactOut->resize(nContacts); }//if nCompoundsPairs } }//contactClippingOnGpu //printf("nContacts end = %d\n",nContacts); //printf("frameCount = %d\n",frameCount++); }