From e12c89e8c9896b2e5cdd70dbd2d2acb449ff4b94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Sat, 13 Jan 2018 14:01:53 +0100 Subject: bullet: Streamline bundling, remove extraneous src/ folder Document version and how to extract sources in thirdparty/README.md. Drop unnecessary CMake and Premake files. Simplify SCsub, drop unused one. --- .../NarrowphaseCollision/b3ConvexHullContact.cpp | 4733 ++++++++++++++++++++ 1 file changed, 4733 insertions(+) create mode 100644 thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp') diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp new file mode 100644 index 0000000000..fb435aa7fd --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -0,0 +1,4733 @@ +/* +Bullet Continuous Collision Detection and Physics Library +Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ + +bool findSeparatingAxisOnGpu = true; +bool splitSearchSepAxisConcave = false; +bool splitSearchSepAxisConvex = true; +bool useMprGpu = true;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... +bool bvhTraversalKernelGPU = true; +bool findConcaveSeparatingAxisKernelGPU = true; +bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; +bool clipConvexFacesAndFindContactsCPU = false;//false;//true; +bool reduceConcaveContactsOnGPU = true;//false; +bool reduceConvexContactsOnGPU = true;//false; +bool findConvexClippingFacesGPU = true; +bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false +bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false + + +static int myframecount=0;///for testing + +///This file was written by Erwin Coumans +///Separating axis rest based on work from Pierre Terdiman, see +///And contact clipping based on work from Simon Hobbs + +//#define B3_DEBUG_SAT_FACE + +//#define CHECK_ON_HOST + +#ifdef CHECK_ON_HOST +//#define PERSISTENT_CONTACTS_HOST +#endif + +int b3g_actualSATPairTests=0; + +#include "b3ConvexHullContact.h" +#include //memcpy +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" + +#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" +#include "Bullet3Geometry/b3AabbUtil.h" + +typedef b3AlignedObjectArray b3VertexArray; + + +#include //for FLT_MAX +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +//#include "AdlQuaternion.h" + +#include "kernels/satKernels.h" +#include "kernels/mprKernels.h" + +#include "kernels/satConcaveKernels.h" + +#include "kernels/satClipHullContacts.h" +#include "kernels/bvhTraversal.h" +#include "kernels/primitiveContacts.h" + + +#include "Bullet3Geometry/b3AabbUtil.h" + +#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" +#define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" + +#define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" + + +#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" +#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" +#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" + + +#ifndef __global +#define __global +#endif + +#ifndef __kernel +#define __kernel +#endif + + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" + + + +#define dot3F4 b3Dot + +GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) +:m_context(ctx), +m_device(device), +m_queue(q), + +m_findSeparatingAxisKernel(0), +m_findSeparatingAxisVertexFaceKernel(0), +m_findSeparatingAxisEdgeEdgeKernel(0), +m_unitSphereDirections(m_context,m_queue), + +m_totalContactsOut(m_context, m_queue), +m_sepNormals(m_context, m_queue), +m_dmins(m_context,m_queue), + +m_hasSeparatingNormals(m_context, m_queue), +m_concaveSepNormals(m_context, m_queue), +m_concaveHasSeparatingNormals(m_context,m_queue), +m_numConcavePairsOut(m_context, m_queue), + + +m_gpuCompoundPairs(m_context, m_queue), + + +m_gpuCompoundSepNormals(m_context, m_queue), +m_gpuHasCompoundSepNormals(m_context, m_queue), + +m_numCompoundPairsOut(m_context, m_queue) +{ + m_totalContactsOut.push_back(0); + + cl_int errNum=0; + + if (1) + { + const char* mprSrc = mprKernelsCL; + + const char* srcConcave = satConcaveKernelsCL; + char flags[1024]={0}; +//#ifdef CL_PLATFORM_INTEL +// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); +//#endif + m_mprPenetrationKernel = 0; + m_findSeparatingAxisUnitSphereKernel = 0; + + if (useMprGpu) + { + cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); + b3Assert(m_mprPenetrationKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); + b3Assert(m_findSeparatingAxisUnitSphereKernel); + b3Assert(errNum==CL_SUCCESS); + + + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + m_unitSphereDirections.resize(numDirections); + m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); + + + } + + + cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); + b3Assert(errNum==CL_SUCCESS); + + cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisKernel); + b3Assert(errNum==CL_SUCCESS); + + + m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisVertexFaceKernel); + + m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); + b3Assert(m_findSeparatingAxisVertexFaceKernel); + + + m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); + b3Assert(m_findConcaveSeparatingAxisKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); + b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); + b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); + b3Assert(errNum==CL_SUCCESS); + + + + + m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); + b3Assert(m_findCompoundPairsKernel); + b3Assert(errNum==CL_SUCCESS); + m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); + b3Assert(m_processCompoundPairsKernel); + b3Assert(errNum==CL_SUCCESS); + } + + if (1) + { + const char* srcClip = satClipKernelsCL; + + char flags[1024]={0}; +//#ifdef CL_PLATFORM_INTEL +// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); +//#endif + + cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + + m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + + m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + +// m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); + // b3Assert(errNum==CL_SUCCESS); + + m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, + "newContactReductionKernel",&errNum,satClipContactsProg); + b3Assert(errNum==CL_SUCCESS); + } + else + { + m_clipHullHullKernel=0; + m_clipCompoundsHullHullKernel = 0; + m_findClippingFacesKernel = 0; + m_newContactReductionKernel=0; + m_clipFacesAndFindContacts = 0; + m_clipHullHullConcaveConvexKernel = 0; +// m_extractManifoldAndAddContactKernel = 0; + } + + if (1) + { + const char* srcBvh = bvhTraversalKernelCL; + cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); + b3Assert(errNum==CL_SUCCESS); + + } + + { + const char* primitiveContactsSrc = primitiveContactsKernelsCL; + cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); + b3Assert(errNum==CL_SUCCESS); + + m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); + b3Assert(errNum==CL_SUCCESS); + + m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); + b3Assert(errNum==CL_SUCCESS); + b3Assert(m_findConcaveSphereContactsKernel); + + m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); + b3Assert(errNum==CL_SUCCESS); + b3Assert(m_processCompoundPairsPrimitivesKernel); + + } + + +} + +GpuSatCollision::~GpuSatCollision() +{ + + if (m_findSeparatingAxisVertexFaceKernel) + clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); + + if (m_findSeparatingAxisEdgeEdgeKernel) + clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); + + if (m_findSeparatingAxisUnitSphereKernel) + clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); + + if (m_mprPenetrationKernel) + clReleaseKernel(m_mprPenetrationKernel); + + + if (m_findSeparatingAxisKernel) + clReleaseKernel(m_findSeparatingAxisKernel); + + if (m_findConcaveSeparatingAxisVertexFaceKernel) + clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); + + + if (m_findConcaveSeparatingAxisEdgeEdgeKernel) + clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); + + if (m_findConcaveSeparatingAxisKernel) + clReleaseKernel(m_findConcaveSeparatingAxisKernel); + + if (m_findCompoundPairsKernel) + clReleaseKernel(m_findCompoundPairsKernel); + + if (m_processCompoundPairsKernel) + clReleaseKernel(m_processCompoundPairsKernel); + + if (m_findClippingFacesKernel) + clReleaseKernel(m_findClippingFacesKernel); + + if (m_clipFacesAndFindContacts) + clReleaseKernel(m_clipFacesAndFindContacts); + if (m_newContactReductionKernel) + clReleaseKernel(m_newContactReductionKernel); + if (m_primitiveContactsKernel) + clReleaseKernel(m_primitiveContactsKernel); + + if (m_findConcaveSphereContactsKernel) + clReleaseKernel(m_findConcaveSphereContactsKernel); + + if (m_processCompoundPairsPrimitivesKernel) + clReleaseKernel(m_processCompoundPairsPrimitivesKernel); + + if (m_clipHullHullKernel) + clReleaseKernel(m_clipHullHullKernel); + if (m_clipCompoundsHullHullKernel) + clReleaseKernel(m_clipCompoundsHullHullKernel); + + if (m_clipHullHullConcaveConvexKernel) + clReleaseKernel(m_clipHullHullConcaveConvexKernel); +// if (m_extractManifoldAndAddContactKernel) + // clReleaseKernel(m_extractManifoldAndAddContactKernel); + + if (m_bvhTraversalKernel) + clReleaseKernel(m_bvhTraversalKernel); + +} + +struct MyTriangleCallback : public b3NodeOverlapCallback +{ + int m_bodyIndexA; + int m_bodyIndexB; + + virtual void processNode(int subPart, int triangleIndex) + { + printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); + printf("triangleIndex %d\n", triangleIndex); + } +}; + + +#define float4 b3Vector3 +#define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) + +float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) +{ + float4 n = planeEqn; + n[3] = 0.f; + float dist = dot3F4(n, point) + planeEqn[3]; + *closestPointOnFace = point - dist * n; + return dist; +} + + + +#define cross3(a,b) (a.cross(b)) +b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) +{ + b3Transform tr; + tr.setIdentity(); + tr.setOrigin(*pos); + tr.setRotation(*orn); + b3Vector3 res = tr(*v); + return res; +} + + +inline bool IsPointInPolygon(const float4& p, + const b3GpuFace* face, + const float4* baseVertex, + const int* convexIndices, + float4* out) +{ + float4 a; + float4 b; + float4 ab; + float4 ap; + float4 v; + + float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); + + if (face->m_numIndices<2) + return false; + + + float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; + b = v0; + + for(unsigned i=0; i != face->m_numIndices; ++i) + { + a = b; + float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; + b = vi; + ab = b-a; + ap = p-a; + v = cross3(ab,plane); + + if (b3Dot(ap, v) > 0.f) + { + float ab_m2 = b3Dot(ab, ab); + float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; + if (rt <= 0.f) + { + *out = a; + } + else if (rt >= 1.f) + { + *out = b; + } + else + { + float s = 1.f - rt; + out[0].x = s * a.x + rt * b.x; + out[0].y = s * a.y + rt * b.y; + out[0].z = s * a.z + rt * b.z; + } + return false; + } + } + return true; +} + +#define normalize3(a) (a.normalize()) + + +int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = b3MakeVector3(0,0,0,0); + { + + for (int i=0;i& vertices, b3Scalar& min, b3Scalar& max) +{ + min = FLT_MAX; + max = -FLT_MAX; + int numVerts = hull.m_numVertices; + + const float4 localDir = b3QuatRotate(orn.inverse(),dir); + + b3Scalar offset = dot3F4(pos,dir); + + for(int i=0;i max) max = dp; + } + if(min>max) + { + b3Scalar tmp = min; + min = max; + max = tmp; + } + min += offset; + max += offset; +} + + +static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA,const b3Quaternion& ornA, + const float4& posB,const b3Quaternion& ornB, + const float4& sep_axis, const b3AlignedObjectArray& verticesA,const b3AlignedObjectArray& verticesB,b3Scalar& depth) +{ + b3Scalar Min0,Max0; + b3Scalar Min1,Max1; + project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); + project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); + + if(Max0=0.0f); + b3Scalar d1 = Max1 - Min0; + assert(d1>=0.0f); + depth = d01e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6) return false; + return true; +} + + +static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA1, + const b3Quaternion& ornA, + const float4& posB1, + const b3Quaternion& ornB, + const b3AlignedObjectArray& verticesA, + const b3AlignedObjectArray& uniqueEdgesA, + const b3AlignedObjectArray& facesA, + const b3AlignedObjectArray& indicesA, + const b3AlignedObjectArray& verticesB, + const b3AlignedObjectArray& uniqueEdgesB, + const b3AlignedObjectArray& facesB, + const b3AlignedObjectArray& indicesB, + + b3Vector3& sep) +{ + B3_PROFILE("findSeparatingAxis"); + + b3g_actualSATPairTests++; + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; +//#ifdef TEST_INTERNAL_OBJECTS + float4 c0local = (float4&)hullA.m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = (float4&)hullB.m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 deltaC2 = c0 - c1; +//#endif + + b3Scalar dmin = FLT_MAX; + int curPlaneTests=0; + + int numFacesA = hullA.m_numFaces; + // Test normals from hullA + for(int i=0;i0.0f) + sep = -sep; + + return true; +} + + +bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, + const b3Float4& posA1, + const b3Quat& ornA, + const b3Float4& posB1, + const b3Quat& ornB, + const b3Float4& DeltaC2, + __global const b3AlignedObjectArray& vertices, + __global const b3AlignedObjectArray& uniqueEdges, + __global const b3AlignedObjectArray& faces, + __global const b3AlignedObjectArray& indices, + float4* sep, + float* dmin) +{ +// int i = get_global_id(0); + + float4 posA = posA1; + posA.w = 0.f; + float4 posB = posB1; + posB.w = 0.f; + + //int curPlaneTests=0; + + int curEdgeEdge = 0; + // Test edges + for(int e0=0;e0m_numUniqueEdges;e0++) + { + const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; + float4 edge0World = b3QuatRotate(ornA,edge0); + + for(int e1=0;e1m_numUniqueEdges;e1++) + { + const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; + float4 edge1World = b3QuatRotate(ornB,edge1); + + + float4 crossje = cross3(edge0World,edge1World); + + curEdgeEdge++; + if(!IsAlmostZero(crossje)) + { + crossje = normalize3(crossje); + if (dot3F4(DeltaC2,crossje)<0) + crossje*=-1.f; + + float dist; + bool result = true; + { + float Min0,Max0; + float Min1,Max1; + project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); + project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); + + if(Max00.0f) + { + *sep = -(*sep); + } + return true; +} + + +__inline float4 lerp3(const float4& a,const float4& b, float t) +{ + return b3MakeVector3( a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); +} + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + +int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, + const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + const b3AlignedObjectArray& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, + //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;facem_numFaces;face++) + { + const float4 Normal = b3MakeVector3( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = b3QuatRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face +// int numContacts = numWorldVertsB1; + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; + const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;i& verticesA, const b3AlignedObjectArray& facesA, const b3AlignedObjectArray& indicesA, + const b3AlignedObjectArray& verticesB, const b3AlignedObjectArray& facesB, const b3AlignedObjectArray& indicesB, + + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + B3_PROFILE("clipHullAgainstHull"); + +// float curMaxDist=maxDist; + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + //B3_PROFILE("closestFaceB"); + if (hullB.m_numFaces!=1) + { + //printf("wtf\n"); + } + static bool once = true; + //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); + + for(int face=0;facem_numIndices;i++) + { + float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; + printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); + } + } +#endif //BT_DEBUG_SAT_FACE + //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) + { + const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, + facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = b3QuatRotate(ornB, Normal); +#ifdef BT_DEBUG_SAT_FACE + if (once) + printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); +#endif + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + once = false; + } + + + b3Assert(closestFaceB>=0); + { + //B3_PROFILE("worldVertsB1"); + const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0=0) + { + //B3_PROFILE("clipFaceAgainstHull"); + numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, + verticesA, facesA, indicesA, + contactsOut,contactCapacity); + } + + return numContactsOut; +} + + + + + + +#define PARALLEL_SUM(v, n) for(int j=1; j v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ +for(int offset=0; offset64) + nPoints = 64; + + float4 center = make_float4(0,0,0,0); + { + + for (int i=0;i* bodyBuf, + b3AlignedObjectArray* globalContactOut, + int& nContacts, + + const b3AlignedObjectArray& hostConvexDataA, + const b3AlignedObjectArray& hostConvexDataB, + + const b3AlignedObjectArray& verticesA, + const b3AlignedObjectArray& uniqueEdgesA, + const b3AlignedObjectArray& facesA, + const b3AlignedObjectArray& indicesA, + + const b3AlignedObjectArray& verticesB, + const b3AlignedObjectArray& uniqueEdgesB, + const b3AlignedObjectArray& facesB, + const b3AlignedObjectArray& indicesB, + + const b3AlignedObjectArray& hostCollidablesA, + const b3AlignedObjectArray& hostCollidablesB, + const b3Vector3& sepNormalWorldSpace, + int maxContactCapacity ) +{ + int contactIndex = -1; + b3ConvexPolyhedronData hullA, hullB; + + b3Collidable colA = hostCollidablesA[collidableIndexA]; + hullA = hostConvexDataA[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); + + + b3Collidable colB = hostCollidablesB[collidableIndexB]; + hullB = hostConvexDataB[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); + + + float4 contactsOut[MAX_VERTS]; + int localContactCapacity = MAX_VERTS; + +#ifdef _WIN32 + b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); + b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); +#endif + + + { + + float4 worldVertsB1[MAX_VERTS]; + float4 worldVertsB2[MAX_VERTS]; + int capacityWorldVerts = MAX_VERTS; + + float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); + int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; + int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; + + b3Scalar minDist = -1; + b3Scalar maxDist = 0.; + + + + b3Transform trA,trB; + { + //B3_PROFILE("transform computation"); + //trA.setIdentity(); + trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); + trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); + + //trB.setIdentity(); + trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); + trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); + } + + b3Quaternion trAorn = trA.getRotation(); + b3Quaternion trBorn = trB.getRotation(); + + int numContactsOut = clipHullAgainstHull(hostNormal, + hostConvexDataA.at(shapeA), + hostConvexDataB.at(shapeB), + (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, + (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + verticesA, facesA,indicesA, + verticesB, facesB,indicesB, + + contactsOut,localContactCapacity); + + if (numContactsOut>0) + { + B3_PROFILE("overlap"); + + float4 normalOnSurfaceB = (float4&)hostNormal; + + b3Int4 contactIdx; + contactIdx.x = 0; + contactIdx.y = 1; + contactIdx.z = 2; + contactIdx.w = 3; + + int numPoints = 0; + + { + // B3_PROFILE("extractManifold"); + numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); + } + + b3Assert(numPoints); + + if (nContactsexpand(); + b3Contact4& contact = globalContactOut->at(nContacts); + contact.m_batchIdx = 0;//i; + contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; + contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; + + contact.m_frictionCoeffCmp = 45874; + contact.m_restituitionCoeffCmp = 0; + + // float distance = 0.f; + for (int p=0;pm_numVertices;i++) + { + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + float curDot = vtx.dot(planeNormalInConvex); + + + if (curDot>maxDot) + { + hitVertex=i; + maxDot=curDot; + hitVtx = vtx; + //make sure the deepest points is always included + if (numPoints==MAX_PLANE_CONVEX_POINTS) + numPoints--; + } + + if (numPoints4) + { + numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + } + int dstIdx; +// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (numReducedPoints>0) + { + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -planeNormalWorld; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + for (int i=0;im_worldPosB[i] = pOnB1; + } + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; + }//if (dstIdx < numPairs) + } + + + +// printf("computeContactPlaneConvex\n"); +} + + + +B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) + { + b3Vector3 vecOut; + vecOut.setValue( + (b3Scalar)(vecIn[0]) / (quantization.x), + (b3Scalar)(vecIn[1]) / (quantization.y), + (b3Scalar)(vecIn[2]) / (quantization.z)); + vecOut += bvhAabbMin; + return vecOut; + } + +void traverseTreeTree() +{ + +} + +#include "Bullet3Common/shared/b3Mat3x3.h" + +int numAabbChecks = 0; +int maxNumAabbChecks = 0; +int maxDepth = 0; + +// work-in-progress +__kernel void findCompoundPairsKernel( + int pairIndex, + int bodyIndexA, + int bodyIndexB, + int collidableIndexA, + int collidableIndexB, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray& vertices, + __global const b3AlignedObjectArray& aabbsWorldSpace, + __global const b3AlignedObjectArray& aabbsLocalSpace, + __global const b3GpuChildShape* gpuChildShapes, + __global b3Int4* gpuCompoundPairsOut, + __global int* numCompoundPairsOut, + int maxNumCompoundPairsCapacity, + b3AlignedObjectArray& treeNodesCPU, + b3AlignedObjectArray& subTreesCPU, + b3AlignedObjectArray& bvhInfoCPU + ) +{ + numAabbChecks=0; + maxNumAabbChecks=0; +// int i = pairIndex; + { + + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + //once the broadphase avoids static-static pairs, we can remove this test + if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + { + return; + } + + if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + { + int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; + int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; + int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; + int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; + int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; + + + int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; + + float4 posA = rigidBodies[bodyIndexA].m_pos; + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; + + b3Transform transA; + transA.setIdentity(); + transA.setOrigin(posA); + transA.setRotation(ornA); + + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + b3Transform transB; + transB.setIdentity(); + transB.setOrigin(posB); + transB.setRotation(ornB); + + + + for (int p=0;p nodeStack; + b3Int2 node0; + node0.x = startNodeIndexA; + node0.y = startNodeIndexB; + + int maxStackDepth = 1024; + nodeStack.resize(maxStackDepth); + int depth=0; + nodeStack[depth++]=node0; + + do + { + if (depth > maxDepth) + { + maxDepth=depth; + printf("maxDepth=%d\n",maxDepth); + } + b3Int2 node = nodeStack[--depth]; + + b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + + b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + + float margin=0.f; + b3Vector3 aabbAMinOut,aabbAMaxOut; + b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + + b3Vector3 aabbBMinOut,aabbBMaxOut; + b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + + numAabbChecks++; + bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + if (nodeOverlap) + { + bool isLeafA = treeNodesCPU[node.x].isLeafNode(); + bool isLeafB = treeNodesCPU[node.y].isLeafNode(); + bool isInternalA = !isLeafA; + bool isInternalB = !isLeafB; + + //fail, even though it might hit two leaf nodes + if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) + { + b3Error("Error: traversal exceeded maxStackDepth\n"); + continue; + } + + if(isInternalA) + { + int nodeAleftChild = node.x+1; + bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); + int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); + + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); + + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); + } + else + { + nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); + nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); + } + } + else + { + if(isInternalB) + { + int nodeBleftChild = node.y+1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); + } + else + { + int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); + if (compoundPairIdx& vertices, + __global const b3AlignedObjectArray& uniqueEdges, + __global const b3AlignedObjectArray& faces, + __global const b3AlignedObjectArray& indices, + __global b3Aabb* aabbs, + __global const b3GpuChildShape* gpuChildShapes, + __global b3AlignedObjectArray& gpuCompoundSepNormalsOut, + __global b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, + int numCompoundPairs, + int i + ) +{ + +// int i = get_global_id(0); + if (i= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + gpuHasCompoundSepNormalsOut[i] = 0; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int shapeTypeA = collidables[collidableIndexA].m_shapeType; + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + + + if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) + { + return; + } + + int hasSeparatingAxis = 5; + + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + float dmin = FLT_MAX; + posA.w = 0.f; + posB.w = 0.f; + float4 c0local = convexShapes[shapeIndexA].m_localCenter; + float4 c0 = transform(&c0local, &posA, &ornA); + float4 c1local = convexShapes[shapeIndexB].m_localCenter; + float4 c1 = transform(&c1local,&posB,&ornB); + const float4 DeltaC2 = c0 - c1; + float4 sepNormal = make_float4(1,0,0,0); +// bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); + + hasSeparatingAxis = 4; + if (!sepA) + { + hasSeparatingAxis = 0; + } else + { + bool sepB = findSeparatingAxis( convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); + + if (!sepB) + { + hasSeparatingAxis = 0; + } else//(!sepB) + { + bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + if (sepEE) + { + gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); + gpuHasCompoundSepNormalsOut[i] = 1; + }//sepEE + }//(!sepB) + }//(!sepA) + + + } + +} + + +__kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray& vertices, + __global const b3AlignedObjectArray& uniqueEdges, + __global const b3AlignedObjectArray& faces, + __global const b3AlignedObjectArray& indices, + __global const b3GpuChildShape* gpuChildShapes, + __global const b3AlignedObjectArray& gpuCompoundSepNormalsOut, + __global const b3AlignedObjectArray& gpuHasCompoundSepNormalsOut, + __global struct b3Contact4Data* globalContactsOut, + int* nGlobalContactsOut, + int numCompoundPairs, int maxContactCapacity, int i) +{ + +// int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.0f; + + if (i= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; + b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; + b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], + convexShapes[shapeIndexA], convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -gpuCompoundSepNormalsOut[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + b3Int4 contactIdx;// = {-1,-1,-1,-1}; + + contactIdx.s[0] = 0; + contactIdx.s[1] = 1; + contactIdx.s[2] = 2; + contactIdx.s[3] = 3; + + int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); + + int dstIdx; + dstIdx = b3AtomicInc( nGlobalContactsOut); + if ((dstIdx+nReducedContacts) < maxContactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = gpuCompoundPairs[pairIndex].x; + int bodyB = gpuCompoundPairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;im_worldPosB[i] = pointsIn[contactIdx.s[i]]; + } + b3Contact4Data_setNumPoints(c,nReducedContacts); + } + + }// if (numContactsOut>0) + }// if (gpuHasCompoundSepNormalsOut[i]) + }// if (i& hostAabbsWorldSpace, + const b3AlignedObjectArray& hostAabbsLocalSpace, + + const b3AlignedObjectArray& convexVertices, + const b3AlignedObjectArray& hostUniqueEdges, + const b3AlignedObjectArray& convexIndices, + const b3AlignedObjectArray& faces, + + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + b3AlignedObjectArray& treeNodesCPU, + b3AlignedObjectArray& subTreesCPU, + b3AlignedObjectArray& bvhInfoCPU + ) +{ + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); + + b3AlignedObjectArray cpuCompoundPairsOut; + int numCompoundPairsOut=0; + int maxNumCompoundPairsCapacity = 8192;//1024; + cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); + + // work-in-progress + findCompoundPairsKernel( + pairIndex, + bodyIndexA,bodyIndexB, + collidableIndexA,collidableIndexB, + rigidBodies, + collidables, + convexShapes, + convexVertices, + hostAabbsWorldSpace, + hostAabbsLocalSpace, + cpuChildShapes, + &cpuCompoundPairsOut[0], + &numCompoundPairsOut, + maxNumCompoundPairsCapacity , + treeNodesCPU, + subTreesCPU, + bvhInfoCPU + ); + + printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); + if (numCompoundPairsOut>maxNumCompoundPairsCapacity) + { + b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); + numCompoundPairsOut=maxNumCompoundPairsCapacity; + } + b3AlignedObjectArray cpuCompoundSepNormalsOut; + b3AlignedObjectArray cpuHasCompoundSepNormalsOut; + cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); + cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); + + for (int i=0;im_numVertices;i++) + { + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + float curDot = vtx.dot(planeNormalInConvex); + + + if (curDot>maxDot) + { + hitVertex=i; + maxDot=curDot; + hitVtx = vtx; + //make sure the deepest points is always included + if (numPoints==MAX_PLANE_CONVEX_POINTS) + numPoints--; + } + + if (numPoints4) + { + numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + } + int dstIdx; + // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (numReducedPoints>0) + { + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -planeNormalWorld; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + for (int i=0;im_worldPosB[i] = pOnB1; + } + c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; + }//if (dstIdx < numPairs) + } + + } + + +} + + + + + +void computeContactSphereConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) +{ + + float radius = collidables[collidableIndexA].m_radius; + float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; + b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; + + + + float4 pos = rigidBodies[bodyIndexB].m_pos; + + + b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; + + b3Transform tr; + tr.setIdentity(); + tr.setOrigin(pos); + tr.setRotation(quat); + b3Transform trInv = tr.inverse(); + + float4 spherePos = trInv(spherePos1); + + int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; + int shapeIndex = collidables[collidableIndex].m_shapeIndex; + int numFaces = convexShapes[shapeIndex].m_numFaces; + float4 closestPnt = b3MakeVector3(0, 0, 0, 0); +// float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); + float minDist = -1000000.f; // TODO: What is the largest/smallest float? + bool bCollide = true; + int region = -1; + float4 localHitNormal; + for ( int f = 0; f < numFaces; f++ ) + { + b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; + float4 planeEqn; + float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); + planeEqn = n1; + planeEqn[3] = face.m_plane.w; + + float4 pntReturn; + float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); + + if ( dist > radius) + { + bCollide = false; + break; + } + + if ( dist > 0 ) + { + //might hit an edge or vertex + b3Vector3 out; + + bool isInPoly = IsPointInPolygon(spherePos, + &face, + &convexVertices[convexShapes[shapeIndex].m_vertexOffset], + convexIndices, + &out); + if (isInPoly) + { + if (dist>minDist) + { + minDist = dist; + closestPnt = pntReturn; + localHitNormal = planeEqn; + region=1; + } + } else + { + b3Vector3 tmp = spherePos-out; + b3Scalar l2 = tmp.length2(); + if (l2minDist) + { + minDist = dist; + closestPnt = out; + localHitNormal = tmp/dist; + region=2; + } + + } else + { + bCollide = false; + break; + } + } + } + else + { + if ( dist > minDist ) + { + minDist = dist; + closestPnt = pntReturn; + localHitNormal = planeEqn; + region=3; + } + } + } + static int numChecks = 0; + numChecks++; + + if (bCollide && minDist > -10000) + { + + float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; + float4 pOnB1 = tr(closestPnt); + //printf("dist ,%f,",minDist); + float actualDepth = minDist-radius; + if (actualDepth<0) + { + //printf("actualDepth = ,%f,", actualDepth); + //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); + //printf("region=,%d,\n", region); + pOnB1[3] = actualDepth; + + int dstIdx; +// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx=nGlobalContactsOut; + nGlobalContactsOut++; + + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = normalOnSurfaceB1; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; + c->m_worldPosB[0] = pOnB1; + int numPoints = 1; + c->m_worldNormalOnB.w = (b3Scalar)numPoints; + }//if (dstIdx < numPairs) + } + }//if (hasCollision) + +} + + + + +int computeContactConvexConvex2( + int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3AlignedObjectArray& rigidBodies, + const b3AlignedObjectArray& collidables, + const b3AlignedObjectArray& convexShapes, + const b3AlignedObjectArray& convexVertices, + const b3AlignedObjectArray& uniqueEdges, + const b3AlignedObjectArray& convexIndices, + const b3AlignedObjectArray& faces, + b3AlignedObjectArray& globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + const b3AlignedObjectArray& oldContacts + ) +{ + int contactIndex = -1; + b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; + b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; + b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; + b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; + + + b3ConvexPolyhedronData hullA, hullB; + + b3Vector3 sepNormalWorldSpace; + + + + b3Collidable colA = collidables[collidableIndexA]; + hullA = convexShapes[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); + + + b3Collidable colB = collidables[collidableIndexB]; + hullB = convexShapes[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); + +// int contactCapacity = MAX_VERTS; + //int numContactsOut=0; + + +#ifdef _WIN32 + b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); + b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); +#endif + + bool foundSepAxis = findSeparatingAxis(hullA,hullB, + posA, + ornA, + posB, + ornB, + + convexVertices,uniqueEdges,faces,convexIndices, + convexVertices,uniqueEdges,faces,convexIndices, + + sepNormalWorldSpace + ); + + + if (foundSepAxis) + { + + + contactIndex = clipHullHullSingle( + bodyIndexA, bodyIndexB, + posA,ornA, + posB,ornB, + collidableIndexA, collidableIndexB, + &rigidBodies, + &globalContactsOut, + nGlobalContactsOut, + + convexShapes, + convexShapes, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + convexVertices, + uniqueEdges, + faces, + convexIndices, + + collidables, + collidables, + sepNormalWorldSpace, + maxContactCapacity); + + } + + return contactIndex; +} + + + + + + + +void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray* pairs, int nPairs, + const b3OpenCLArray* bodyBuf, + b3OpenCLArray* contactOut, int& nContacts, + const b3OpenCLArray* oldContacts, + int maxContactCapacity, + int compoundPairCapacity, + const b3OpenCLArray& convexData, + const b3OpenCLArray& gpuVertices, + const b3OpenCLArray& gpuUniqueEdges, + const b3OpenCLArray& gpuFaces, + const b3OpenCLArray& gpuIndices, + const b3OpenCLArray& gpuCollidables, + const b3OpenCLArray& gpuChildShapes, + + const b3OpenCLArray& clAabbsWorldSpace, + const b3OpenCLArray& clAabbsLocalSpace, + + b3OpenCLArray& worldVertsB1GPU, + b3OpenCLArray& clippingFacesOutGPU, + b3OpenCLArray& worldNormalsAGPU, + b3OpenCLArray& worldVertsA1GPU, + b3OpenCLArray& worldVertsB2GPU, + b3AlignedObjectArray& bvhDataUnused, + b3OpenCLArray* treeNodesGPU, + b3OpenCLArray* subTreesGPU, + b3OpenCLArray* bvhInfo, + + int numObjects, + int maxTriConvexPairCapacity, + b3OpenCLArray& triangleConvexPairsOut, + int& numTriConvexPairsOut + ) +{ + myframecount++; + + if (!nPairs) + return; + +#ifdef CHECK_ON_HOST + + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + b3AlignedObjectArray hostTriangleConvexPairs; + + b3AlignedObjectArray hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + + b3AlignedObjectArray oldHostContacts; + + if (oldContacts->size()) + { + oldContacts->copyToHost(oldHostContacts); + } + + + hostContacts.resize(maxContactCapacity); + + for (int i=0;i=0) + { +// printf("convex convex contactIndex = %d\n",contactIndex); + hostPairs[i].z = contactIndex; + } +// printf("plane-convex\n"); + + } + + + } + + if (hostPairs.size()) + { + pairs->copyFromHost(hostPairs); + } + + hostContacts.resize(nContacts); + if (nContacts) + { + + contactOut->copyFromHost(hostContacts); + } else + { + contactOut->resize(0); + } + + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + //printf("(HOST) nContacts = %d\n",nContacts); + +#else + + { + if (nPairs) + { + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + + B3_PROFILE("primitiveContactsKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst(maxContactCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } + } + + +#endif//CHECK_ON_HOST + + B3_PROFILE("computeConvexConvexContactsGPUSAT"); + // printf("nContacts = %d\n",nContacts); + + + m_sepNormals.resize(nPairs); + m_hasSeparatingNormals.resize(nPairs); + + int concaveCapacity=maxTriConvexPairCapacity; + m_concaveSepNormals.resize(concaveCapacity); + m_concaveHasSeparatingNormals.resize(concaveCapacity); + m_numConcavePairsOut.resize(0); + m_numConcavePairsOut.push_back(0); + + + m_gpuCompoundPairs.resize(compoundPairCapacity); + + m_gpuCompoundSepNormals.resize(compoundPairCapacity); + + + m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); + + m_numCompoundPairsOut.resize(0); + m_numCompoundPairsOut.push_back(0); + + int numCompoundPairs = 0; + + int numConcavePairs =0; + + { + clFinish(m_queue); + if (findSeparatingAxisOnGpu) + { + m_dmins.resize(nPairs); + if (splitSearchSepAxisConvex) + { + + + if (useMprGpu) + { + nContacts = m_totalContactsOut.at(0); + { + B3_PROFILE("mprPenetrationKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + + launcher.setConst(maxContactCapacity); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + /* + b3AlignedObjectArrayhostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArrayhostSepAxis; + m_sepNormals.copyToHost(hostSepAxis); + */ + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + + } + } + + if (1) + { + + if (1) + { + { + B3_PROFILE("findSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + + { + B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()), + b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) + + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numDirections); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + } + if (useMprGpu) + { + B3_PROFILE("findSeparatingAxisUnitSphereKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + launcher.setConst( numDirections); + + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + } + + + } else + { + B3_PROFILE("findSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + } + else + { + + B3_PROFILE("findSeparatingAxisKernel CPU"); + + + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray hostConvexShapeData; + convexData.copyToHost(hostConvexShapeData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostHasSepAxis; + hostHasSepAxis.resize(nPairs); + b3AlignedObjectArray hostSepAxis; + hostSepAxis.resize(nPairs); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + + b3AlignedObjectArray hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + hostContacts.resize(maxContactCapacity); + int nGlobalContactsOut = nContacts; + + + for (int i=0;i dist) + { + float diff = depth - dist; + + static float maxdiff = 0.f; + if (maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n",maxdiff); + } + } + } + if (depth > dmin) + { + b3Vector3 oldAxis = hostSepAxis[i]; + depth = dmin; + sepAxis2 = oldAxis; + } + + + + if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) + { + if (depth > dist) + { + float diff = depth - dist; + //printf("?diff = %f\n",diff ); + static float maxdiff = 0.f; + if (maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n",maxdiff); + } + } + //this is used for SAT + //hostHasSepAxis[i] = 1; + //hostSepAxis[i] = sepAxis2; + + //add contact point + + //int contactIndex = nGlobalContactsOut; + b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); + nGlobalContactsOut++; + newContact.m_batchIdx = 0;//i; + newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; + newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; + + newContact.m_frictionCoeffCmp = 45874; + newContact.m_restituitionCoeffCmp = 0; + + + static float maxDepth = 0.f; + + if (depth > maxDepth) + { + maxDepth = depth; + printf("MPR maxdepth = %f\n",maxDepth ); + + } + + + resultPointOnBWorld.w = -depth; + newContact.m_worldPosB[0] = resultPointOnBWorld; + //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; + newContact.m_worldNormalOnB = sepAxis2; + newContact.m_worldNormalOnB.w = (b3Scalar)1; + } else + { + printf("rejected\n"); + } + + + } + } else + { + + + + //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + b3AlignedObjectArray oldHostContacts; + int result; + result = computeContactConvexConvex2( //hostPairs, + pairIndex, + bodyIndexA, bodyIndexB, + collidableIndexA, collidableIndexB, + hostBodyBuf, + hostCollidables, + hostConvexShapeData, + hostVertices, + hostUniqueEdges, + hostIndices, + hostFaces, + hostContacts, + nGlobalContactsOut, + maxContactCapacity, + oldHostContacts + //hostHasSepAxis, + //hostSepAxis + + ); + }//mpr + }//hostHasSepAxis[i] = 1; + + } else + { + + b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; + b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); + b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; + b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); + b3Vector3 DeltaC2 = c0 - c1; + + b3Vector3 sepAxis; + + bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + + if (hasSepAxisA) + { + bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + if (hasSepAxisB) + { + bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin,true); + + if (hasEdgeEdge) + { + hostHasSepAxis[i] = 1; + hostSepAxis[i] = sepAxis; + } + } + } + } + } + + if (useGjkContacts)//nGlobalContactsOut>0) + { + //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); + nContacts = nGlobalContactsOut; + contactOut->copyFromHost(hostContacts); + + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + } + + m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); + m_sepNormals.copyFromHost(hostSepAxis); + + /* + //double-check results from GPU (comment-out the 'else' so both paths are executed + b3AlignedObjectArray checkHasSepAxis; + m_hasSeparatingNormals.copyToHost(checkHasSepAxis); + static int frameCount = 0; + frameCount++; + for (int i=0;igetBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), + b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), + b3BufferInfoCL(subTreesGPU->getBufferCL()), + b3BufferInfoCL(treeNodesGPU->getBufferCL()), + b3BufferInfoCL(bvhInfo->getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst( compoundPairCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + numCompoundPairs = m_numCompoundPairsOut.at(0); + //printf("numCompoundPairs =%d\n",numCompoundPairs ); + if (numCompoundPairs) + { + //printf("numCompoundPairs=%d\n",numCompoundPairs); + } + + + } else + { + + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + b3AlignedObjectArray cpuCompoundPairsOut; + cpuCompoundPairsOut.resize(compoundPairCapacity); + + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + + + + for (int pairIndex=0;pairIndex compoundPairCapacity) + { + b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); + numCompoundPairs = compoundPairCapacity; + } + + + + m_gpuCompoundPairs.resize(numCompoundPairs); + m_gpuHasCompoundSepNormals.resize(numCompoundPairs); + m_gpuCompoundSepNormals.resize(numCompoundPairs); + + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsPrimitivesKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numCompoundPairs ); + launcher.setConst(maxContactCapacity); + + int num = numCompoundPairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + } + + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), + b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numCompoundPairs ); + + int num = numCompoundPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } + + + //printf("numConcave = %d\n",numConcave); + + + +// printf("hostNormals.size()=%d\n",hostNormals.size()); + //int numPairs = pairCount.at(0); + + + + } + int vertexFaceCapacity = 64; + + + + { + //now perform the tree query on GPU + + + + + if (treeNodesGPU->size() && treeNodesGPU->size()) + { + if (bvhTraversalKernelGPU) + { + + B3_PROFILE("m_bvhTraversalKernel"); + + + numConcavePairs = m_numConcavePairsOut.at(0); + + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); + launcher.setBuffer( pairs->getBufferCL()); + launcher.setBuffer( bodyBuf->getBufferCL()); + launcher.setBuffer( gpuCollidables.getBufferCL()); + launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer( subTreesGPU->getBufferCL()); + launcher.setBuffer( treeNodesGPU->getBufferCL()); + launcher.setBuffer( bvhInfo->getBufferCL()); + + launcher.setConst( nPairs ); + launcher.setConst( maxTriConvexPairCapacity); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + numConcavePairs = m_numConcavePairsOut.at(0); + } else + { + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + //int maxTriConvexPairCapacity, + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + + //int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut + + b3AlignedObjectArray treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... + + volatile int hostNumConcavePairsOut=0; + + // + for (int i=0;i maxTriConvexPairCapacity) + { + static int exceeded_maxTriConvexPairCapacity_count = 0; + b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", + numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); + numConcavePairs = maxTriConvexPairCapacity; + } + triangleConvexPairsOut.resize(numConcavePairs); + + if (numConcavePairs) + { + + + + + clippingFacesOutGPU.resize(numConcavePairs); + worldNormalsAGPU.resize(numConcavePairs); + worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); + worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); + + + if (findConcaveSeparatingAxisKernelGPU) + { + + /* + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + */ + + //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) + if (splitSearchSepAxisConcave) + { + //printf("numConcavePairs = %d\n",numConcavePairs); + m_dmins.resize(numConcavePairs); + { + B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + + + } +// numConcavePairs = 0; + if (1) + { + B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + // numConcavePairs = 0; + + + + + + + } else + { + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst( numConcavePairs ); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + } + + + } else + { + + b3AlignedObjectArray clippingFacesOutCPU; + b3AlignedObjectArray worldVertsA1CPU; + b3AlignedObjectArray worldNormalsACPU; + b3AlignedObjectArray worldVertsB1CPU; + b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; + + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); + + b3GpuChildShape* childShapePointerCPU = 0; + if (cpuChildShapes.size()) + childShapePointerCPU = &cpuChildShapes.at(0); + + clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + worldNormalsACPU.resize(worldNormalsAGPU.size()); + worldVertsB1CPU.resize(worldVertsB1GPU.size()); + + for (int i=0;i cpuCompoundSepNormals; +// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); +// b3AlignedObjectArray cpuConcavePairs; +// triangleConvexPairsOut.copyToHost(cpuConcavePairs); + + + } + } + + + } + + if (numConcavePairs) + { + if (numConcavePairs) + { + B3_PROFILE("findConcaveSphereContactsKernel"); + nContacts = m_totalContactsOut.at(0); +// printf("nContacts1 = %d\n",nContacts); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + + launcher.setConst( numConcavePairs ); + launcher.setConst(maxContactCapacity); + + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); + + //printf("nContacts2 = %d\n",nContacts); + + if (nContacts >= maxContactCapacity) + { + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + } + + } + + + +#ifdef __APPLE__ + bool contactClippingOnGpu = true; +#else + bool contactClippingOnGpu = true; +#endif + + if (contactClippingOnGpu) + { + m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); +// printf("nContacts3 = %d\n",nContacts); + + + //B3_PROFILE("clipHullHullKernel"); + + bool breakupConcaveConvexKernel = true; + +#ifdef __APPLE__ + //actually, some Apple OpenCL platform/device combinations work fine... + breakupConcaveConvexKernel = true; +#endif + //concave-convex contact clipping + if (numConcavePairs) + { + // printf("numConcavePairs = %d\n", numConcavePairs); + // nContacts = m_totalContactsOut.at(0); + // printf("nContacts before = %d\n", nContacts); + + if (breakupConcaveConvexKernel) + { + + worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); + + + //clipFacesAndFindContacts + + if (clipConcaveFacesAndFindContactsCPU) + { + + b3AlignedObjectArray clippingFacesOutCPU; + b3AlignedObjectArray worldVertsA1CPU; + b3AlignedObjectArray worldNormalsACPU; + b3AlignedObjectArray worldVertsB1CPU; + + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + worldVertsA1GPU.copyToHost(worldVertsA1CPU); + worldNormalsAGPU.copyToHost(worldNormalsACPU); + worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + + + b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; + m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + + b3AlignedObjectArray worldVertsB2CPU; + worldVertsB2CPU.resize(worldVertsB2GPU.size()); + + + for (int i=0;ireserve(newContactCapacity); + if (reduceConcaveContactsOnGPU) + { +// printf("newReservation = %d\n",newReservation); + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(newContactCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + }else + { + + volatile int nGlobalContactsOut = nContacts; + b3AlignedObjectArray triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + b3AlignedObjectArrayconcaveHasSeparatingNormalsCPU; + m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); + + b3AlignedObjectArray concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + + + b3AlignedObjectArray hostContacts; + if (nContacts) + { + contactOut->copyToHost(hostContacts); + } + hostContacts.resize(newContactCapacity); + + b3AlignedObjectArray clippingFacesOutCPU; + b3AlignedObjectArray worldVertsB2CPU; + + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + worldVertsB2GPU.copyToHost(worldVertsB2CPU); + + + + for (int i=0;iresize(nContacts); + hostContacts.resize(nContacts); + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + contactOut->copyFromHost(hostContacts); + } + + } + //re-use? + + + } else + { + B3_PROFILE("clipHullHullConcaveConvexKernel"); + nContacts = m_totalContactsOut.at(0); + int newContactCapacity = contactOut->capacity(); + + //printf("contactOut5 = %d\n",nContacts); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(newContactCapacity); + launcher.setConst( numConcavePairs ); + int num = numConcavePairs; + launcher.launch1D( num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + //printf("contactOut6 = %d\n",nContacts); + b3AlignedObjectArray cpuContacts; + contactOut->copyToHost(cpuContacts); + } + // printf("nContacts after = %d\n", nContacts); + }//numConcavePairs + + + + //convex-convex contact clipping + + bool breakupKernel = false; + +#ifdef __APPLE__ + breakupKernel = true; +#endif + +#ifdef CHECK_ON_HOST + bool computeConvexConvex = false; +#else + bool computeConvexConvex = true; +#endif//CHECK_ON_HOST + if (computeConvexConvex) + { + B3_PROFILE("clipHullHullKernel"); + if (breakupKernel) + { + + + + + worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + clippingFacesOutGPU.resize(nPairs); + worldNormalsAGPU.resize(nPairs); + worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); + worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); + + if (findConvexClippingFacesGPU) + { + B3_PROFILE("findClippingFacesKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( vertexFaceCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + } else + { + + float minDist = -1e30f; + float maxDist = 0.02f; + + b3AlignedObjectArray hostConvexData; + convexData.copyToHost(hostConvexData); + b3AlignedObjectArray hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray hostHasSepNormals; + m_hasSeparatingNormals.copyToHost(hostHasSepNormals); + b3AlignedObjectArray cpuSepNormals; + m_sepNormals.copyToHost(cpuSepNormals); + + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + + //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + b3AlignedObjectArray worldVertsB1CPU; + worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + b3AlignedObjectArray clippingFacesOutCPU; + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + + b3AlignedObjectArray worldNormalsACPU; + worldNormalsACPU.resize(nPairs); + + b3AlignedObjectArray worldVertsA1CPU; + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + + + b3AlignedObjectArray hostVertices; + gpuVertices.copyToHost(hostVertices); + b3AlignedObjectArray hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray hostIndices; + gpuIndices.copyToHost(hostIndices); + + + for (int i=0;i hostPairs; + //pairs->copyToHost(hostPairs); + + b3AlignedObjectArray hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + + b3AlignedObjectArray hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray worldVertsB2CPU; + worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); + + b3AlignedObjectArrayworldVertsA1CPU; + worldVertsA1GPU.copyToHost(worldVertsA1CPU); + b3AlignedObjectArray worldNormalsACPU; + worldNormalsAGPU.copyToHost(worldNormalsACPU); + + b3AlignedObjectArray worldVertsB1CPU; + worldVertsB1GPU.copyToHost(worldVertsB1CPU); + + /* + __global const b3Float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global b3Int4* clippingFacesOut, + __global b3Float4* worldVertsA1, + __global b3Float4* worldNormalsA1, + __global b3Float4* worldVertsB1, + __global b3Float4* worldVertsB2, + int vertexFaceCapacity, + int pairIndex + */ + for (int i=0;ireserve(newContactCapacity); + + if (reduceConvexContactsOnGPU) + { + { + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(newContactCapacity); + launcher.setConst( nPairs ); + int num = nPairs; + + launcher.launch1D( num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); + } else + { + + volatile int nGlobalContactsOut = nContacts; + b3AlignedObjectArray hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArray hostContactsOut; + contactOut->copyToHost(hostContactsOut); + hostContactsOut.resize(newContactCapacity); + + b3AlignedObjectArray hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray worldVertsB2CPU; + worldVertsB2GPU.copyToHost(worldVertsB2CPU); + + for (int i=0;icopyFromHost(hostContactsOut); + } + // b3Contact4 pt = contactOut->at(0); + // printf("nContacts = %d\n",nContacts); + } + } + } + else//breakupKernel + { + + if (nPairs) + { + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( pairs->getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( m_sepNormals.getBufferCL()), + b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nPairs ); + launcher.setConst(maxContactCapacity); + + int num = nPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + if (nContacts >= maxContactCapacity) + { + b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); + nContacts = maxContactCapacity; + } + contactOut->resize(nContacts); + } + } + + + int nCompoundsPairs = m_gpuCompoundPairs.size(); + + if (nCompoundsPairs) + { + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), + b3BufferInfoCL( bodyBuf->getBufferCL(),true), + b3BufferInfoCL( gpuCollidables.getBufferCL(),true), + b3BufferInfoCL( convexData.getBufferCL(),true), + b3BufferInfoCL( gpuVertices.getBufferCL(),true), + b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), + b3BufferInfoCL( gpuFaces.getBufferCL(),true), + b3BufferInfoCL( gpuIndices.getBufferCL(),true), + b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), + b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), + b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), + b3BufferInfoCL( contactOut->getBufferCL()), + b3BufferInfoCL( m_totalContactsOut.getBufferCL()) + }; + b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nCompoundsPairs ); + launcher.setConst(maxContactCapacity); + + int num = nCompoundsPairs; + launcher.launch1D( num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + if (nContacts>maxContactCapacity) + { + + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + contactOut->resize(nContacts); + }//if nCompoundsPairs + } + }//contactClippingOnGpu + + //printf("nContacts end = %d\n",nContacts); + + //printf("frameCount = %d\n",frameCount++); +} -- cgit v1.2.3