diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp | 4985 |
1 files changed, 2330 insertions, 2655 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp index fb435aa7fd..54a104c5c8 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp @@ -16,19 +16,18 @@ subject to the following restrictions: bool findSeparatingAxisOnGpu = true; bool splitSearchSepAxisConcave = false; bool splitSearchSepAxisConvex = true; -bool useMprGpu = true;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... +bool useMprGpu = true; //use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... bool bvhTraversalKernelGPU = true; bool findConcaveSeparatingAxisKernelGPU = true; -bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; -bool clipConvexFacesAndFindContactsCPU = false;//false;//true; -bool reduceConcaveContactsOnGPU = true;//false; -bool reduceConvexContactsOnGPU = true;//false; +bool clipConcaveFacesAndFindContactsCPU = false; //false;//true; +bool clipConvexFacesAndFindContactsCPU = false; //false;//true; +bool reduceConcaveContactsOnGPU = true; //false; +bool reduceConvexContactsOnGPU = true; //false; bool findConvexClippingFacesGPU = true; -bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false -bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false +bool useGjk = false; ///option for CPU/host testing, when findSeparatingAxisOnGpu = false +bool useGjkContacts = false; //////option for CPU/host testing when findSeparatingAxisOnGpu = false - -static int myframecount=0;///for testing +static int myframecount = 0; ///for testing ///This file was written by Erwin Coumans ///Separating axis rest based on work from Pierre Terdiman, see @@ -42,10 +41,10 @@ static int myframecount=0;///for testing //#define PERSISTENT_CONTACTS_HOST #endif -int b3g_actualSATPairTests=0; +int b3g_actualSATPairTests = 0; #include "b3ConvexHullContact.h" -#include <string.h>//memcpy +#include <string.h> //memcpy #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" @@ -54,8 +53,7 @@ int b3g_actualSATPairTests=0; typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; - -#include <float.h> //for FLT_MAX +#include <float.h> //for FLT_MAX #include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" //#include "AdlQuaternion.h" @@ -69,7 +67,6 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; #include "kernels/bvhTraversal.h" #include "kernels/primitiveContacts.h" - #include "Bullet3Geometry/b3AabbUtil.h" #define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" @@ -77,12 +74,10 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; #define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" - #define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" #define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" #define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" - #ifndef __global #define __global #endif @@ -91,204 +86,184 @@ typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; #define __kernel #endif - #include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" #include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" - - #define dot3F4 b3Dot -GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) -:m_context(ctx), -m_device(device), -m_queue(q), +GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q) + : m_context(ctx), + m_device(device), + m_queue(q), -m_findSeparatingAxisKernel(0), -m_findSeparatingAxisVertexFaceKernel(0), -m_findSeparatingAxisEdgeEdgeKernel(0), -m_unitSphereDirections(m_context,m_queue), + m_findSeparatingAxisKernel(0), + m_findSeparatingAxisVertexFaceKernel(0), + m_findSeparatingAxisEdgeEdgeKernel(0), + m_unitSphereDirections(m_context, m_queue), -m_totalContactsOut(m_context, m_queue), -m_sepNormals(m_context, m_queue), -m_dmins(m_context,m_queue), + m_totalContactsOut(m_context, m_queue), + m_sepNormals(m_context, m_queue), + m_dmins(m_context, m_queue), -m_hasSeparatingNormals(m_context, m_queue), -m_concaveSepNormals(m_context, m_queue), -m_concaveHasSeparatingNormals(m_context,m_queue), -m_numConcavePairsOut(m_context, m_queue), + m_hasSeparatingNormals(m_context, m_queue), + m_concaveSepNormals(m_context, m_queue), + m_concaveHasSeparatingNormals(m_context, m_queue), + m_numConcavePairsOut(m_context, m_queue), + m_gpuCompoundPairs(m_context, m_queue), -m_gpuCompoundPairs(m_context, m_queue), + m_gpuCompoundSepNormals(m_context, m_queue), + m_gpuHasCompoundSepNormals(m_context, m_queue), - -m_gpuCompoundSepNormals(m_context, m_queue), -m_gpuHasCompoundSepNormals(m_context, m_queue), - -m_numCompoundPairsOut(m_context, m_queue) + m_numCompoundPairsOut(m_context, m_queue) { m_totalContactsOut.push_back(0); - - cl_int errNum=0; + + cl_int errNum = 0; if (1) { const char* mprSrc = mprKernelsCL; - + const char* srcConcave = satConcaveKernelsCL; - char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); -//#endif - m_mprPenetrationKernel = 0; + char flags[1024] = {0}; + //#ifdef CL_PLATFORM_INTEL + // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); + //#endif + m_mprPenetrationKernel = 0; m_findSeparatingAxisUnitSphereKernel = 0; if (useMprGpu) { - cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); + cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH); + b3Assert(errNum == CL_SUCCESS); + + m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg); b3Assert(m_mprPenetrationKernel); - b3Assert(errNum==CL_SUCCESS); + b3Assert(errNum == CL_SUCCESS); - m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); + m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg); b3Assert(m_findSeparatingAxisUnitSphereKernel); - b3Assert(errNum==CL_SUCCESS); + b3Assert(errNum == CL_SUCCESS); - - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); + int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); m_unitSphereDirections.resize(numDirections); - m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); - - + m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true); } + cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH); + b3Assert(errNum == CL_SUCCESS); - cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); - b3Assert(errNum==CL_SUCCESS); - - cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); - b3Assert(errNum==CL_SUCCESS); + cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH); + b3Assert(errNum == CL_SUCCESS); - m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); + m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg); b3Assert(m_findSeparatingAxisKernel); - b3Assert(errNum==CL_SUCCESS); + b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); + m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg); b3Assert(m_findSeparatingAxisVertexFaceKernel); - m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); + m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg); b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); + m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg); b3Assert(m_findConcaveSeparatingAxisKernel); - b3Assert(errNum==CL_SUCCESS); - - m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); + b3Assert(errNum == CL_SUCCESS); + + m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg); b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); - b3Assert(errNum==CL_SUCCESS); - - m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); + b3Assert(errNum == CL_SUCCESS); + + m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg); b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); - b3Assert(errNum==CL_SUCCESS); - - - - - m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); + b3Assert(errNum == CL_SUCCESS); + + m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg); b3Assert(m_findCompoundPairsKernel); - b3Assert(errNum==CL_SUCCESS); - m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); + b3Assert(errNum == CL_SUCCESS); + m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg); b3Assert(m_processCompoundPairsKernel); - b3Assert(errNum==CL_SUCCESS); + b3Assert(errNum == CL_SUCCESS); } if (1) { const char* srcClip = satClipKernelsCL; - char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); -//#endif + char flags[1024] = {0}; + //#ifdef CL_PLATFORM_INTEL + // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); + //#endif - cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); - b3Assert(errNum==CL_SUCCESS); + cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH); + b3Assert(errNum == CL_SUCCESS); - m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); + m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); - m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - + m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); - m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); + m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); - m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); + m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); - m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); + m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); -// m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); - // b3Assert(errNum==CL_SUCCESS); + // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); + // b3Assert(errNum==CL_SUCCESS); - m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, - "newContactReductionKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); + m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, + "newContactReductionKernel", &errNum, satClipContactsProg); + b3Assert(errNum == CL_SUCCESS); } - else + else { - m_clipHullHullKernel=0; + m_clipHullHullKernel = 0; m_clipCompoundsHullHullKernel = 0; - m_findClippingFacesKernel = 0; - m_newContactReductionKernel=0; - m_clipFacesAndFindContacts = 0; + m_findClippingFacesKernel = 0; + m_newContactReductionKernel = 0; + m_clipFacesAndFindContacts = 0; m_clipHullHullConcaveConvexKernel = 0; -// m_extractManifoldAndAddContactKernel = 0; + // m_extractManifoldAndAddContactKernel = 0; } - if (1) + if (1) { const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); - b3Assert(errNum==CL_SUCCESS); + cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH); + b3Assert(errNum == CL_SUCCESS); + m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, ""); + b3Assert(errNum == CL_SUCCESS); } - - { - const char* primitiveContactsSrc = primitiveContactsKernelsCL; - cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); - b3Assert(errNum==CL_SUCCESS); - m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); - b3Assert(errNum==CL_SUCCESS); + { + const char* primitiveContactsSrc = primitiveContactsKernelsCL; + cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); + b3Assert(errNum == CL_SUCCESS); + + m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, ""); + b3Assert(errNum == CL_SUCCESS); - m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); - b3Assert(errNum==CL_SUCCESS); + m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg); + b3Assert(errNum == CL_SUCCESS); b3Assert(m_findConcaveSphereContactsKernel); - m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); - b3Assert(errNum==CL_SUCCESS); + m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, ""); + b3Assert(errNum == CL_SUCCESS); b3Assert(m_processCompoundPairsPrimitivesKernel); - - } - - + } } GpuSatCollision::~GpuSatCollision() { - if (m_findSeparatingAxisVertexFaceKernel) clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); @@ -301,17 +276,15 @@ GpuSatCollision::~GpuSatCollision() if (m_mprPenetrationKernel) clReleaseKernel(m_mprPenetrationKernel); - if (m_findSeparatingAxisKernel) clReleaseKernel(m_findSeparatingAxisKernel); - if (m_findConcaveSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); + if (m_findConcaveSeparatingAxisVertexFaceKernel) + clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); + + if (m_findConcaveSeparatingAxisEdgeEdgeKernel) + clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); - - if (m_findConcaveSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); - if (m_findConcaveSeparatingAxisKernel) clReleaseKernel(m_findConcaveSeparatingAxisKernel); @@ -320,17 +293,17 @@ GpuSatCollision::~GpuSatCollision() if (m_processCompoundPairsKernel) clReleaseKernel(m_processCompoundPairsKernel); - - if (m_findClippingFacesKernel) - clReleaseKernel(m_findClippingFacesKernel); - - if (m_clipFacesAndFindContacts) - clReleaseKernel(m_clipFacesAndFindContacts); - if (m_newContactReductionKernel) - clReleaseKernel(m_newContactReductionKernel); + + if (m_findClippingFacesKernel) + clReleaseKernel(m_findClippingFacesKernel); + + if (m_clipFacesAndFindContacts) + clReleaseKernel(m_clipFacesAndFindContacts); + if (m_newContactReductionKernel) + clReleaseKernel(m_newContactReductionKernel); if (m_primitiveContactsKernel) clReleaseKernel(m_primitiveContactsKernel); - + if (m_findConcaveSphereContactsKernel) clReleaseKernel(m_findConcaveSphereContactsKernel); @@ -344,12 +317,11 @@ GpuSatCollision::~GpuSatCollision() if (m_clipHullHullConcaveConvexKernel) clReleaseKernel(m_clipHullHullConcaveConvexKernel); -// if (m_extractManifoldAndAddContactKernel) + // if (m_extractManifoldAndAddContactKernel) // clReleaseKernel(m_extractManifoldAndAddContactKernel); if (m_bvhTraversalKernel) clReleaseKernel(m_bvhTraversalKernel); - } struct MyTriangleCallback : public b3NodeOverlapCallback @@ -359,14 +331,13 @@ struct MyTriangleCallback : public b3NodeOverlapCallback virtual void processNode(int subPart, int triangleIndex) { - printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); + printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB); printf("triangleIndex %d\n", triangleIndex); } }; - #define float4 b3Vector3 -#define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) +#define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w) float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) { @@ -377,9 +348,7 @@ float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn return dist; } - - -#define cross3(a,b) (a.cross(b)) +#define cross3(a, b) (a.cross(b)) b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) { b3Transform tr; @@ -390,184 +359,170 @@ b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion return res; } - -inline bool IsPointInPolygon(const float4& p, - const b3GpuFace* face, +inline bool IsPointInPolygon(const float4& p, + const b3GpuFace* face, const float4* baseVertex, - const int* convexIndices, - float4* out) + const int* convexIndices, + float4* out) { - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; + float4 a; + float4 b; + float4 ab; + float4 ap; + float4 v; - float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); - - if (face->m_numIndices<2) + float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f); + + if (face->m_numIndices < 2) return false; - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; + float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]]; b = v0; - for(unsigned i=0; i != face->m_numIndices; ++i) - { + for (unsigned i = 0; i != face->m_numIndices; ++i) + { a = b; float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; b = vi; - ab = b-a; - ap = p-a; - v = cross3(ab,plane); - - if (b3Dot(ap, v) > 0.f) - { - float ab_m2 = b3Dot(ab, ab); - float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; + ab = b - a; + ap = p - a; + v = cross3(ab, plane); + + if (b3Dot(ap, v) > 0.f) + { + float ab_m2 = b3Dot(ab, ab); + float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; + if (rt <= 0.f) + { + *out = a; + } + else if (rt >= 1.f) + { + *out = b; + } + else + { + float s = 1.f - rt; out[0].x = s * a.x + rt * b.x; out[0].y = s * a.y + rt * b.y; out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; + } + return false; + } + } + return true; } #define normalize3(a) (a.normalize()) - -int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) { - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = b3MakeVector3(0,0,0,0); + if (nPoints == 0) + return 0; + + if (nPoints <= 4) + return nPoints; + + if (nPoints > 64) + nPoints = 64; + + float4 center = b3MakeVector3(0, 0, 0, 0); { - - for (int i=0;i<nPoints;i++) + for (int i = 0; i < nPoints; i++) center += p[i]; center /= (float)nPoints; } - - - + // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} + float4 aVector = p[0] - center; + float4 u = cross3(nearNormal, aVector); + float4 v = cross3(nearNormal, u); + u = normalize3(u); + v = normalize3(v); + //keep point with deepest penetration + float minW = FLT_MAX; -#define MAX_VERTS 1024 + int minIndex = -1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for (int ie = 0; ie < nPoints; ie++) + { + if (p[ie].w < minW) + { + minW = p[ie].w; + minIndex = ie; + } + float f; + float4 r = p[ie] - center; + f = dot3F4(u, r); + if (f < maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4(-u, r); + if (f < maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + f = dot3F4(v, r); + if (f < maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4(-v, r); + if (f < maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; +} +#define MAX_VERTS 1024 -inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) +inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) { min = FLT_MAX; max = -FLT_MAX; int numVerts = hull.m_numVertices; - const float4 localDir = b3QuatRotate(orn.inverse(),dir); + const float4 localDir = b3QuatRotate(orn.inverse(), dir); - b3Scalar offset = dot3F4(pos,dir); + b3Scalar offset = dot3F4(pos, dir); - for(int i=0;i<numVerts;i++) + for (int i = 0; i < numVerts; i++) { //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; //b3Scalar dp = pt.dot(dir); //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; - b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset+i],localDir); + b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir); //b3Assert(dp==dpL); - if(dp < min) min = dp; - if(dp > max) max = dp; + if (dp < min) min = dp; + if (dp > max) max = dp; } - if(min>max) + if (min > max) { b3Scalar tmp = min; min = max; @@ -577,50 +532,48 @@ inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, cons max += offset; } - -static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA,const b3Quaternion& ornA, - const float4& posB,const b3Quaternion& ornB, - const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth) +static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA, const b3Quaternion& ornA, + const float4& posB, const b3Quaternion& ornB, + const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth) { - b3Scalar Min0,Max0; - b3Scalar Min1,Max1; - project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); - project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); + b3Scalar Min0, Max0; + b3Scalar Min1, Max1; + project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0); + project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1); - if(Max0<Min1 || Max1<Min0) + if (Max0 < Min1 || Max1 < Min0) return false; b3Scalar d0 = Max0 - Min1; - assert(d0>=0.0f); + assert(d0 >= 0.0f); b3Scalar d1 = Max1 - Min0; - assert(d1>=0.0f); - depth = d0<d1 ? d0:d1; + assert(d1 >= 0.0f); + depth = d0 < d1 ? d0 : d1; return true; } inline bool IsAlmostZero(const b3Vector3& v) { - if(fabsf(v.x)>1e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6) return false; + if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false; return true; } - -static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA1, - const b3Quaternion& ornA, - const float4& posB1, - const b3Quaternion& ornB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - b3Vector3& sep) +static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA1, + const b3Quaternion& ornA, + const float4& posB1, + const b3Quaternion& ornB, + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, + const b3AlignedObjectArray<b3GpuFace>& facesA, + const b3AlignedObjectArray<int>& indicesA, + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, + const b3AlignedObjectArray<b3GpuFace>& facesB, + const b3AlignedObjectArray<int>& indicesB, + + b3Vector3& sep) { B3_PROFILE("findSeparatingAxis"); @@ -629,41 +582,40 @@ static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3Con posA.w = 0.f; float4 posB = posB1; posB.w = 0.f; -//#ifdef TEST_INTERNAL_OBJECTS + //#ifdef TEST_INTERNAL_OBJECTS float4 c0local = (float4&)hullA.m_localCenter; float4 c0 = transform(&c0local, &posA, &ornA); float4 c1local = (float4&)hullB.m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); + float4 c1 = transform(&c1local, &posB, &ornB); const float4 deltaC2 = c0 - c1; -//#endif + //#endif b3Scalar dmin = FLT_MAX; - int curPlaneTests=0; + int curPlaneTests = 0; int numFacesA = hullA.m_numFaces; // Test normals from hullA - for(int i=0;i<numFacesA;i++) + for (int i = 0; i < numFacesA; i++) { - const float4& normal = (float4&)facesA[hullA.m_faceOffset+i].m_plane; - float4 faceANormalWS = b3QuatRotate(ornA,normal); + const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane; + float4 faceANormalWS = b3QuatRotate(ornA, normal); - if (dot3F4(deltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; + if (dot3F4(deltaC2, faceANormalWS) < 0) + faceANormalWS *= -1.f; curPlaneTests++; #ifdef TEST_INTERNAL_OBJECTS gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) + if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) continue; gActualNbTests++; #endif - b3Scalar d; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d)) + if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d)) return false; - if(d<dmin) + if (d < dmin) { dmin = d; sep = (b3Vector3&)faceANormalWS; @@ -672,28 +624,28 @@ static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3Con int numFacesB = hullB.m_numFaces; // Test normals from hullB - for(int i=0;i<numFacesB;i++) + for (int i = 0; i < numFacesB; i++) { - float4 normal = (float4&)facesB[hullB.m_faceOffset+i].m_plane; + float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane; float4 WorldNormal = b3QuatRotate(ornB, normal); - if (dot3F4(deltaC2,WorldNormal)<0) + if (dot3F4(deltaC2, WorldNormal) < 0) { - WorldNormal*=-1.f; + WorldNormal *= -1.f; } curPlaneTests++; #ifdef TEST_INTERNAL_OBJECTS gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin)) + if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin)) continue; gActualNbTests++; #endif b3Scalar d; - if(!TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d)) + if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d)) return false; - if(d<dmin) + if (d < dmin) { dmin = d; sep = (b3Vector3&)WorldNormal; @@ -702,70 +654,65 @@ static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3Con int curEdgeEdge = 0; // Test edges - for(int e0=0;e0<hullA.m_numUniqueEdges;e0++) + for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++) { - const float4& edge0 = (float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0]; - float4 edge0World = b3QuatRotate(ornA,(float4&)edge0); + const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0]; + float4 edge0World = b3QuatRotate(ornA, (float4&)edge0); - for(int e1=0;e1<hullB.m_numUniqueEdges;e1++) + for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++) { - const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1]; - float4 edge1World = b3QuatRotate(ornB,(float4&)edge1); - + const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1]; + float4 edge1World = b3QuatRotate(ornB, (float4&)edge1); - float4 crossje = cross3(edge0World,edge1World); + float4 crossje = cross3(edge0World, edge1World); curEdgeEdge++; - if(!IsAlmostZero((b3Vector3&)crossje)) + if (!IsAlmostZero((b3Vector3&)crossje)) { crossje = normalize3(crossje); - if (dot3F4(deltaC2,crossje)<0) - crossje*=-1.f; - + if (dot3F4(deltaC2, crossje) < 0) + crossje *= -1.f; #ifdef TEST_INTERNAL_OBJECTS gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin)) + if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin)) continue; gActualNbTests++; #endif b3Scalar dist; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist)) + if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist)) return false; - if(dist<dmin) + if (dist < dmin) { dmin = dist; sep = (b3Vector3&)crossje; } } } - } - - if((dot3F4(-deltaC2,(float4&)sep))>0.0f) + if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f) sep = -sep; return true; } - -bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - const b3Float4& posA1, - const b3Quat& ornA, - const b3Float4& posB1, - const b3Quat& ornB, - const b3Float4& DeltaC2, - __global const b3AlignedObjectArray<float4>& vertices, - __global const b3AlignedObjectArray<float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - float4* sep, - float* dmin) +bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, + const b3Float4& posA1, + const b3Quat& ornA, + const b3Float4& posB1, + const b3Quat& ornB, + const b3Float4& DeltaC2, + __global const b3AlignedObjectArray<float4>& vertices, + __global const b3AlignedObjectArray<float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + float4* sep, + float* dmin) { -// int i = get_global_id(0); + // int i = get_global_id(0); float4 posA = posA1; posA.w = 0.f; @@ -776,97 +723,89 @@ bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, _ int curEdgeEdge = 0; // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) + for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++) { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = b3QuatRotate(ornA,edge0); + const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0]; + float4 edge0World = b3QuatRotate(ornA, edge0); - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) + for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++) { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = b3QuatRotate(ornB,edge1); + const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1]; + float4 edge1World = b3QuatRotate(ornB, edge1); - - float4 crossje = cross3(edge0World,edge1World); + float4 crossje = cross3(edge0World, edge1World); curEdgeEdge++; - if(!IsAlmostZero(crossje)) + if (!IsAlmostZero(crossje)) { crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje*=-1.f; - + if (dot3F4(DeltaC2, crossje) < 0) + crossje *= -1.f; + float dist; bool result = true; { - float Min0,Max0; - float Min1,Max1; - project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); - project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); - - if(Max0<Min1 || Max1<Min0) + float Min0, Max0; + float Min1, Max1; + project(*hullA, posA, ornA, crossje, vertices, Min0, Max0); + project(*hullB, posB, ornB, crossje, vertices, Min1, Max1); + + if (Max0 < Min1 || Max1 < Min0) result = false; - + float d0 = Max0 - Min1; float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; + dist = d0 < d1 ? d0 : d1; result = true; - } - - if(dist<*dmin) + if (dist < *dmin) { *dmin = dist; *sep = crossje; } } } - } - - if((dot3F4(-DeltaC2,*sep))>0.0f) + if ((dot3F4(-DeltaC2, *sep)) > 0.0f) { *sep = -(*sep); } return true; } - -__inline float4 lerp3(const float4& a,const float4& b, float t) +__inline float4 lerp3(const float4& a, const float4& b, float t) { - return b3MakeVector3( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); + return b3MakeVector3(a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); } - // Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) +int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut) { - int ve; float ds, de; int numVertsOut = 0; if (numVertsIn < 2) return 0; - float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 firstVertex = pVtxIn[numVertsIn - 1]; float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS; for (ve = 0; ve < numVertsIn; ve++) { - endVertex=pVtxIn[ve]; + endVertex = pVtxIn[ve]; - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + de = dot3F4(planeNormalWS, endVertex) + planeEqWS; - if (ds<0) + if (ds < 0) { - if (de<0) + if (de < 0) { // Start < 0, end < 0, so output endVertex ppVtxOut[numVertsOut++] = endVertex; @@ -874,15 +813,15 @@ int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float p else { // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); } } else { - if (de<0) + if (de < 0) { // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); ppVtxOut[numVertsOut++] = endVertex; } } @@ -892,36 +831,35 @@ int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float p return numVertsOut; } - -int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, - const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, - float4* contactsOut, - int contactCapacity) +int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, + const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, + //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, + float4* contactsOut, + int contactCapacity) { int numContactsOut = 0; float4* pVtxIn = worldVertsB1; float4* pVtxOut = worldVertsB2; - + int numVertsIn = numWorldVertsB1; int numVertsOut = 0; - int closestFaceA=-1; + int closestFaceA = -1; { float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) + for (int face = 0; face < hullA->m_numFaces; face++) { const float4 Normal = b3MakeVector3( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = b3QuatRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); + facesA[hullA->m_faceOffset + face].m_plane.x, + facesA[hullA->m_faceOffset + face].m_plane.y, + facesA[hullA->m_faceOffset + face].m_plane.z, 0.f); + const float4 faceANormalWS = b3QuatRotate(ornA, Normal); + + float d = dot3F4(faceANormalWS, separatingNormal); if (d < dmin) { dmin = d; @@ -929,33 +867,33 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron } } } - if (closestFaceA<0) + if (closestFaceA < 0) return numContactsOut; - b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; + b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA]; // clip polygon to back of planes of all faces of hull A that are adjacent to witness face -// int numContacts = numWorldVertsB1; + // int numContacts = numWorldVertsB1; int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) + for (int e0 = 0; e0 < numVerticesA; e0++) { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; - const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]]; + const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]]; const float4 edge0 = a - b; - const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); + const float4 WorldEdge0 = b3QuatRotate(ornA, edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); + float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1); + float4 worldA1 = transform(&a, &posA, &ornA); + float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1); - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - + float planeEqWS = planeEqWS1; + //clip face //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut); //btSwap(pVtxIn,pVtxOut); float4* tmp = pVtxOut; @@ -965,32 +903,32 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron numVertsOut = 0; } - // only keep points that are behind the witness face { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;i<numVertsIn;i++) + float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal); + float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA); + for (int i = 0; i < numVertsIn; i++) { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) + float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS; + if (depth <= minDist) { depth = minDist; } - if (numContactsOut<contactCapacity) + if (numContactsOut < contactCapacity) { - if (depth <=maxDist) + if (depth <= maxDist) { float4 pointInWorld = pVtxIn[i]; //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth); //printf("depth=%f\n",depth); } - } else + } + else { - b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut,contactCapacity); + b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity); } } } @@ -998,62 +936,60 @@ int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedron return numContactsOut; } +static int clipHullAgainstHull(const float4& separatingNormal, + const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, + const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, + const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, - -static int clipHullAgainstHull(const float4& separatingNormal, - const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA,const float4& posB, const b3Quaternion& ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, - - float4* contactsOut, - int contactCapacity) + float4* contactsOut, + int contactCapacity) { int numContactsOut = 0; - int numWorldVertsB1= 0; - + int numWorldVertsB1 = 0; + B3_PROFILE("clipHullAgainstHull"); -// float curMaxDist=maxDist; - int closestFaceB=-1; + // float curMaxDist=maxDist; + int closestFaceB = -1; float dmax = -FLT_MAX; { //B3_PROFILE("closestFaceB"); - if (hullB.m_numFaces!=1) + if (hullB.m_numFaces != 1) { //printf("wtf\n"); } static bool once = true; //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); - - for(int face=0;face<hullB.m_numFaces;face++) + + for (int face = 0; face < hullB.m_numFaces; face++) { #ifdef BT_DEBUG_SAT_FACE if (once) - printf("face %d\n",face); - const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face]; + printf("face %d\n", face); + const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face]; if (once) { - for (int i=0;i<faceB->m_numIndices;i++) + for (int i = 0; i < faceB->m_numIndices; i++) { - float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; - printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); + float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]]; + printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z); } } -#endif //BT_DEBUG_SAT_FACE - //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) +#endif //BT_DEBUG_SAT_FACE \ + //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) { - const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, - facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); + const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x, + facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f); const float4 WorldNormal = b3QuatRotate(ornB, Normal); #ifdef BT_DEBUG_SAT_FACE if (once) - printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); + printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z); #endif - float d = dot3F4(WorldNormal,separatingNormal); + float d = dot3F4(WorldNormal, separatingNormal); if (d > dmax) { dmax = d; @@ -1064,184 +1000,176 @@ static int clipHullAgainstHull(const float4& separatingNormal, once = false; } - - b3Assert(closestFaceB>=0); + b3Assert(closestFaceB >= 0); { //B3_PROFILE("worldVertsB1"); - const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; + const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB]; const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) + for (int e0 = 0; e0 < numVertices; e0++) { - const float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB); } } - if (closestFaceB>=0) + if (closestFaceB >= 0) { //B3_PROFILE("clipFaceAgainstHull"); - numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, - verticesA, facesA, indicesA, - contactsOut,contactCapacity); + numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, + posA, ornA, + worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist, + verticesA, facesA, indicesA, + contactsOut, contactCapacity); } return numContactsOut; } +#define PARALLEL_SUM(v, n) \ + for (int j = 1; j < n; j++) v[0] += v[j]; +#define PARALLEL_DO(execution, n) \ + for (int ie = 0; ie < n; ie++) \ + { \ + execution; \ + } +#define REDUCE_MAX(v, n) \ + { \ + int i = 0; \ + for (int offset = 0; offset < n; offset++) v[i] = (v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \ + } +#define REDUCE_MIN(v, n) \ + { \ + int i = 0; \ + for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \ + } +int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) +{ + if (nPoints == 0) + return 0; + if (nPoints <= 4) + return nPoints; + if (nPoints > 64) + nPoints = 64; - -#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} -#define REDUCE_MAX(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } - -int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0,0,0,0); + float4 center = make_float4(0, 0, 0, 0); { - - for (int i=0;i<nPoints;i++) + for (int i = 0; i < nPoints; i++) center += p[i]; center /= (float)nPoints; } - - - + // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} + float4 aVector = p[0] - center; + float4 u = cross3(nearNormal, aVector); + float4 v = cross3(nearNormal, u); + u = normalize3(u); + v = normalize3(v); + + //keep point with deepest penetration + float minW = FLT_MAX; + int minIndex = -1; + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for (int ie = 0; ie < nPoints; ie++) + { + if (p[ie].w < minW) + { + minW = p[ie].w; + minIndex = ie; + } + float f; + float4 r = p[ie] - center; + f = dot3F4(u, r); + if (f < maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4(-u, r); + if (f < maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + f = dot3F4(v, r); + if (f < maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4(-v, r); + if (f < maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; +} int clipHullHullSingle( - int bodyIndexA, int bodyIndexB, - const float4& posA, - const b3Quaternion& ornA, - const float4& posB, - const b3Quaternion& ornB, + int bodyIndexA, int bodyIndexB, + const float4& posA, + const b3Quaternion& ornA, + const float4& posB, + const b3Quaternion& ornB, - int collidableIndexA, int collidableIndexB, + int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, - b3AlignedObjectArray<b3Contact4>* globalContactOut, - int& nContacts, - - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, - - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, - const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, - const b3Vector3& sepNormalWorldSpace, - int maxContactCapacity ) + const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, + b3AlignedObjectArray<b3Contact4>* globalContactOut, + int& nContacts, + + const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, + const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, + + const b3AlignedObjectArray<b3Vector3>& verticesA, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, + const b3AlignedObjectArray<b3GpuFace>& facesA, + const b3AlignedObjectArray<int>& indicesA, + + const b3AlignedObjectArray<b3Vector3>& verticesB, + const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, + const b3AlignedObjectArray<b3GpuFace>& facesB, + const b3AlignedObjectArray<int>& indicesB, + + const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, + const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, + const b3Vector3& sepNormalWorldSpace, + int maxContactCapacity) { int contactIndex = -1; b3ConvexPolyhedronData hullA, hullB; - - b3Collidable colA = hostCollidablesA[collidableIndexA]; - hullA = hostConvexDataA[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - - b3Collidable colB = hostCollidablesB[collidableIndexB]; - hullB = hostConvexDataB[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - + + b3Collidable colA = hostCollidablesA[collidableIndexA]; + hullA = hostConvexDataA[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); + + b3Collidable colB = hostCollidablesB[collidableIndexB]; + hullB = hostConvexDataB[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); + float4 contactsOut[MAX_VERTS]; int localContactCapacity = MAX_VERTS; @@ -1249,133 +1177,125 @@ int clipHullHullSingle( b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); #endif - - + { - float4 worldVertsB1[MAX_VERTS]; float4 worldVertsB2[MAX_VERTS]; int capacityWorldVerts = MAX_VERTS; - float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); + float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f); int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; b3Scalar minDist = -1; b3Scalar maxDist = 0.; - - - b3Transform trA,trB; + b3Transform trA, trB; { - //B3_PROFILE("transform computation"); - //trA.setIdentity(); - trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); - trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); - - //trB.setIdentity(); - trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); - trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); + //B3_PROFILE("transform computation"); + //trA.setIdentity(); + trA.setOrigin(b3MakeVector3(posA.x, posA.y, posA.z)); + trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w)); + + //trB.setIdentity(); + trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z)); + trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w)); } b3Quaternion trAorn = trA.getRotation(); - b3Quaternion trBorn = trB.getRotation(); - - int numContactsOut = clipHullAgainstHull(hostNormal, - hostConvexDataA.at(shapeA), - hostConvexDataB.at(shapeB), - (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, - (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - verticesA, facesA,indicesA, - verticesB, facesB,indicesB, - - contactsOut,localContactCapacity); - - if (numContactsOut>0) + b3Quaternion trBorn = trB.getRotation(); + + int numContactsOut = clipHullAgainstHull(hostNormal, + hostConvexDataA.at(shapeA), + hostConvexDataB.at(shapeB), + (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, + (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, + worldVertsB1, worldVertsB2, capacityWorldVerts, + minDist, maxDist, + verticesA, facesA, indicesA, + verticesB, facesB, indicesB, + + contactsOut, localContactCapacity); + + if (numContactsOut > 0) { B3_PROFILE("overlap"); float4 normalOnSurfaceB = (float4&)hostNormal; - + b3Int4 contactIdx; contactIdx.x = 0; contactIdx.y = 1; contactIdx.z = 2; contactIdx.w = 3; - + int numPoints = 0; - + { - // B3_PROFILE("extractManifold"); - numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); + // B3_PROFILE("extractManifold"); + numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); } - + b3Assert(numPoints); - - if (nContacts<maxContactCapacity) + + if (nContacts < maxContactCapacity) { contactIndex = nContacts; globalContactOut->expand(); b3Contact4& contact = globalContactOut->at(nContacts); - contact.m_batchIdx = 0;//i; - contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; - contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; + contact.m_batchIdx = 0; //i; + contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; + contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; contact.m_frictionCoeffCmp = 45874; contact.m_restituitionCoeffCmp = 0; - - // float distance = 0.f; - for (int p=0;p<numPoints;p++) + + // float distance = 0.f; + for (int p = 0; p < numPoints; p++) { - contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B - contact.m_worldNormalOnB = normalOnSurfaceB; + contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B + contact.m_worldNormalOnB = normalOnSurfaceB; } //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); contact.m_worldNormalOnB.w = (b3Scalar)numPoints; nContacts++; - } else + } + else { - b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity); + b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity); } } } return contactIndex; } - - - - void computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) { - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; + int shapeIndex = collidables[collidableIndexB].m_shapeIndex; const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; - + b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; -// int numContactsOut = 0; -// int numWorldVertsB1= 0; + // int numContactsOut = 0; + // int numWorldVertsB1= 0; b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); + b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); + b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal); float planeConstant = planeEq.w; b3Transform convexWorldTransform; convexWorldTransform.setIdentity(); @@ -1387,13 +1307,13 @@ void computeContactPlaneConvex(int pairIndex, planeTransform.setRotation(ornA); b3Transform planeInConvex; - planeInConvex= convexWorldTransform.inverse() * planeTransform; + planeInConvex = convexWorldTransform.inverse() * planeTransform; b3Transform convexInPlane; convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + + b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal; float maxDot = -1e30; - int hitVertex=-1; + int hitVertex = -1; b3Vector3 hitVtx; #define MAX_PLANE_CONVEX_POINTS 64 @@ -1406,54 +1326,52 @@ void computeContactPlaneConvex(int pairIndex, contactIdx.s[1] = 1; contactIdx.s[2] = 2; contactIdx.s[3] = 3; - - for (int i=0;i<hullB->m_numVertices;i++) + + for (int i = 0; i < hullB->m_numVertices; i++) { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; float curDot = vtx.dot(planeNormalInConvex); - - if (curDot>maxDot) + if (curDot > maxDot) { - hitVertex=i; - maxDot=curDot; + hitVertex = i; + maxDot = curDot; hitVtx = vtx; //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) + if (numPoints == MAX_PLANE_CONVEX_POINTS) numPoints--; } - if (numPoints<MAX_PLANE_CONVEX_POINTS) + if (numPoints < MAX_PLANE_CONVEX_POINTS) { - b3Vector3 vtxWorld = convexWorldTransform*vtx; - b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; - float dist = planeNormal.dot(vtxInPlane)-planeConstant; - if (dist<0.f) + b3Vector3 vtxWorld = convexWorldTransform * vtx; + b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; + float dist = planeNormal.dot(vtxInPlane) - planeConstant; + if (dist < 0.f) { vtxWorld.w = dist; contactPoints[numPoints] = vtxWorld; numPoints++; } } - } - int numReducedPoints = 0; + int numReducedPoints = 0; numReducedPoints = numPoints; - - if (numPoints>4) + + if (numPoints > 4) { - numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); } int dstIdx; -// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints>0) + // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (numReducedPoints > 0) { if (nGlobalContactsOut < maxContactCapacity) { - dstIdx=nGlobalContactsOut; + dstIdx = nGlobalContactsOut; nGlobalContactsOut++; b3Contact4* c = &globalContactsOut[dstIdx]; @@ -1462,38 +1380,33 @@ void computeContactPlaneConvex(int pairIndex, c->setRestituitionCoeff(0.f); c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - for (int i=0;i<numReducedPoints;i++) + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; + for (int i = 0; i < numReducedPoints; i++) { b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPosB[i] = pOnB1; } c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - }//if (dstIdx < numPairs) - } - - + } //if (dstIdx < numPairs) + } -// printf("computeContactPlaneConvex\n"); + // printf("computeContactPlaneConvex\n"); } - - -B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) - { - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (quantization.x), - (b3Scalar)(vecIn[1]) / (quantization.y), - (b3Scalar)(vecIn[2]) / (quantization.z)); - vecOut += bvhAabbMin; - return vecOut; - } +B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) +{ + b3Vector3 vecOut; + vecOut.setValue( + (b3Scalar)(vecIn[0]) / (quantization.x), + (b3Scalar)(vecIn[1]) / (quantization.y), + (b3Scalar)(vecIn[2]) / (quantization.z)); + vecOut += bvhAabbMin; + return vecOut; +} void traverseTreeTree() { - } #include "Bullet3Common/shared/b3Mat3x3.h" @@ -1503,44 +1416,40 @@ int maxNumAabbChecks = 0; int maxDepth = 0; // work-in-progress -__kernel void findCompoundPairsKernel( +__kernel void findCompoundPairsKernel( int pairIndex, int bodyIndexA, int bodyIndexB, int collidableIndexA, int collidableIndexB, - __global const b3RigidBodyData* rigidBodies, + __global const b3RigidBodyData* rigidBodies, __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, + __global const b3ConvexPolyhedronData* convexShapes, __global const b3AlignedObjectArray<b3Float4>& vertices, __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace, __global const b3GpuChildShape* gpuChildShapes, __global b3Int4* gpuCompoundPairsOut, - __global int* numCompoundPairsOut, + __global int* numCompoundPairsOut, int maxNumCompoundPairsCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU - ) + b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, + b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, + b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU) { - numAabbChecks=0; - maxNumAabbChecks=0; -// int i = pairIndex; + numAabbChecks = 0; + maxNumAabbChecks = 0; + // int i = pairIndex; { - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) + if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0)) { return; } - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) { int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; @@ -1548,9 +1457,8 @@ __kernel void findCompoundPairsKernel( int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; - int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; - + float4 posA = rigidBodies[bodyIndexA].m_pos; b3Quat ornA = rigidBodies[bodyIndexA].m_quat; @@ -1567,41 +1475,37 @@ __kernel void findCompoundPairsKernel( transB.setOrigin(posB); transB.setRotation(ornB); - - - for (int p=0;p<numSubTreesA;p++) + for (int p = 0; p < numSubTreesA; p++) { - b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p]; + b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p]; //bvhInfoCPU[bvhA].m_quantization - b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 aabbAMinOut,aabbAMaxOut; - float margin=0.f; - b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + b3Vector3 aabbAMinOut, aabbAMaxOut; + float margin = 0.f; + b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - for (int q=0;q<numSubTreesB;q++) + for (int q = 0; q < numSubTreesB; q++) { - b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q]; + b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q]; - b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 aabbBMinOut,aabbBMaxOut; - float margin=0.f; - b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + b3Vector3 aabbBMinOut, aabbBMaxOut; + float margin = 0.f; + b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks=0; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + numAabbChecks = 0; + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); if (aabbOverlap) { - - int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfoCPU[bvhA].m_nodeOffset; - // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; + int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset; + // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfoCPU[bvhB].m_nodeOffset; - // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; + int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset; + // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; b3AlignedObjectArray<b3Int2> nodeStack; b3Int2 node0; @@ -1610,33 +1514,33 @@ __kernel void findCompoundPairsKernel( int maxStackDepth = 1024; nodeStack.resize(maxStackDepth); - int depth=0; - nodeStack[depth++]=node0; + int depth = 0; + nodeStack[depth++] = node0; do { if (depth > maxDepth) { - maxDepth=depth; - printf("maxDepth=%d\n",maxDepth); + maxDepth = depth; + printf("maxDepth=%d\n", maxDepth); } b3Int2 node = nodeStack[--depth]; - - b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); + b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - float margin=0.f; - b3Vector3 aabbAMinOut,aabbAMaxOut; - b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); + b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + float margin = 0.f; + b3Vector3 aabbAMinOut, aabbAMaxOut; + b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); + + b3Vector3 aabbBMinOut, aabbBMaxOut; + b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); numAabbChecks++; - bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); if (nodeOverlap) { bool isLeafA = treeNodesCPU[node.x].isLeafNode(); @@ -1645,23 +1549,23 @@ __kernel void findCompoundPairsKernel( bool isInternalB = !isLeafB; //fail, even though it might hit two leaf nodes - if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) + if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB)) { b3Error("Error: traversal exceeded maxStackDepth\n"); continue; } - if(isInternalA) + if (isInternalA) { - int nodeAleftChild = node.x+1; - bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); - int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); + int nodeAleftChild = node.x + 1; + bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode(); + int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex(); - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); + if (isInternalB) + { + int nodeBleftChild = node.y + 1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); @@ -1670,90 +1574,83 @@ __kernel void findCompoundPairsKernel( } else { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); + nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y); + nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y); } } else { - if(isInternalB) + if (isInternalB) { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); + int nodeBleftChild = node.y + 1; + bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); + int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); + nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild); + nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild); } else { int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) + if (compoundPairIdx < maxNumCompoundPairsCapacity) { int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex(); int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); } } } } } while (depth); - maxNumAabbChecks = b3Max(numAabbChecks,maxNumAabbChecks); + maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks); } } } - + return; } - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) { - - if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c=0;c<numChildrenA;c++) + for (int c = 0; c < numChildrenA; c++) { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; + int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c; int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 posA = rigidBodies[bodyIndexA].m_pos; b3Quat ornA = rigidBodies[bodyIndexA].m_quat; float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - + float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; + b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; - b3Transform transA; transA.setIdentity(); transA.setOrigin(newPosA); transA.setRotation(newOrnA); - b3Scalar margin=0.0f; + b3Scalar margin = 0.0f; - b3Vector3 aabbAMinOut,aabbAMaxOut; + b3Vector3 aabbAMinOut, aabbAMaxOut; - b3TransformAabb2((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); + b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + if (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) + for (int b = 0; b < numChildrenB; b++) { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 posB = rigidBodies[bodyIndexB].m_pos; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - - + float4 newPosB = transform(&childPosB, &posB, &ornB); + b3Quat newOrnB = b3QuatMul(ornB, childOrnB); b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; @@ -1762,11 +1659,11 @@ __kernel void findCompoundPairsKernel( transB.setOrigin(newPosB); transB.setRotation(newOrnB); - b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); + b3Vector3 aabbBMinOut, aabbBMaxOut; + b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); numAabbChecks++; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); + bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); if (aabbOverlap) { /* @@ -1784,22 +1681,22 @@ __kernel void findCompoundPairsKernel( float4 c1 = transform(&c1local,&posB,&ornB); const float4 DeltaC2 = c0 - c1; */ - {// + { // int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) + if (compoundPairIdx < maxNumCompoundPairsCapacity) { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); } - }// - }//fi(1) - } //for (int b=0 - }//if (collidables[collidableIndexB]. - else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + } // + } //fi(1) + } //for (int b=0 + } //if (collidables[collidableIndexB]. + else //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) { if (1) { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + // float dmin = FLT_MAX; float4 posA = newPosA; posA.w = 0.f; float4 posB = rigidBodies[bodyIndexB].m_pos; @@ -1811,45 +1708,43 @@ __kernel void findCompoundPairsKernel( float4 c1local = convexShapes[shapeIndexB].m_localCenter; b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 c1; - c1 = transform(&c1local,&posB,&ornB); - // const float4 DeltaC2 = c0 - c1; + c1 = transform(&c1local, &posB, &ornB); + // const float4 DeltaC2 = c0 - c1; { int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) + if (compoundPairIdx < maxNumCompoundPairsCapacity) { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,-1); - }//if (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - }//for (int b=0;b<numChildrenB;b++) + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1); + } //if (compoundPairIdx<maxNumCompoundPairsCapacity) + } // + } //fi (1) + } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + } //for (int b=0;b<numChildrenB;b++) return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) { int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) + for (int b = 0; b < numChildrenB; b++) { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 posB = rigidBodies[bodyIndexB].m_pos; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; + b3Quat newOrnB = b3QuatMul(ornB, childOrnB); int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - ////////////////////////////////////// if (1) { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + // float dmin = FLT_MAX; float4 posA = rigidBodies[bodyIndexA].m_pos; posA.w = 0.f; float4 posB = newPosB; @@ -1859,99 +1754,96 @@ __kernel void findCompoundPairsKernel( float4 c0; c0 = transform(&c0local, &posA, &ornA); float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB =newOrnB; + b3Quat ornB = newOrnB; float4 c1; - c1 = transform(&c1local,&posB,&ornB); - // const float4 DeltaC2 = c0 - c1; - {// + c1 = transform(&c1local, &posB, &ornB); + // const float4 DeltaC2 = c0 - c1; + { // int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) + if (compoundPairIdx < maxNumCompoundPairsCapacity) { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,-1,childShapeIndexB); - }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//for (int b=0;b<numChildrenB;b++) + gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB); + } //fi (compoundPairIdx<maxNumCompoundPairsCapacity) + } // + } //fi (1) + } //for (int b=0;b<numChildrenB;b++) return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) + } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) return; - }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - }//i<numPairs + } //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) + } //i<numPairs } - - -__kernel void processCompoundPairsKernel( __global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global b3Aabb* aabbs, - __global const b3GpuChildShape* gpuChildShapes, - __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - int numCompoundPairs, - int i - ) +__kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray<b3Float4>& vertices, + __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + __global b3Aabb* aabbs, + __global const b3GpuChildShape* gpuChildShapes, + __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, + __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, + int numCompoundPairs, + int i) { - -// int i = get_global_id(0); - if (i<numCompoundPairs) + // int i = get_global_id(0); + if (i < numCompoundPairs) { int bodyIndexA = gpuCompoundPairs[i].x; int bodyIndexB = gpuCompoundPairs[i].y; int childShapeIndexA = gpuCompoundPairs[i].z; int childShapeIndexB = gpuCompoundPairs[i].w; - + int collidableIndexA = -1; int collidableIndexB = -1; - + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; float4 posA = rigidBodies[bodyIndexA].m_pos; - + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 posB = rigidBodies[bodyIndexB].m_pos; - + if (childShapeIndexA >= 0) { collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; + b3Quat newOrnA = b3QuatMul(ornA, childOrnA); posA = newPosA; ornA = newOrnA; - } else + } + else { collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; } - - if (childShapeIndexB>=0) + + if (childShapeIndexB >= 0) { collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; + b3Quat newOrnB = b3QuatMul(ornB, childOrnB); posB = newPosB; ornB = newOrnB; - } else + } + else { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; } - + gpuHasCompoundSepNormalsOut[i] = 0; - + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - + int shapeTypeA = collidables[collidableIndexA].m_shapeType; int shapeTypeB = collidables[collidableIndexB].m_shapeType; - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) { @@ -1959,145 +1851,142 @@ __kernel void processCompoundPairsKernel( __global const b3Int4* gpuCompoundPa } int hasSeparatingAxis = 5; - - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; + + // int numFacesA = convexShapes[shapeIndexA].m_numFaces; float dmin = FLT_MAX; posA.w = 0.f; posB.w = 0.f; float4 c0local = convexShapes[shapeIndexA].m_localCenter; float4 c0 = transform(&c0local, &posA, &ornA); float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); + float4 c1 = transform(&c1local, &posB, &ornB); const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1,0,0,0); -// bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); - + float4 sepNormal = make_float4(1, 0, 0, 0); + // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); + hasSeparatingAxis = 4; if (!sepA) { hasSeparatingAxis = 0; - } else + } + else { - bool sepB = findSeparatingAxis( convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); + bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); if (!sepB) { hasSeparatingAxis = 0; - } else//(!sepB) + } + else //(!sepB) { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); + bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin); if (sepEE) { - gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - }//sepEE - }//(!sepB) - }//(!sepA) - - + gpuCompoundSepNormalsOut[i] = sepNormal; //fastNormalize4(sepNormal); + gpuHasCompoundSepNormalsOut[i] = 1; + } //sepEE + } //(!sepB) + } //(!sepA) } - } - -__kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global const b3GpuChildShape* gpuChildShapes, - __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - __global struct b3Contact4Data* globalContactsOut, - int* nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity, int i) +__kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs, + __global const b3RigidBodyData* rigidBodies, + __global const b3Collidable* collidables, + __global const b3ConvexPolyhedronData* convexShapes, + __global const b3AlignedObjectArray<b3Float4>& vertices, + __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, + __global const b3AlignedObjectArray<b3GpuFace>& faces, + __global const b3AlignedObjectArray<int>& indices, + __global const b3GpuChildShape* gpuChildShapes, + __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, + __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, + __global struct b3Contact4Data* globalContactsOut, + int* nGlobalContactsOut, + int numCompoundPairs, int maxContactCapacity, int i) { - -// int i = get_global_id(0); + // int i = get_global_id(0); int pairIndex = i; - + float4 worldVertsB1[64]; float4 worldVertsB2[64]; - int capacityWorldVerts = 64; + int capacityWorldVerts = 64; float4 localContactsOut[64]; - int localContactCapacity=64; - + int localContactCapacity = 64; + float minDist = -1e30f; float maxDist = 0.0f; - if (i<numCompoundPairs) + if (i < numCompoundPairs) { - if (gpuHasCompoundSepNormalsOut[i]) { - int bodyIndexA = gpuCompoundPairs[i].x; int bodyIndexB = gpuCompoundPairs[i].y; - + int childShapeIndexA = gpuCompoundPairs[i].z; int childShapeIndexB = gpuCompoundPairs[i].w; - + int collidableIndexA = -1; int collidableIndexB = -1; - + b3Quat ornA = rigidBodies[bodyIndexA].m_quat; float4 posA = rigidBodies[bodyIndexA].m_pos; - + b3Quat ornB = rigidBodies[bodyIndexB].m_quat; float4 posB = rigidBodies[bodyIndexB].m_pos; - + if (childShapeIndexA >= 0) { collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); + float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; + b3Quat newOrnA = b3QuatMul(ornA, childOrnA); posA = newPosA; ornA = newOrnA; - } else + } + else { collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; } - - if (childShapeIndexB>=0) + + if (childShapeIndexB >= 0) { collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); + b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; + b3Quat newOrnB = b3QuatMul(ornB, childOrnB); posB = newPosB; ornB = newOrnB; - } else + } + else { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; } - + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - convexShapes[shapeIndexA], convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { + convexShapes[shapeIndexA], convexShapes[shapeIndexB], + posA, ornA, + posB, ornB, + worldVertsB1, worldVertsB2, capacityWorldVerts, + minDist, maxDist, + vertices, faces, indices, + vertices, faces, indices, + localContactsOut, localContactCapacity); + + if (numLocalContactsOut > 0) + { float4 normal = -gpuCompoundSepNormalsOut[i]; int nPoints = numLocalContactsOut; float4* pointsIn = localContactsOut; - b3Int4 contactIdx;// = {-1,-1,-1,-1}; + b3Int4 contactIdx; // = {-1,-1,-1,-1}; contactIdx.s[0] = 0; contactIdx.s[1] = 1; @@ -2105,111 +1994,106 @@ __kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundP contactIdx.s[3] = 3; int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - + int dstIdx; - dstIdx = b3AtomicInc( nGlobalContactsOut); - if ((dstIdx+nReducedContacts) < maxContactCapacity) + dstIdx = b3AtomicInc(nGlobalContactsOut); + if ((dstIdx + nReducedContacts) < maxContactCapacity) { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + __global struct b3Contact4Data* c = globalContactsOut + dstIdx; c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_restituitionCoeffCmp = (0.f * 0xffff); + c->m_frictionCoeffCmp = (0.7f * 0xffff); c->m_batchIdx = pairIndex; int bodyA = gpuCompoundPairs[pairIndex].x; int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? -bodyB : bodyB; c->m_childIndexA = childShapeIndexA; c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) + for (int i = 0; i < nReducedContacts; i++) { c->m_worldPosB[i] = pointsIn[contactIdx.s[i]]; } - b3Contact4Data_setNumPoints(c,nReducedContacts); + b3Contact4Data_setNumPoints(c, nReducedContacts); } - - }// if (numContactsOut>0) - }// if (gpuHasCompoundSepNormalsOut[i]) - }// if (i<numCompoundPairs) + } // if (numContactsOut>0) + } // if (gpuHasCompoundSepNormalsOut[i]) + } // if (i<numCompoundPairs) } - void computeContactCompoundCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, - const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, - - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU - ) + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3GpuChildShape* cpuChildShapes, + const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, + const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, + + const b3AlignedObjectArray<b3Vector3>& convexVertices, + const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, + const b3AlignedObjectArray<int>& convexIndices, + const b3AlignedObjectArray<b3GpuFace>& faces, + + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, + b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, + b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU) { - int shapeTypeB = collidables[collidableIndexB].m_shapeType; b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - int numCompoundPairsOut=0; - int maxNumCompoundPairsCapacity = 8192;//1024; + int numCompoundPairsOut = 0; + int maxNumCompoundPairsCapacity = 8192; //1024; cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); // work-in-progress - findCompoundPairsKernel( - pairIndex, - bodyIndexA,bodyIndexB, - collidableIndexA,collidableIndexB, - rigidBodies, - collidables, - convexShapes, - convexVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - cpuChildShapes, - &cpuCompoundPairsOut[0], - &numCompoundPairsOut, - maxNumCompoundPairsCapacity , - treeNodesCPU, - subTreesCPU, - bvhInfoCPU - ); - - printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); - if (numCompoundPairsOut>maxNumCompoundPairsCapacity) + findCompoundPairsKernel( + pairIndex, + bodyIndexA, bodyIndexB, + collidableIndexA, collidableIndexB, + rigidBodies, + collidables, + convexShapes, + convexVertices, + hostAabbsWorldSpace, + hostAabbsLocalSpace, + cpuChildShapes, + &cpuCompoundPairsOut[0], + &numCompoundPairsOut, + maxNumCompoundPairsCapacity, + treeNodesCPU, + subTreesCPU, + bvhInfoCPU); + + printf("maxNumAabbChecks=%d\n", maxNumAabbChecks); + if (numCompoundPairsOut > maxNumCompoundPairsCapacity) { - b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); - numCompoundPairsOut=maxNumCompoundPairsCapacity; + b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity); + numCompoundPairsOut = maxNumCompoundPairsCapacity; } b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut; b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut; cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); - for (int i=0;i<numCompoundPairsOut;i++) + for (int i = 0; i < numCompoundPairsOut; i++) { - - processCompoundPairsKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,0,cpuChildShapes, - cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,numCompoundPairsOut,i); + processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes, + cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i); } - for (int i=0;i<numCompoundPairsOut;i++) + for (int i = 0; i < numCompoundPairsOut; i++) { - clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,cpuChildShapes, - cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,globalContactsOut,&nGlobalContactsOut,numCompoundPairsOut,maxContactCapacity,i); + clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes, + cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i); } - /* + /* int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; float4 posA = rigidBodies[bodyIndexA].m_pos; @@ -2235,7 +2119,6 @@ void computeContactCompoundCompound(int pairIndex, ); */ - /* if (foundSepAxis) { @@ -2271,8 +2154,8 @@ void computeContactCompoundCompound(int pairIndex, } */ -// return contactIndex; - + // return contactIndex; + /* int numChildrenB = collidables[collidableIndexB].m_numChildShapes; @@ -2294,56 +2177,52 @@ void computeContactCompoundCompound(int pairIndex, } */ - } void computeContactPlaneCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3GpuChildShape* cpuChildShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) { - int shapeTypeB = collidables[collidableIndexB].m_shapeType; b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c=0;c<numChildrenB;c++) + for (int c = 0; c < numChildrenB; c++) { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; + int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c; int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; float4 rootPosB = rigidBodies[bodyIndexB].m_pos; b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; - float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; - b3Quaternion ornB = rootOrnB*childOrnB;//b3QuatMul(ornB,childOrnB); + float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB; + b3Quaternion ornB = rootOrnB * childOrnB; //b3QuatMul(ornB,childOrnB); int shapeIndexB = collidables[childColIndexB].m_shapeIndex; const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; - - + b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - // int numContactsOut = 0; - // int numWorldVertsB1= 0; + // int numContactsOut = 0; + // int numWorldVertsB1= 0; b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); + b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); + b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal); float planeConstant = planeEq.w; b3Transform convexWorldTransform; convexWorldTransform.setIdentity(); @@ -2355,16 +2234,16 @@ void computeContactPlaneCompound(int pairIndex, planeTransform.setRotation(ornA); b3Transform planeInConvex; - planeInConvex= convexWorldTransform.inverse() * planeTransform; + planeInConvex = convexWorldTransform.inverse() * planeTransform; b3Transform convexInPlane; convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; + + b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal; float maxDot = -1e30; - int hitVertex=-1; + int hitVertex = -1; b3Vector3 hitVtx; - #define MAX_PLANE_CONVEX_POINTS 64 +#define MAX_PLANE_CONVEX_POINTS 64 b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; int numPoints = 0; @@ -2374,54 +2253,52 @@ void computeContactPlaneCompound(int pairIndex, contactIdx.s[1] = 1; contactIdx.s[2] = 2; contactIdx.s[3] = 3; - - for (int i=0;i<hullB->m_numVertices;i++) + + for (int i = 0; i < hullB->m_numVertices; i++) { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; + b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; float curDot = vtx.dot(planeNormalInConvex); - - if (curDot>maxDot) + if (curDot > maxDot) { - hitVertex=i; - maxDot=curDot; + hitVertex = i; + maxDot = curDot; hitVtx = vtx; //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) + if (numPoints == MAX_PLANE_CONVEX_POINTS) numPoints--; } - if (numPoints<MAX_PLANE_CONVEX_POINTS) + if (numPoints < MAX_PLANE_CONVEX_POINTS) { - b3Vector3 vtxWorld = convexWorldTransform*vtx; - b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; - float dist = planeNormal.dot(vtxInPlane)-planeConstant; - if (dist<0.f) + b3Vector3 vtxWorld = convexWorldTransform * vtx; + b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; + float dist = planeNormal.dot(vtxInPlane) - planeConstant; + if (dist < 0.f) { vtxWorld.w = dist; contactPoints[numPoints] = vtxWorld; numPoints++; } } - } - int numReducedPoints = 0; + int numReducedPoints = 0; numReducedPoints = numPoints; - - if (numPoints>4) + + if (numPoints > 4) { - numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); + numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); } int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints>0) + // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); + + if (numReducedPoints > 0) { if (nGlobalContactsOut < maxContactCapacity) { - dstIdx=nGlobalContactsOut; + dstIdx = nGlobalContactsOut; nGlobalContactsOut++; b3Contact4* c = &globalContactsOut[dstIdx]; @@ -2430,48 +2307,37 @@ void computeContactPlaneCompound(int pairIndex, c->setRestituitionCoeff(0.f); c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - for (int i=0;i<numReducedPoints;i++) + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; + for (int i = 0; i < numReducedPoints; i++) { b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; c->m_worldPosB[i] = pOnB1; } c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - }//if (dstIdx < numPairs) - } - + } //if (dstIdx < numPairs) + } } - - } - - - - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) +void computeContactSphereConvex(int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3RigidBodyData* rigidBodies, + const b3Collidable* collidables, + const b3ConvexPolyhedronData* convexShapes, + const b3Vector3* convexVertices, + const int* convexIndices, + const b3GpuFace* faces, + b3Contact4* globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity) { - float radius = collidables[collidableIndexA].m_radius; float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - - float4 pos = rigidBodies[bodyIndexB].m_pos; - b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; @@ -2487,64 +2353,65 @@ void computeContactSphereConvex(int pairIndex, int shapeIndex = collidables[collidableIndex].m_shapeIndex; int numFaces = convexShapes[shapeIndex].m_numFaces; float4 closestPnt = b3MakeVector3(0, 0, 0, 0); -// float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); - float minDist = -1000000.f; // TODO: What is the largest/smallest float? + // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); + float minDist = -1000000.f; // TODO: What is the largest/smallest float? bool bCollide = true; int region = -1; float4 localHitNormal; - for ( int f = 0; f < numFaces; f++ ) + for (int f = 0; f < numFaces; f++) { - b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; + b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f]; float4 planeEqn; - float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); + float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); + float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal); planeEqn = n1; planeEqn[3] = face.m_plane.w; float4 pntReturn; float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - if ( dist > radius) + if (dist > radius) { bCollide = false; break; } - if ( dist > 0 ) + if (dist > 0) { //might hit an edge or vertex b3Vector3 out; bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); + &face, + &convexVertices[convexShapes[shapeIndex].m_vertexOffset], + convexIndices, + &out); if (isInPoly) { - if (dist>minDist) + if (dist > minDist) { minDist = dist; closestPnt = pntReturn; localHitNormal = planeEqn; - region=1; + region = 1; } - } else + } + else { - b3Vector3 tmp = spherePos-out; + b3Vector3 tmp = spherePos - out; b3Scalar l2 = tmp.length2(); - if (l2<radius*radius) + if (l2 < radius * radius) { - dist = b3Sqrt(l2); - if (dist>minDist) + dist = b3Sqrt(l2); + if (dist > minDist) { minDist = dist; closestPnt = out; - localHitNormal = tmp/dist; - region=2; + localHitNormal = tmp / dist; + region = 2; } - - } else + } + else { bCollide = false; break; @@ -2553,12 +2420,12 @@ void computeContactSphereConvex(int pairIndex, } else { - if ( dist > minDist ) + if (dist > minDist) { minDist = dist; closestPnt = pntReturn; localHitNormal = planeEqn; - region=3; + region = 3; } } } @@ -2567,128 +2434,113 @@ void computeContactSphereConvex(int pairIndex, if (bCollide && minDist > -10000) { - - float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; + float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld; float4 pOnB1 = tr(closestPnt); //printf("dist ,%f,",minDist); - float actualDepth = minDist-radius; - if (actualDepth<0) + float actualDepth = minDist - radius; + if (actualDepth < 0) { - //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); - //printf("region=,%d,\n", region); - pOnB1[3] = actualDepth; + //printf("actualDepth = ,%f,", actualDepth); + //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); + //printf("region=,%d,\n", region); + pOnB1[3] = actualDepth; - int dstIdx; -// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx=nGlobalContactsOut; - nGlobalContactsOut++; + int dstIdx; + // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB1; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); + if (nGlobalContactsOut < maxContactCapacity) + { + dstIdx = nGlobalContactsOut; + nGlobalContactsOut++; - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - int numPoints = 1; - c->m_worldNormalOnB.w = (b3Scalar)numPoints; - }//if (dstIdx < numPairs) + b3Contact4* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = normalOnSurfaceB1; + c->setFrictionCoeff(0.7); + c->setRestituitionCoeff(0.f); + + c->m_batchIdx = pairIndex; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; + c->m_worldPosB[0] = pOnB1; + int numPoints = 1; + c->m_worldNormalOnB.w = (b3Scalar)numPoints; + } //if (dstIdx < numPairs) } - }//if (hasCollision) - + } //if (hasCollision) } - - - int computeContactConvexConvex2( - int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, - const b3AlignedObjectArray<b3Collidable>& collidables, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& uniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - b3AlignedObjectArray<b3Contact4>& globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - const b3AlignedObjectArray<b3Contact4>& oldContacts - ) + int pairIndex, + int bodyIndexA, int bodyIndexB, + int collidableIndexA, int collidableIndexB, + const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, + const b3AlignedObjectArray<b3Collidable>& collidables, + const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, + const b3AlignedObjectArray<b3Vector3>& convexVertices, + const b3AlignedObjectArray<b3Vector3>& uniqueEdges, + const b3AlignedObjectArray<int>& convexIndices, + const b3AlignedObjectArray<b3GpuFace>& faces, + b3AlignedObjectArray<b3Contact4>& globalContactsOut, + int& nGlobalContactsOut, + int maxContactCapacity, + const b3AlignedObjectArray<b3Contact4>& oldContacts) { int contactIndex = -1; b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - b3ConvexPolyhedronData hullA, hullB; - + b3Vector3 sepNormalWorldSpace; - + b3Collidable colA = collidables[collidableIndexA]; + hullA = convexShapes[colA.m_shapeIndex]; + //printf("numvertsA = %d\n",hullA.m_numVertices); - b3Collidable colA = collidables[collidableIndexA]; - hullA = convexShapes[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - - b3Collidable colB = collidables[collidableIndexB]; - hullB = convexShapes[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); + b3Collidable colB = collidables[collidableIndexB]; + hullB = convexShapes[colB.m_shapeIndex]; + //printf("numvertsB = %d\n",hullB.m_numVertices); -// int contactCapacity = MAX_VERTS; + // int contactCapacity = MAX_VERTS; //int numContactsOut=0; - #ifdef _WIN32 b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); #endif - - bool foundSepAxis = findSeparatingAxis(hullA,hullB, - posA, - ornA, - posB, - ornB, - convexVertices,uniqueEdges,faces,convexIndices, - convexVertices,uniqueEdges,faces,convexIndices, - - sepNormalWorldSpace - ); + bool foundSepAxis = findSeparatingAxis(hullA, hullB, + posA, + ornA, + posB, + ornB, + + convexVertices, uniqueEdges, faces, convexIndices, + convexVertices, uniqueEdges, faces, convexIndices, + + sepNormalWorldSpace); - if (foundSepAxis) { - - contactIndex = clipHullHullSingle( bodyIndexA, bodyIndexB, - posA,ornA, - posB,ornB, + posA, ornA, + posB, ornB, collidableIndexA, collidableIndexB, - &rigidBodies, + &rigidBodies, &globalContactsOut, nGlobalContactsOut, - + convexShapes, convexShapes, - - convexVertices, - uniqueEdges, + + convexVertices, + uniqueEdges, faces, convexIndices, - + convexVertices, uniqueEdges, faces, @@ -2698,50 +2550,42 @@ int computeContactConvexConvex2( collidables, sepNormalWorldSpace, maxContactCapacity); - } return contactIndex; } - - - - - - -void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, - const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, - const b3OpenCLArray<b3Contact4>* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, - const b3OpenCLArray<b3Vector3>& gpuVertices, - const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, - const b3OpenCLArray<b3GpuFace>& gpuFaces, - const b3OpenCLArray<int>& gpuIndices, - const b3OpenCLArray<b3Collidable>& gpuCollidables, - const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - - const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, - const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - - b3OpenCLArray<b3Vector3>& worldVertsB1GPU, - b3OpenCLArray<b3Int4>& clippingFacesOutGPU, - b3OpenCLArray<b3Vector3>& worldNormalsAGPU, - b3OpenCLArray<b3Vector3>& worldVertsA1GPU, - b3OpenCLArray<b3Vector3>& worldVertsB2GPU, - b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, - b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, - b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, - b3OpenCLArray<b3BvhInfo>* bvhInfo, - - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray<b3Int4>& triangleConvexPairsOut, - int& numTriConvexPairsOut - ) +void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs, + const b3OpenCLArray<b3RigidBodyData>* bodyBuf, + b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, + const b3OpenCLArray<b3Contact4>* oldContacts, + int maxContactCapacity, + int compoundPairCapacity, + const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, + const b3OpenCLArray<b3Vector3>& gpuVertices, + const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, + const b3OpenCLArray<b3GpuFace>& gpuFaces, + const b3OpenCLArray<int>& gpuIndices, + const b3OpenCLArray<b3Collidable>& gpuCollidables, + const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, + + const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, + const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, + + b3OpenCLArray<b3Vector3>& worldVertsB1GPU, + b3OpenCLArray<b3Int4>& clippingFacesOutGPU, + b3OpenCLArray<b3Vector3>& worldNormalsAGPU, + b3OpenCLArray<b3Vector3>& worldVertsA1GPU, + b3OpenCLArray<b3Vector3>& worldVertsB2GPU, + b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, + b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, + b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, + b3OpenCLArray<b3BvhInfo>* bvhInfo, + + int numObjects, + int maxTriConvexPairCapacity, + b3OpenCLArray<b3Int4>& triangleConvexPairsOut, + int& numTriConvexPairsOut) { myframecount++; @@ -2750,14 +2594,13 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* #ifdef CHECK_ON_HOST - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; bvhInfo->copyToHost(bvhInfoCPU); b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; @@ -2772,8 +2615,6 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; convexData.copyToHost(hostConvexData); @@ -2788,10 +2629,9 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* gpuIndices.copyToHost(hostIndices); b3AlignedObjectArray<b3Collidable> hostCollidables; gpuCollidables.copyToHost(hostCollidables); - + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; gpuChildShapes.copyToHost(cpuChildShapes); - b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs; @@ -2802,16 +2642,15 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* } b3AlignedObjectArray<b3Contact4> oldHostContacts; - + if (oldContacts->size()) { oldContacts->copyToHost(oldHostContacts); } - hostContacts.resize(maxContactCapacity); - for (int i=0;i<nPairs;i++) + for (int i = 0; i < nPairs; i++) { int bodyIndexA = hostPairs[i].x; int bodyIndexB = hostPairs[i].y; @@ -2821,84 +2660,73 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { - computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); + computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); } if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) { - computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); + computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); //printf("convex-sphere\n"); - } if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) { - computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("convex-plane\n"); - + computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); + // printf("convex-plane\n"); } if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { - computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("plane-convex\n"); - + computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); + // printf("plane-convex\n"); } - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { - computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], - nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU); -// printf("convex-plane\n"); - + computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0], + nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU); + // printf("convex-plane\n"); } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && + if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) { - computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("convex-plane\n"); - + computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); + // printf("convex-plane\n"); } if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) { - computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("plane-convex\n"); - + computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], + &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); + // printf("plane-convex\n"); } if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) { //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts); //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - - if (contactIndex>=0) + if (contactIndex >= 0) { -// printf("convex convex contactIndex = %d\n",contactIndex); + // printf("convex convex contactIndex = %d\n",contactIndex); hostPairs[i].z = contactIndex; } -// printf("plane-convex\n"); - + // printf("plane-convex\n"); } - - } if (hostPairs.size()) @@ -2908,81 +2736,76 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* hostContacts.resize(nContacts); if (nContacts) - { - - contactOut->copyFromHost(hostContacts); - } else + { + contactOut->copyFromHost(hostContacts); + } + else { contactOut->resize(0); - } + } - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - //printf("(HOST) nContacts = %d\n",nContacts); + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); + //printf("(HOST) nContacts = %d\n",nContacts); #else { if (nPairs) { - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); B3_PROFILE("primitiveContactsKernel"); b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nPairs); launcher.setConst(maxContactCapacity); int num = nPairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); - + nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); } } - -#endif//CHECK_ON_HOST - +#endif //CHECK_ON_HOST + B3_PROFILE("computeConvexConvexContactsGPUSAT"); - // printf("nContacts = %d\n",nContacts); - - + // printf("nContacts = %d\n",nContacts); + m_sepNormals.resize(nPairs); m_hasSeparatingNormals.resize(nPairs); - - int concaveCapacity=maxTriConvexPairCapacity; + + int concaveCapacity = maxTriConvexPairCapacity; m_concaveSepNormals.resize(concaveCapacity); m_concaveHasSeparatingNormals.resize(concaveCapacity); m_numConcavePairsOut.resize(0); m_numConcavePairsOut.push_back(0); - m_gpuCompoundPairs.resize(compoundPairCapacity); m_gpuCompoundSepNormals.resize(compoundPairCapacity); - - + m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); - + m_numCompoundPairsOut.resize(0); m_numCompoundPairsOut.push_back(0); int numCompoundPairs = 0; - int numConcavePairs =0; + int numConcavePairs = 0; { clFinish(m_queue); @@ -2991,33 +2814,30 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* m_dmins.resize(nPairs); if (splitSearchSepAxisConvex) { - - if (useMprGpu) { nContacts = m_totalContactsOut.at(0); { B3_PROFILE("mprPenetrationKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); launcher.setConst(maxContactCapacity); - launcher.setConst( nPairs ); + launcher.setConst(nPairs); int num = nPairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); /* b3AlignedObjectArray<int>hostHasSepAxis; @@ -3027,173 +2847,160 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* */ nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); - // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); - if (nContacts>maxContactCapacity) + // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); + if (nContacts > maxContactCapacity) { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } - } } - + if (1) { - if (1) { - { - B3_PROFILE("findSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); + { + B3_PROFILE("findSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nPairs); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); + } + int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); - - { - B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()), - b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) - - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numDirections); - launcher.setConst( nPairs ); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); + { + B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL()), + b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true) - } + }; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(numDirections); + launcher.setConst(nPairs); + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); + } } if (useMprGpu) { B3_PROFILE("findSeparatingAxisUnitSphereKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); - launcher.setConst( numDirections); - - launcher.setConst( nPairs ); - + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); + launcher.setConst(numDirections); + + launcher.setConst(nPairs); + int num = nPairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); } + } } - - - } else + else { B3_PROFILE("findSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nPairs); int num = nPairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); } - - } - else - { - + else + { B3_PROFILE("findSeparatingAxisKernel CPU"); - - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; - convexData.copyToHost(hostConvexShapeData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<int> hostHasSepAxis; - hostHasSepAxis.resize(nPairs); - b3AlignedObjectArray<b3Vector3> hostSepAxis; - hostSepAxis.resize(nPairs); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - + + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; + convexData.copyToHost(hostConvexShapeData); + + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + + b3AlignedObjectArray<int> hostHasSepAxis; + hostHasSepAxis.resize(nPairs); + b3AlignedObjectArray<b3Vector3> hostSepAxis; + hostSepAxis.resize(nPairs); + + b3AlignedObjectArray<b3Vector3> hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray<b3Contact4> hostContacts; if (nContacts) { @@ -3201,61 +3008,56 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* } hostContacts.resize(maxContactCapacity); int nGlobalContactsOut = nContacts; - - - for (int i=0;i<nPairs;i++) - { - - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - hostHasSepAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((hostBodyBuf[bodyIndexA].m_invMass==0) &&(hostBodyBuf[bodyIndexB].m_invMass==0)) - { - continue; - } - - - if ((hostCollidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(hostCollidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - continue; - } - - float dmin = FLT_MAX; - - b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; - b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; - b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; - b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; - b3Quaternion ornA =hostBodyBuf[bodyIndexA].m_quat; - b3Quaternion ornB =hostBodyBuf[bodyIndexB].m_quat; - - - if (useGjk) + + for (int i = 0; i < nPairs; i++) + { + int bodyIndexA = hostPairs[i].x; + int bodyIndexB = hostPairs[i].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + + int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; + + hostHasSepAxis[i] = 0; + + //once the broadphase avoids static-static pairs, we can remove this test + if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0)) { + continue; + } + if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || (hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL)) + { + continue; + } + + float dmin = FLT_MAX; + + b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; + b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; + b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; + b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; + b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat; + b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat; + + if (useGjk) + { //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR { b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); + b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); b3Vector3 DeltaC2 = c0 - c1; - + b3Vector3 sepAxis; - + bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + if (hasSepAxisA) { bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, @@ -3264,11 +3066,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* &sepAxis, &dmin); if (hasSepAxisB) { - bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin,false); - + bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin, false); + if (hasEdgeEdge) { hostHasSepAxis[i] = 1; @@ -3282,163 +3084,150 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* if (hostHasSepAxis[i]) { int pairIndex = i; - + bool useMpr = true; if (useMpr) { - int res=0; + int res = 0; float depth = 0.f; - b3Vector3 sepAxis2 = b3MakeVector3(1,0,0); - b3Vector3 resultPointOnBWorld = b3MakeVector3(0,0,0); + b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0); + b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0); - float depthOut; - b3Vector3 dirOut; - b3Vector3 posOut; - + float depthOut; + b3Vector3 dirOut; + b3Vector3 posOut; - //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); - res = b3MprPenetration(pairIndex,bodyIndexA,bodyIndexB,&hostBodyBuf[0],&hostConvexShapeData[0],&hostCollidables[0],&hostVertices[0],&hostSepAxis[0],&hostHasSepAxis[0],&depthOut,&dirOut,&posOut); - depth = depthOut; - sepAxis2 = b3MakeVector3(-dirOut.x,-dirOut.y,-dirOut.z); - resultPointOnBWorld = posOut; - //hostHasSepAxis[i] = 0; + //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); + res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut); + depth = depthOut; + sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z); + resultPointOnBWorld = posOut; + //hostHasSepAxis[i] = 0; + if (res == 0) + { + //add point? + //printf("depth = %f\n",depth); + //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); + //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); - if (res==0) - { - //add point? - //printf("depth = %f\n",depth); - //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); - //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); - - - - float dist=0.f; + float dist = 0.f; - const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; - const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; + const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; + const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; - if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) - { - if (depth > dist) + if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) { - float diff = depth - dist; - - static float maxdiff = 0.f; - if (maxdiff < diff) + if (depth > dist) { - maxdiff = diff; - printf("maxdiff = %20.10f\n",maxdiff); + float diff = depth - dist; + + static float maxdiff = 0.f; + if (maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n", maxdiff); + } } } - } - if (depth > dmin) - { - b3Vector3 oldAxis = hostSepAxis[i]; - depth = dmin; - sepAxis2 = oldAxis; - } - - + if (depth > dmin) + { + b3Vector3 oldAxis = hostSepAxis[i]; + depth = dmin; + sepAxis2 = oldAxis; + } - if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) - { - if (depth > dist) + if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) { - float diff = depth - dist; - //printf("?diff = %f\n",diff ); - static float maxdiff = 0.f; - if (maxdiff < diff) + if (depth > dist) + { + float diff = depth - dist; + //printf("?diff = %f\n",diff ); + static float maxdiff = 0.f; + if (maxdiff < diff) + { + maxdiff = diff; + printf("maxdiff = %20.10f\n", maxdiff); + } + } + //this is used for SAT + //hostHasSepAxis[i] = 1; + //hostSepAxis[i] = sepAxis2; + + //add contact point + + //int contactIndex = nGlobalContactsOut; + b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); + nGlobalContactsOut++; + newContact.m_batchIdx = 0; //i; + newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; + newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; + + newContact.m_frictionCoeffCmp = 45874; + newContact.m_restituitionCoeffCmp = 0; + + static float maxDepth = 0.f; + + if (depth > maxDepth) { - maxdiff = diff; - printf("maxdiff = %20.10f\n",maxdiff); + maxDepth = depth; + printf("MPR maxdepth = %f\n", maxDepth); } + + resultPointOnBWorld.w = -depth; + newContact.m_worldPosB[0] = resultPointOnBWorld; + //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; + newContact.m_worldNormalOnB = sepAxis2; + newContact.m_worldNormalOnB.w = (b3Scalar)1; } - //this is used for SAT - //hostHasSepAxis[i] = 1; - //hostSepAxis[i] = sepAxis2; - - //add contact point - - //int contactIndex = nGlobalContactsOut; - b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); - nGlobalContactsOut++; - newContact.m_batchIdx = 0;//i; - newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; - newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; - - newContact.m_frictionCoeffCmp = 45874; - newContact.m_restituitionCoeffCmp = 0; - - - static float maxDepth = 0.f; - - if (depth > maxDepth) + else { - maxDepth = depth; - printf("MPR maxdepth = %f\n",maxDepth ); - + printf("rejected\n"); } - - - resultPointOnBWorld.w = -depth; - newContact.m_worldPosB[0] = resultPointOnBWorld; - //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; - newContact.m_worldNormalOnB = sepAxis2; - newContact.m_worldNormalOnB.w = (b3Scalar)1; - } else - { - printf("rejected\n"); } - - } - } else + else { - - - - //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - b3AlignedObjectArray<b3Contact4> oldHostContacts; + //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); + b3AlignedObjectArray<b3Contact4> oldHostContacts; int result; - result = computeContactConvexConvex2( //hostPairs, - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - hostBodyBuf, - hostCollidables, - hostConvexShapeData, - hostVertices, - hostUniqueEdges, - hostIndices, - hostFaces, - hostContacts, - nGlobalContactsOut, - maxContactCapacity, - oldHostContacts - //hostHasSepAxis, - //hostSepAxis - - ); - }//mpr - }//hostHasSepAxis[i] = 1; - - } else + result = computeContactConvexConvex2( //hostPairs, + pairIndex, + bodyIndexA, bodyIndexB, + collidableIndexA, collidableIndexB, + hostBodyBuf, + hostCollidables, + hostConvexShapeData, + hostVertices, + hostUniqueEdges, + hostIndices, + hostFaces, + hostContacts, + nGlobalContactsOut, + maxContactCapacity, + oldHostContacts + //hostHasSepAxis, + //hostSepAxis + + ); + } //mpr + } //hostHasSepAxis[i] = 1; + } + else { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); + b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); b3Vector3 DeltaC2 = c0 - c1; - + b3Vector3 sepAxis; - + bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin); + if (hasSepAxisA) { bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, @@ -3447,11 +3236,11 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* &sepAxis, &dmin); if (hasSepAxisB) { - bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin,true); - + bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), + &sepAxis, &dmin, true); + if (hasEdgeEdge) { hostHasSepAxis[i] = 1; @@ -3460,21 +3249,21 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* } } } - } - - if (useGjkContacts)//nGlobalContactsOut>0) + } + + if (useGjkContacts) //nGlobalContactsOut>0) { //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); nContacts = nGlobalContactsOut; contactOut->copyFromHost(hostContacts); - - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); + + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); } - - m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - m_sepNormals.copyFromHost(hostSepAxis); - - /* + + m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); + m_sepNormals.copyFromHost(hostSepAxis); + + /* //double-check results from GPU (comment-out the 'else' so both paths are executed b3AlignedObjectArray<int> checkHasSepAxis; m_hasSeparatingNormals.copyToHost(checkHasSepAxis); @@ -3491,352 +3280,314 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); // m_sepNormals.copyFromHost(hostSepAxis); */ - } - - - numCompoundPairs = m_numCompoundPairsOut.at(0); - bool useGpuFindCompoundPairs=true; - if (useGpuFindCompoundPairs) - { - B3_PROFILE("findCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), - b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), - b3BufferInfoCL(subTreesGPU->getBufferCL()), - b3BufferInfoCL(treeNodesGPU->getBufferCL()), - b3BufferInfoCL(bvhInfo->getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - launcher.setConst( compoundPairCapacity); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - numCompoundPairs = m_numCompoundPairsOut.at(0); - //printf("numCompoundPairs =%d\n",numCompoundPairs ); - if (numCompoundPairs) - { - //printf("numCompoundPairs=%d\n",numCompoundPairs); - } - - - } else - { - - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); + } - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); + numCompoundPairs = m_numCompoundPairsOut.at(0); + bool useGpuFindCompoundPairs = true; + if (useGpuFindCompoundPairs) + { + B3_PROFILE("findCompoundPairsKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()), + b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()), + b3BufferInfoCL(subTreesGPU->getBufferCL()), + b3BufferInfoCL(treeNodesGPU->getBufferCL()), + b3BufferInfoCL(bvhInfo->getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nPairs); + launcher.setConst(compoundPairCapacity); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); - b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); + numCompoundPairs = m_numCompoundPairsOut.at(0); + //printf("numCompoundPairs =%d\n",numCompoundPairs ); + if (numCompoundPairs) + { + //printf("numCompoundPairs=%d\n",numCompoundPairs); + } + } + else + { + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - cpuCompoundPairsOut.resize(compoundPairCapacity); + b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; + clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); + b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; + cpuCompoundPairsOut.resize(compoundPairCapacity); - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); - for (int pairIndex=0;pairIndex<nPairs;pairIndex++) - { - int bodyIndexA = hostPairs[pairIndex].x; - int bodyIndexB = hostPairs[pairIndex].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + for (int pairIndex = 0; pairIndex < nPairs; pairIndex++) + { + int bodyIndexA = hostPairs[pairIndex].x; + int bodyIndexB = hostPairs[pairIndex].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; if (cpuChildShapes.size()) { - findCompoundPairsKernel( - pairIndex, - bodyIndexA, - bodyIndexB, - collidableIndexA, - collidableIndexB, - &hostBodyBuf[0], - &hostCollidables[0], - &hostConvexData[0], - hostVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - &cpuChildShapes[0], - &cpuCompoundPairsOut[0], - &numCompoundPairs, - compoundPairCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU - ); + findCompoundPairsKernel( + pairIndex, + bodyIndexA, + bodyIndexB, + collidableIndexA, + collidableIndexB, + &hostBodyBuf[0], + &hostCollidables[0], + &hostConvexData[0], + hostVertices, + hostAabbsWorldSpace, + hostAabbsLocalSpace, + &cpuChildShapes[0], + &cpuCompoundPairsOut[0], + &numCompoundPairs, + compoundPairCapacity, + treeNodesCPU, + subTreesCPU, + bvhInfoCPU); } - } - + } - m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs,1,0,true); + m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true); if (numCompoundPairs) { b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; - m_gpuCompoundPairs.copyFromHostPointer(ptr,numCompoundPairs,0,true); + m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true); } //cpuCompoundPairsOut - - } + } if (numCompoundPairs) { - printf("numCompoundPairs=%d\n",numCompoundPairs); + printf("numCompoundPairs=%d\n", numCompoundPairs); } - if (numCompoundPairs > compoundPairCapacity) - { - b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); - numCompoundPairs = compoundPairCapacity; - } - - - - m_gpuCompoundPairs.resize(numCompoundPairs); - m_gpuHasCompoundSepNormals.resize(numCompoundPairs); - m_gpuCompoundSepNormals.resize(numCompoundPairs); - - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsPrimitivesKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numCompoundPairs ); - launcher.setConst(maxContactCapacity); - - int num = numCompoundPairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); - if (nContacts>maxContactCapacity) - { - - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), - b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numCompoundPairs ); - - int num = numCompoundPairs; - launcher.launch1D( num); - clFinish(m_queue); - - } - - - //printf("numConcave = %d\n",numConcave); - - - -// printf("hostNormals.size()=%d\n",hostNormals.size()); + if (numCompoundPairs > compoundPairCapacity) + { + b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); + numCompoundPairs = compoundPairCapacity; + } + + m_gpuCompoundPairs.resize(numCompoundPairs); + m_gpuHasCompoundSepNormals.resize(numCompoundPairs); + m_gpuCompoundSepNormals.resize(numCompoundPairs); + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsPrimitivesKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(numCompoundPairs); + launcher.setConst(maxContactCapacity); + + int num = numCompoundPairs; + launcher.launch1D(num); + clFinish(m_queue); + nContacts = m_totalContactsOut.at(0); + //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); + if (nContacts > maxContactCapacity) + { + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; + } + } + + if (numCompoundPairs) + { + B3_PROFILE("processCompoundPairsKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()), + b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(numCompoundPairs); + + int num = numCompoundPairs; + launcher.launch1D(num); + clFinish(m_queue); + } + + //printf("numConcave = %d\n",numConcave); + + // printf("hostNormals.size()=%d\n",hostNormals.size()); //int numPairs = pairCount.at(0); - - - } int vertexFaceCapacity = 64; - - { //now perform the tree query on GPU - - - - + if (treeNodesGPU->size() && treeNodesGPU->size()) { if (bvhTraversalKernelGPU) { - B3_PROFILE("m_bvhTraversalKernel"); - - + numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); - launcher.setBuffer( pairs->getBufferCL()); - launcher.setBuffer( bodyBuf->getBufferCL()); - launcher.setBuffer( gpuCollidables.getBufferCL()); - launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer( subTreesGPU->getBufferCL()); - launcher.setBuffer( treeNodesGPU->getBufferCL()); - launcher.setBuffer( bvhInfo->getBufferCL()); - - launcher.setConst( nPairs ); - launcher.setConst( maxTriConvexPairCapacity); + + b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel"); + launcher.setBuffer(pairs->getBufferCL()); + launcher.setBuffer(bodyBuf->getBufferCL()); + launcher.setBuffer(gpuCollidables.getBufferCL()); + launcher.setBuffer(clAabbsWorldSpace.getBufferCL()); + launcher.setBuffer(triangleConvexPairsOut.getBufferCL()); + launcher.setBuffer(m_numConcavePairsOut.getBufferCL()); + launcher.setBuffer(subTreesGPU->getBufferCL()); + launcher.setBuffer(treeNodesGPU->getBufferCL()); + launcher.setBuffer(bvhInfo->getBufferCL()); + + launcher.setConst(nPairs); + launcher.setConst(maxTriConvexPairCapacity); int num = nPairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); numConcavePairs = m_numConcavePairsOut.at(0); - } else + } + else { - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - //int maxTriConvexPairCapacity, - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + //int maxTriConvexPairCapacity, + b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; + triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - //int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut + //int numTriConvexPairsOutHost=0; + numConcavePairs = 0; + //m_numConcavePairsOut - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... + b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; + treeNodesGPU->copyToHost(treeNodesCPU); + b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; + subTreesGPU->copyToHost(subTreesCPU); + b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; + bvhInfo->copyToHost(bvhInfoCPU); + //compute it... - volatile int hostNumConcavePairsOut=0; + volatile int hostNumConcavePairsOut = 0; - // - for (int i=0;i<nPairs;i++) - { - b3BvhTraversal( &hostPairs.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostAabbsWorldSpace.at(0), - &triangleConvexPairsOutHost.at(0), - &hostNumConcavePairsOut, - &subTreesCPU.at(0), - &treeNodesCPU.at(0), - &bvhInfoCPU.at(0), - nPairs, - maxTriConvexPairCapacity, - i); - } - numConcavePairs = hostNumConcavePairsOut; + // + for (int i = 0; i < nPairs; i++) + { + b3BvhTraversal(&hostPairs.at(0), + &hostBodyBuf.at(0), + &hostCollidables.at(0), + &hostAabbsWorldSpace.at(0), + &triangleConvexPairsOutHost.at(0), + &hostNumConcavePairsOut, + &subTreesCPU.at(0), + &treeNodesCPU.at(0), + &bvhInfoCPU.at(0), + nPairs, + maxTriConvexPairCapacity, + i); + } + numConcavePairs = hostNumConcavePairsOut; - if (hostNumConcavePairsOut) - { - triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); - triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); - } - // + if (hostNumConcavePairsOut) + { + triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); + triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); + } + // - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(numConcavePairs); + m_numConcavePairsOut.resize(0); + m_numConcavePairsOut.push_back(numConcavePairs); } - //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); - + //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); + if (numConcavePairs > maxTriConvexPairCapacity) { static int exceeded_maxTriConvexPairCapacity_count = 0; b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); + numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++); numConcavePairs = maxTriConvexPairCapacity; } triangleConvexPairsOut.resize(numConcavePairs); - + if (numConcavePairs) { - - - - clippingFacesOutGPU.resize(numConcavePairs); worldNormalsAGPU.resize(numConcavePairs); - worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); - worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); - + worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs)); + worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs)); if (findConcaveSeparatingAxisKernelGPU) { - /* m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); @@ -3846,236 +3597,213 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* */ //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - if (splitSearchSepAxisConcave) - { - //printf("numConcavePairs = %d\n",numConcavePairs); - m_dmins.resize(numConcavePairs); - { - B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - - - } -// numConcavePairs = 0; - if (1) - { - B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - // numConcavePairs = 0; - - - - - - - } else - { - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - } else - { - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + if (splitSearchSepAxisConcave) + { + //printf("numConcavePairs = %d\n",numConcavePairs); + m_dmins.resize(numConcavePairs); + { + B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(numConcavePairs); - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); + int num = numConcavePairs; + launcher.launch1D(num); + clFinish(m_queue); + } + // numConcavePairs = 0; + if (1) + { + B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(m_dmins.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(numConcavePairs); - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); + int num = numConcavePairs; + launcher.launch1D(num); + clFinish(m_queue); + } - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); + // numConcavePairs = 0; + } + else + { + B3_PROFILE("findConcaveSeparatingAxisKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(numConcavePairs); - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); + int num = numConcavePairs; + launcher.launch1D(num); + clFinish(m_queue); + } + } + else + { + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; + b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; + triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); + //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); + b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; + clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); - b3GpuChildShape* childShapePointerCPU = 0; - if (cpuChildShapes.size()) - childShapePointerCPU = &cpuChildShapes.at(0); + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); - clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - worldNormalsACPU.resize(worldNormalsAGPU.size()); - worldVertsB1CPU.resize(worldVertsB1GPU.size()); + b3AlignedObjectArray<b3Vector3> hostUniqueEdges; + gpuUniqueEdges.copyToHost(hostUniqueEdges); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); + b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; + gpuChildShapes.copyToHost(cpuChildShapes); - for (int i=0;i<numConcavePairs;i++) - { - b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostConvexData.at(0), &hostVertices.at(0),&hostUniqueEdges.at(0), - &hostFaces.at(0),&hostIndices.at(0),childShapePointerCPU, - &hostAabbsWorldSpace.at(0), - &concaveSepNormalsHost.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - vertexFaceCapacity, - numConcavePairs,i); - }; + b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; + m_concaveSepNormals.copyToHost(concaveSepNormalsHost); + concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); - m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + b3GpuChildShape* childShapePointerCPU = 0; + if (cpuChildShapes.size()) + childShapePointerCPU = &cpuChildShapes.at(0); + clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); + worldVertsA1CPU.resize(worldVertsA1GPU.size()); + worldNormalsACPU.resize(worldNormalsAGPU.size()); + worldVertsB1CPU.resize(worldVertsB1GPU.size()); + for (int i = 0; i < numConcavePairs; i++) + { + b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), + &hostBodyBuf.at(0), + &hostCollidables.at(0), + &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), + &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU, + &hostAabbsWorldSpace.at(0), + &concaveSepNormalsHost.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + vertexFaceCapacity, + numConcavePairs, i); + }; + m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); + m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); } -// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; -// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); -// b3AlignedObjectArray<b3Int4> cpuConcavePairs; -// triangleConvexPairsOut.copyToHost(cpuConcavePairs); - - + // b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; + // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); + // b3AlignedObjectArray<b3Int4> cpuConcavePairs; + // triangleConvexPairsOut.copyToHost(cpuConcavePairs); } } - - } if (numConcavePairs) { - if (numConcavePairs) + if (numConcavePairs) { B3_PROFILE("findConcaveSphereContactsKernel"); - nContacts = m_totalContactsOut.at(0); -// printf("nContacts1 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - - launcher.setConst( numConcavePairs ); + nContacts = m_totalContactsOut.at(0); + // printf("nContacts1 = %d\n",nContacts); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + + launcher.setConst(numConcavePairs); launcher.setConst(maxContactCapacity); int num = numConcavePairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); @@ -4088,11 +3816,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* nContacts = maxContactCapacity; } } - } - - #ifdef __APPLE__ bool contactClippingOnGpu = true; #else @@ -4101,9 +3826,8 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* if (contactClippingOnGpu) { - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -// printf("nContacts3 = %d\n",nContacts); - + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); + // printf("nContacts3 = %d\n",nContacts); //B3_PROFILE("clipHullHullKernel"); @@ -4122,15 +3846,12 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* if (breakupConcaveConvexKernel) { - - worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); - + worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs); //clipFacesAndFindContacts if (clipConcaveFacesAndFindContactsCPU) { - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; b3AlignedObjectArray<b3Vector3> worldNormalsACPU; @@ -4141,120 +3862,108 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* worldNormalsAGPU.copyToHost(worldNormalsACPU); worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; worldVertsB2CPU.resize(worldVertsB2GPU.size()); - - for (int i=0;i<numConcavePairs;i++) + for (int i = 0; i < numConcavePairs; i++) { - - clipFacesAndFindContactsKernel( &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - vertexFaceCapacity, - i); + clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &worldVertsB2CPU.at(0), + vertexFaceCapacity, + i); } clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - - - } else + } + else { - if (1) { - - - B3_PROFILE("clipFacesAndFindContacts"); //nContacts = m_totalContactsOut.at(0); //int h = m_hasSeparatingNormals.at(0); //int4 p = clippingFacesOutGPU.at(0); b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); + launcher.setConst(numConcavePairs); int debugMode = 0; - launcher.setConst( debugMode); + launcher.setConst(debugMode); int num = numConcavePairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); //int bla = m_totalContactsOut.at(0); } } //contactReduction { - int newContactCapacity=nContacts+numConcavePairs; + int newContactCapacity = nContacts + numConcavePairs; contactOut->reserve(newContactCapacity); if (reduceConcaveContactsOnGPU) { -// printf("newReservation = %d\n",newReservation); + // printf("newReservation = %d\n",newReservation); { B3_PROFILE("newContactReductionKernel"); b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); launcher.setConst(vertexFaceCapacity); launcher.setConst(newContactCapacity); - launcher.setConst( numConcavePairs ); + launcher.setConst(numConcavePairs); int num = numConcavePairs; - launcher.launch1D( num); + launcher.launch1D(num); } nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - }else + } + else { - volatile int nGlobalContactsOut = nContacts; b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; + b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - b3AlignedObjectArray<b3Contact4> hostContacts; if (nContacts) { @@ -4268,67 +3977,59 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - - for (int i=0;i<numConcavePairs;i++) + for (int i = 0; i < numConcavePairs; i++) { - b3NewContactReductionKernel( &triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &hostContacts.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - numConcavePairs, - i - ); - + b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0), + &hostBodyBuf.at(0), + &concaveSepNormalsHost.at(0), + &concaveHasSeparatingNormalsCPU.at(0), + &hostContacts.at(0), + &clippingFacesOutCPU.at(0), + &worldVertsB2CPU.at(0), + &nGlobalContactsOut, + vertexFaceCapacity, + newContactCapacity, + numConcavePairs, + i); } - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -// nContacts = m_totalContactsOut.at(0); + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); + // nContacts = m_totalContactsOut.at(0); //contactOut->resize(nContacts); hostContacts.resize(nContacts); //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); contactOut->copyFromHost(hostContacts); } - } //re-use? - - - } else + } + else { B3_PROFILE("clipHullHullConcaveConvexKernel"); nContacts = m_totalContactsOut.at(0); int newContactCapacity = contactOut->capacity(); //printf("contactOut5 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); launcher.setConst(newContactCapacity); - launcher.setConst( numConcavePairs ); + launcher.setConst(numConcavePairs); int num = numConcavePairs; - launcher.launch1D( num); + launcher.launch1D(num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); contactOut->resize(nContacts); @@ -4337,12 +4038,10 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* contactOut->copyToHost(cpuContacts); } // printf("nContacts after = %d\n", nContacts); - }//numConcavePairs - - + } //numConcavePairs //convex-convex contact clipping - + bool breakupKernel = false; #ifdef __APPLE__ @@ -4350,166 +4049,149 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* #endif #ifdef CHECK_ON_HOST - bool computeConvexConvex = false; + bool computeConvexConvex = false; #else - bool computeConvexConvex = true; -#endif//CHECK_ON_HOST + bool computeConvexConvex = true; +#endif //CHECK_ON_HOST if (computeConvexConvex) { B3_PROFILE("clipHullHullKernel"); - if (breakupKernel) - { - - - - - worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - clippingFacesOutGPU.resize(nPairs); - worldNormalsAGPU.resize(nPairs); - worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); - worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); - - if (findConvexClippingFacesGPU) - { - B3_PROFILE("findClippingFacesKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( vertexFaceCapacity); - launcher.setConst( nPairs ); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - } else + if (breakupKernel) { - - float minDist = -1e30f; - float maxDist = 0.02f; + worldVertsB1GPU.resize(vertexFaceCapacity * nPairs); + clippingFacesOutGPU.resize(nPairs); + worldNormalsAGPU.resize(nPairs); + worldVertsA1GPU.resize(vertexFaceCapacity * nPairs); + worldVertsB2GPU.resize(vertexFaceCapacity * nPairs); - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); + if (findConvexClippingFacesGPU) + { + B3_PROFILE("findClippingFacesKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(nPairs); + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); + } + else + { + float minDist = -1e30f; + float maxDist = 0.02f; - b3AlignedObjectArray<int> hostHasSepNormals; - m_hasSeparatingNormals.copyToHost(hostHasSepNormals); - b3AlignedObjectArray<b3Vector3> cpuSepNormals; - m_sepNormals.copyToHost(cpuSepNormals); + b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; + convexData.copyToHost(hostConvexData); + b3AlignedObjectArray<b3Collidable> hostCollidables; + gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<int> hostHasSepNormals; + m_hasSeparatingNormals.copyToHost(hostHasSepNormals); + b3AlignedObjectArray<b3Vector3> cpuSepNormals; + m_sepNormals.copyToHost(cpuSepNormals); + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); - //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); + //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + worldVertsB1GPU.copyToHost(worldVertsB1CPU); - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); + b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; + clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsACPU.resize(nPairs); + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + worldNormalsACPU.resize(nPairs); - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - - - for (int i=0;i<nPairs;i++) - { + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + worldVertsA1CPU.resize(worldVertsA1GPU.size()); - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - + b3AlignedObjectArray<b3Vector3> hostVertices; + gpuVertices.copyToHost(hostVertices); + b3AlignedObjectArray<b3GpuFace> hostFaces; + gpuFaces.copyToHost(hostFaces); + b3AlignedObjectArray<int> hostIndices; + gpuIndices.copyToHost(hostIndices); - if (hostHasSepNormals[i]) + for (int i = 0; i < nPairs; i++) { - b3FindClippingFaces(cpuSepNormals[i], - &hostConvexData[shapeIndexA], - &hostConvexData[shapeIndexB], - hostBodyBuf[bodyIndexA].m_pos,hostBodyBuf[bodyIndexA].m_quat, - hostBodyBuf[bodyIndexB].m_pos,hostBodyBuf[bodyIndexB].m_quat, - &worldVertsA1CPU.at(0),&worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - vertexFaceCapacity,minDist,maxDist, - &hostVertices.at(0),&hostFaces.at(0), - &hostIndices.at(0), - &hostVertices.at(0),&hostFaces.at(0), - &hostIndices.at(0),&clippingFacesOutCPU.at(0),i); - } - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - - } + int bodyIndexA = hostPairs[i].x; + int bodyIndexB = hostPairs[i].y; + int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; + int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; + int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; + if (hostHasSepNormals[i]) + { + b3FindClippingFaces(cpuSepNormals[i], + &hostConvexData[shapeIndexA], + &hostConvexData[shapeIndexB], + hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat, + hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat, + &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + vertexFaceCapacity, minDist, maxDist, + &hostVertices.at(0), &hostFaces.at(0), + &hostIndices.at(0), + &hostVertices.at(0), &hostFaces.at(0), + &hostIndices.at(0), &clippingFacesOutCPU.at(0), i); + } + } + clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); + worldVertsA1GPU.copyFromHost(worldVertsA1CPU); + worldNormalsAGPU.copyFromHost(worldNormalsACPU); + worldVertsB1GPU.copyFromHost(worldVertsB1CPU); + } - ///clip face B against face A, reduce contacts and append them to a global contact array - if (1) - { - if (clipConvexFacesAndFindContactsCPU) + ///clip face B against face A, reduce contacts and append them to a global contact array + if (1) { + if (clipConvexFacesAndFindContactsCPU) + { + //b3AlignedObjectArray<b3Int4> hostPairs; + //pairs->copyToHost(hostPairs); - //b3AlignedObjectArray<b3Int4> hostPairs; - //pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3Vector3> hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray<int> hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArray<b3Int4> hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + worldVertsB2CPU.resize(vertexFaceCapacity * nPairs); - b3AlignedObjectArray<b3Int4> hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); - - b3AlignedObjectArray<b3Vector3>worldVertsA1CPU; - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsAGPU.copyToHost(worldNormalsACPU); + b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; + worldVertsA1GPU.copyToHost(worldVertsA1CPU); + b3AlignedObjectArray<b3Vector3> worldNormalsACPU; + worldNormalsAGPU.copyToHost(worldNormalsACPU); - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); + b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; + worldVertsB1GPU.copyToHost(worldVertsB1CPU); - /* + /* __global const b3Float4* separatingNormals, __global const int* hasSeparatingAxis, __global b3Int4* clippingFacesOut, @@ -4520,214 +4202,207 @@ void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* int vertexFaceCapacity, int pairIndex */ - for (int i=0;i<nPairs;i++) - { - clipFacesAndFindContactsKernel( - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostClippingFaces.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(hostClippingFaces); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); + for (int i = 0; i < nPairs; i++) + { + clipFacesAndFindContactsKernel( + &hostSepNormals.at(0), + &hostHasSepAxis.at(0), + &hostClippingFaces.at(0), + &worldVertsA1CPU.at(0), + &worldNormalsACPU.at(0), + &worldVertsB1CPU.at(0), + &worldVertsB2CPU.at(0), - } else - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) - }; + vertexFaceCapacity, + i); + } - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); + clippingFacesOutGPU.copyFromHost(hostClippingFaces); + worldVertsB2GPU.copyFromHost(worldVertsB2CPU); + } + else + { + B3_PROFILE("clipFacesAndFindContacts"); + //nContacts = m_totalContactsOut.at(0); + //int h = m_hasSeparatingNormals.at(0); + //int4 p = clippingFacesOutGPU.at(0); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), + b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), + b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); - launcher.setConst( nPairs ); - int debugMode = 0; - launcher.setConst( debugMode); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } + launcher.setConst(nPairs); + int debugMode = 0; + launcher.setConst(debugMode); + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); + } - { - nContacts = m_totalContactsOut.at(0); - //printf("nContacts = %d\n",nContacts); + { + nContacts = m_totalContactsOut.at(0); + //printf("nContacts = %d\n",nContacts); - int newContactCapacity = nContacts+nPairs; - contactOut->reserve(newContactCapacity); + int newContactCapacity = nContacts + nPairs; + contactOut->reserve(newContactCapacity); - if (reduceConvexContactsOnGPU) - { + if (reduceConvexContactsOnGPU) { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst( nPairs ); - int num = nPairs; - - launcher.launch1D( num); + B3_PROFILE("newContactReductionKernel"); + b3BufferInfoCL bInfo[] = + { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), + b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + + b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(vertexFaceCapacity); + launcher.setConst(newContactCapacity); + launcher.setConst(nPairs); + int num = nPairs; + + launcher.launch1D(num); + } + nContacts = m_totalContactsOut.at(0); + contactOut->resize(nContacts); } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } else - { - - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Contact4> hostContactsOut; - contactOut->copyToHost(hostContactsOut); - hostContactsOut.resize(newContactCapacity); - - b3AlignedObjectArray<b3Int4> hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i=0;i<nPairs;i++) + else { - b3NewContactReductionKernel(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostContactsOut.at(0), - &hostClippingFaces.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - nPairs, - i); + volatile int nGlobalContactsOut = nContacts; + b3AlignedObjectArray<b3Int4> hostPairs; + pairs->copyToHost(hostPairs); + b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; + bodyBuf->copyToHost(hostBodyBuf); + b3AlignedObjectArray<b3Vector3> hostSepNormals; + m_sepNormals.copyToHost(hostSepNormals); + b3AlignedObjectArray<int> hostHasSepAxis; + m_hasSeparatingNormals.copyToHost(hostHasSepAxis); + b3AlignedObjectArray<b3Contact4> hostContactsOut; + contactOut->copyToHost(hostContactsOut); + hostContactsOut.resize(newContactCapacity); + + b3AlignedObjectArray<b3Int4> hostClippingFaces; + clippingFacesOutGPU.copyToHost(hostClippingFaces); + b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; + worldVertsB2GPU.copyToHost(worldVertsB2CPU); + + for (int i = 0; i < nPairs; i++) + { + b3NewContactReductionKernel(&hostPairs.at(0), + &hostBodyBuf.at(0), + &hostSepNormals.at(0), + &hostHasSepAxis.at(0), + &hostContactsOut.at(0), + &hostClippingFaces.at(0), + &worldVertsB2CPU.at(0), + &nGlobalContactsOut, + vertexFaceCapacity, + newContactCapacity, + nPairs, + i); + } + + nContacts = nGlobalContactsOut; + m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); + hostContactsOut.resize(nContacts); + //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); + contactOut->copyFromHost(hostContactsOut); } + // b3Contact4 pt = contactOut->at(0); + // printf("nContacts = %d\n",nContacts); + } + } + } + else //breakupKernel + { + if (nPairs) + { + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(pairs->getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(m_sepNormals.getBufferCL()), + b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nPairs); + launcher.setConst(maxContactCapacity); - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - hostContactsOut.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContactsOut); + int num = nPairs; + launcher.launch1D(num); + clFinish(m_queue); + + nContacts = m_totalContactsOut.at(0); + if (nContacts >= maxContactCapacity) + { + b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity); + nContacts = maxContactCapacity; } - // b3Contact4 pt = contactOut->at(0); - // printf("nContacts = %d\n",nContacts); + contactOut->resize(nContacts); } } - } - else//breakupKernel - { - if (nPairs) + int nCompoundsPairs = m_gpuCompoundPairs.size(); + + if (nCompoundsPairs) { b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); + b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), + b3BufferInfoCL(bodyBuf->getBufferCL(), true), + b3BufferInfoCL(gpuCollidables.getBufferCL(), true), + b3BufferInfoCL(convexData.getBufferCL(), true), + b3BufferInfoCL(gpuVertices.getBufferCL(), true), + b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), + b3BufferInfoCL(gpuFaces.getBufferCL(), true), + b3BufferInfoCL(gpuIndices.getBufferCL(), true), + b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), + b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true), + b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true), + b3BufferInfoCL(contactOut->getBufferCL()), + b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); + launcher.setConst(nCompoundsPairs); launcher.setConst(maxContactCapacity); - int num = nPairs; - launcher.launch1D( num); + int num = nCompoundsPairs; + launcher.launch1D(num); clFinish(m_queue); nContacts = m_totalContactsOut.at(0); - if (nContacts >= maxContactCapacity) + if (nContacts > maxContactCapacity) { - b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); + b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); nContacts = maxContactCapacity; } contactOut->resize(nContacts); - } - } - - - int nCompoundsPairs = m_gpuCompoundPairs.size(); - - if (nCompoundsPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), - b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nCompoundsPairs ); - launcher.setConst(maxContactCapacity); - - int num = nCompoundsPairs; - launcher.launch1D( num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts>maxContactCapacity) - { - - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - }//if nCompoundsPairs + } //if nCompoundsPairs } - }//contactClippingOnGpu + } //contactClippingOnGpu //printf("nContacts end = %d\n",nContacts); - + //printf("frameCount = %d\n",frameCount++); } |