diff options
Diffstat (limited to 'thirdparty/bullet/src/Bullet3OpenCL')
124 files changed, 0 insertions, 57881 deletions
diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h deleted file mode 100644 index 0ed8aa8232..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h +++ /dev/null @@ -1,44 +0,0 @@ - -#ifndef B3_GPU_BROADPHASE_INTERFACE_H -#define B3_GPU_BROADPHASE_INTERFACE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3Vector3.h" -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - -class b3GpuBroadphaseInterface -{ -public: - - typedef class b3GpuBroadphaseInterface* (CreateFunc)(cl_context ctx,cl_device_id device, cl_command_queue q); - - virtual ~b3GpuBroadphaseInterface() - { - } - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask)=0; - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask)=0; - - virtual void calculateOverlappingPairs(int maxPairs)=0; - virtual void calculateOverlappingPairsHost(int maxPairs)=0; - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu()=0; - - virtual cl_mem getAabbBufferWS()=0; - virtual int getNumOverlap()=0; - virtual cl_mem getOverlappingPairBuffer()=0; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()=0; - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()=0; - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() = 0; - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0; - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0; - -}; - -#endif //B3_GPU_BROADPHASE_INTERFACE_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp deleted file mode 100644 index 74d0c8056c..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ /dev/null @@ -1,385 +0,0 @@ - -#include "b3GpuGridBroadphase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "kernels/gridBroadphaseKernels.h" -#include "kernels/sapKernels.h" -//#include "kernels/gridBroadphase.cl" - - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - - - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" -#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" - -cl_kernel kCalcHashAABB; -cl_kernel kClearCellStart; -cl_kernel kFindCellStart; -cl_kernel kFindOverlappingPairs; -cl_kernel m_copyAabbsKernel; -cl_kernel m_sap2Kernel; - - - - - -//int maxPairsPerBody = 64; -int maxBodiesPerCell = 256;//?? - -b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q ) -:m_context(ctx), -m_device(device), -m_queue(q), -m_allAabbsGPU1(ctx,q), -m_smallAabbsMappingGPU(ctx,q), -m_largeAabbsMappingGPU(ctx,q), -m_gpuPairs(ctx,q), - -m_hashGpu(ctx,q), - -m_cellStartGpu(ctx,q), -m_paramsGPU(ctx,q) -{ - - - b3Vector3 gridSize = b3MakeVector3(3,3,3); - b3Vector3 invGridSize = b3MakeVector3(1.f/gridSize[0],1.f/gridSize[1],1.f/gridSize[2]); - - m_paramsCPU.m_gridSize[0] = 128; - m_paramsCPU.m_gridSize[1] = 128; - m_paramsCPU.m_gridSize[2] = 128; - m_paramsCPU.m_gridSize[3] = maxBodiesPerCell; - m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell); - m_paramsCPU.m_invCellSize[0] = invGridSize[0]; - m_paramsCPU.m_invCellSize[1] = invGridSize[1]; - m_paramsCPU.m_invCellSize[2] = invGridSize[2]; - m_paramsCPU.m_invCellSize[3] = 0.f; - m_paramsGPU.push_back(m_paramsCPU); - - cl_int errNum=0; - - { - const char* sapSrc = sapCL; - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH); - b3Assert(errNum==CL_SUCCESS); - m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg ); - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg ); - b3Assert(errNum==CL_SUCCESS); - } - - { - - cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,gridBroadphaseCL,&errNum,"",B3_GRID_BROADPHASE_PATH); - b3Assert(errNum==CL_SUCCESS); - - kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kCalcHashAABB",&errNum,gridProg); - b3Assert(errNum==CL_SUCCESS); - - kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kClearCellStart",&errNum,gridProg); - b3Assert(errNum==CL_SUCCESS); - - kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindCellStart",&errNum,gridProg); - b3Assert(errNum==CL_SUCCESS); - - - kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,gridBroadphaseCL, "kFindOverlappingPairs",&errNum,gridProg); - b3Assert(errNum==CL_SUCCESS); - - - - - } - - m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue); - -} -b3GpuGridBroadphase::~b3GpuGridBroadphase() -{ - clReleaseKernel( kCalcHashAABB); - clReleaseKernel( kClearCellStart); - clReleaseKernel( kFindCellStart); - clReleaseKernel( kFindOverlappingPairs); - clReleaseKernel( m_sap2Kernel); - clReleaseKernel( m_copyAabbsKernel); - - - - delete m_sorter; -} - - - -void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); - -} -void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size();//NOT userPtr; - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); -} - -void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) -{ - B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs"); - - - if (0) - { - calculateOverlappingPairsHost(maxPairs); - /* - b3AlignedObjectArray<b3Int4> cpuPairs; - m_gpuPairs.copyToHost(cpuPairs); - printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size()); - for (int i=0;i<m_gpuPairs.size();i++) - { - printf("host pair %d = %d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y); - } - */ - return; - } - - - - - - int numSmallAabbs = m_smallAabbsMappingGPU.size(); - - b3OpenCLArray<int> pairCount(m_context,m_queue); - pairCount.push_back(0); - m_gpuPairs.resize(maxPairs);//numSmallAabbs*maxPairsPerBody); - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU1.getBufferCL() ), - b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), - b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), - b3BufferInfoCL( m_gpuPairs.getBufferCL() ), - b3BufferInfoCL(pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numLargeAabbs ); - launcher.setConst( numSmallAabbs); - launcher.setConst( 0 );//axis is not used - launcher.setConst( maxPairs ); - //@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); - - int numPairs = pairCount.at(0); - - if (numPairs >maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs =maxPairs; - } - } - } - - - - - if (numSmallAabbs) - { - B3_PROFILE("gridKernel"); - m_hashGpu.resize(numSmallAabbs); - { - B3_PROFILE("kCalcHashAABB"); - b3LauncherCL launch(m_queue,kCalcHashAABB,"kCalcHashAABB"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(this->m_paramsGPU.getBufferCL()); - launch.launch1D(numSmallAabbs); - } - - m_sorter->execute(m_hashGpu); - - int numCells = this->m_paramsCPU.m_gridSize[0]*this->m_paramsCPU.m_gridSize[1]*this->m_paramsCPU.m_gridSize[2]; - m_cellStartGpu.resize(numCells); - //b3AlignedObjectArray<int > cellStartCpu; - - - { - B3_PROFILE("kClearCellStart"); - b3LauncherCL launch(m_queue,kClearCellStart,"kClearCellStart"); - launch.setConst(numCells); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numCells); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - - } - - - { - B3_PROFILE("kFindCellStart"); - b3LauncherCL launch(m_queue,kFindCellStart,"kFindCellStart"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numSmallAabbs); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - - } - - { - B3_PROFILE("kFindOverlappingPairs"); - - - b3LauncherCL launch(m_queue,kFindOverlappingPairs,"kFindOverlappingPairs"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - - launch.setBuffer(m_paramsGPU.getBufferCL()); - //launch.setBuffer(0); - launch.setBuffer(pairCount.getBufferCL()); - launch.setBuffer(m_gpuPairs.getBufferCL()); - - launch.setConst(maxPairs); - launch.launch1D(numSmallAabbs); - - - int numPairs = pairCount.at(0); - if (numPairs >maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs =maxPairs; - } - - m_gpuPairs.resize(numPairs); - - if (0) - { - b3AlignedObjectArray<b3Int4> pairsCpu; - m_gpuPairs.copyToHost(pairsCpu); - - int sz = m_gpuPairs.size(); - printf("m_gpuPairs.size()=%d\n",sz); - for (int i=0;i<m_gpuPairs.size();i++) - { - printf("pair %d = %d,%d\n",i,pairsCpu[i].x,pairsCpu[i].y); - } - - printf("?!?\n"); - } - - } - - - } - - - - - - //calculateOverlappingPairsHost(maxPairs); -} -void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - - m_hostPairs.resize(0); - m_allAabbsGPU1.copyToHost(m_allAabbsCPU1); - for (int i=0;i<m_allAabbsCPU1.size();i++) - { - for (int j=i+1;j<m_allAabbsCPU1.size();j++) - { - if (b3TestAabbAgainstAabb2(m_allAabbsCPU1[i].m_minVec, m_allAabbsCPU1[i].m_maxVec, - m_allAabbsCPU1[j].m_minVec,m_allAabbsCPU1[j].m_maxVec)) - { - b3Int4 pair; - int a = m_allAabbsCPU1[j].m_minIndices[3]; - int b = m_allAabbsCPU1[i].m_minIndices[3]; - if (a<=b) - { - pair.x = a; - pair.y = b;//store the original index in the unsorted aabb array - } else - { - pair.x = b; - pair.y = a;//store the original index in the unsorted aabb array - } - - if (m_hostPairs.size()<maxPairs) - { - m_hostPairs.push_back(pair); - } - } - } - } - - - m_gpuPairs.copyFromHost(m_hostPairs); - - -} - - //call writeAabbsToGpu after done making all changes (createProxy etc) -void b3GpuGridBroadphase::writeAabbsToGpu() -{ - m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1); - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); - -} - -cl_mem b3GpuGridBroadphase::getAabbBufferWS() -{ - return this->m_allAabbsGPU1.getBufferCL(); -} -int b3GpuGridBroadphase::getNumOverlap() -{ - return m_gpuPairs.size(); -} -cl_mem b3GpuGridBroadphase::getOverlappingPairBuffer() -{ - return m_gpuPairs.getBufferCL(); -} - -b3OpenCLArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsGPU() -{ - return m_allAabbsGPU1; -} - -b3AlignedObjectArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsCPU() -{ - return m_allAabbsCPU1; -} - -b3OpenCLArray<b3Int4>& b3GpuGridBroadphase::getOverlappingPairsGPU() -{ - return m_gpuPairs; -} -b3OpenCLArray<int>& b3GpuGridBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray<int>& b3GpuGridBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h deleted file mode 100644 index ec18c9f716..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h +++ /dev/null @@ -1,88 +0,0 @@ -#ifndef B3_GPU_GRID_BROADPHASE_H -#define B3_GPU_GRID_BROADPHASE_H - -#include "b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -struct b3ParamsGridBroadphaseCL -{ - - float m_invCellSize[4]; - int m_gridSize[4]; - - int getMaxBodiesPerCell() const - { - return m_gridSize[3]; - } - - void setMaxBodiesPerCell(int maxOverlap) - { - m_gridSize[3] = maxOverlap; - } -}; - - -class b3GpuGridBroadphase : public b3GpuBroadphaseInterface -{ -protected: - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU1; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU1; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - b3AlignedObjectArray<b3Int4> m_hostPairs; - b3OpenCLArray<b3Int4> m_gpuPairs; - - b3OpenCLArray<b3SortData> m_hashGpu; - b3OpenCLArray<int> m_cellStartGpu; - - - b3ParamsGridBroadphaseCL m_paramsCPU; - b3OpenCLArray<b3ParamsGridBroadphaseCL> m_paramsGPU; - - class b3RadixSort32CL* m_sorter; - -public: - - b3GpuGridBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q ); - virtual ~b3GpuGridBroadphase(); - - static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuGridBroadphase(ctx,device,q); - } - - - - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU(); - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU(); - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU(); - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU(); - -}; - -#endif //B3_GPU_GRID_BROADPHASE_H
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp deleted file mode 100644 index 641df9eb12..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ /dev/null @@ -1,577 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "b3GpuParallelLinearBvh.h" - -b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) : - m_queue(queue), - m_radixSorter(context, device, queue), - - m_rootNodeIndex(context, queue), - m_maxDistanceFromRoot(context, queue), - m_temp(context, queue), - - m_internalNodeAabbs(context, queue), - m_internalNodeLeafIndexRanges(context, queue), - m_internalNodeChildNodes(context, queue), - m_internalNodeParentNodes(context, queue), - - m_commonPrefixes(context, queue), - m_commonPrefixLengths(context, queue), - m_distanceFromRoot(context, queue), - - m_leafNodeParentNodes(context, queue), - m_mortonCodesAndAabbIndicies(context, queue), - m_mergedAabb(context, queue), - m_leafNodeAabbs(context, queue), - - m_largeAabbs(context, queue) -{ - m_rootNodeIndex.resize(1); - m_maxDistanceFromRoot.resize(1); - m_temp.resize(1); - - // - const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; - - const char* kernelSource = parallelLinearBvhCL; //parallelLinearBvhCL.h - cl_int error; - char* additionalMacros = 0; - m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH); - b3Assert(m_parallelLinearBvhProgram); - - m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "separateAabbs", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_separateAabbsKernel); - m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_findAllNodesMergedAabbKernel); - m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_assignMortonCodesAndAabbIndiciesKernel); - - m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "computeAdjacentPairCommonPrefix", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_computeAdjacentPairCommonPrefixKernel); - m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_buildBinaryRadixTreeLeafNodesKernel); - m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_buildBinaryRadixTreeInternalNodesKernel); - m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findDistanceFromRoot", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_findDistanceFromRootKernel); - m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "findLeafIndexRanges", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_findLeafIndexRangesKernel); - - m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_plbvhCalculateOverlappingPairsKernel); - m_plbvhRayTraverseKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_plbvhRayTraverseKernel); - m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbAabbTest", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_plbvhLargeAabbAabbTestKernel); - m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString( context, device, kernelSource, "plbvhLargeAabbRayTest", &error, m_parallelLinearBvhProgram, additionalMacros ); - b3Assert(m_plbvhLargeAabbRayTestKernel); -} - -b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() -{ - clReleaseKernel(m_separateAabbsKernel); - clReleaseKernel(m_findAllNodesMergedAabbKernel); - clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel); - - clReleaseKernel(m_computeAdjacentPairCommonPrefixKernel); - clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel); - clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel); - clReleaseKernel(m_findDistanceFromRootKernel); - clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - clReleaseKernel(m_findLeafIndexRangesKernel); - - clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); - clReleaseKernel(m_plbvhRayTraverseKernel); - clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); - clReleaseKernel(m_plbvhLargeAabbRayTestKernel); - - clReleaseProgram(m_parallelLinearBvhProgram); -} - -void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices, - const b3OpenCLArray<int>& largeAabbIndices) -{ - B3_PROFILE("b3ParallelLinearBvh::build()"); - - int numLargeAabbs = largeAabbIndices.size(); - int numSmallAabbs = smallAabbIndices.size(); - - //Since all AABBs(both large and small) are input as a contiguous array, - //with 2 additional arrays used to indicate the indices of large and small AABBs, - //it is necessary to separate the AABBs so that the large AABBs will not degrade the quality of the BVH. - { - B3_PROFILE("Separate large and small AABBs"); - - m_largeAabbs.resize(numLargeAabbs); - m_leafNodeAabbs.resize(numSmallAabbs); - - //Write large AABBs into m_largeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ), - b3BufferInfoCL( largeAabbIndices.getBufferCL() ), - - b3BufferInfoCL( m_largeAabbs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numLargeAabbs); - - launcher.launch1D(numLargeAabbs); - } - - //Write small AABBs into m_leafNodeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( worldSpaceAabbs.getBufferCL() ), - b3BufferInfoCL( smallAabbIndices.getBufferCL() ), - - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numSmallAabbs); - - launcher.launch1D(numSmallAabbs); - } - - clFinish(m_queue); - } - - // - int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs - int numInternalNodes = numLeaves - 1; - - if(numLeaves < 2) - { - //Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(), - //so it does not matter if numLeaves == 0 and rootNodeIndex == -1 - int rootNodeIndex = numLeaves - 1; - m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1); - - //Since the AABBs need to be rearranged(sorted) for the BVH construction algorithm, - //m_mortonCodesAndAabbIndicies.m_value is used to map a sorted AABB index to the unsorted AABB index - //instead of directly moving the AABBs. It needs to be set for the ray cast traversal kernel to work. - //( m_mortonCodesAndAabbIndicies[].m_value == unsorted index == index of m_leafNodeAabbs ) - if(numLeaves == 1) - { - b3SortData leaf; - leaf.m_value = 0; //1 leaf so index is always 0; leaf.m_key does not need to be set - - m_mortonCodesAndAabbIndicies.resize(1); - m_mortonCodesAndAabbIndicies.copyFromHostPointer(&leaf, 1); - } - - return; - } - - // - { - m_internalNodeAabbs.resize(numInternalNodes); - m_internalNodeLeafIndexRanges.resize(numInternalNodes); - m_internalNodeChildNodes.resize(numInternalNodes); - m_internalNodeParentNodes.resize(numInternalNodes); - - m_commonPrefixes.resize(numInternalNodes); - m_commonPrefixLengths.resize(numInternalNodes); - m_distanceFromRoot.resize(numInternalNodes); - - m_leafNodeParentNodes.resize(numLeaves); - m_mortonCodesAndAabbIndicies.resize(numLeaves); - m_mergedAabb.resize(numLeaves); - } - - //Find the merged AABB of all small AABBs; this is used to define the size of - //each cell in the virtual grid for the next kernel(2^10 cells in each dimension). - { - B3_PROFILE("Find AABB of merged nodes"); - - m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array - - for(int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2; - numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_mergedAabb.getBufferCL() ) //Resulting AABB is stored in m_mergedAabb[0] - }; - - b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numAabbsNeedingMerge); - - launcher.launch1D(numAabbsNeedingMerge); - } - - clFinish(m_queue); - } - - //Insert the center of the AABBs into a virtual grid, - //then convert the discrete grid coordinates into a morton code - //For each element in m_mortonCodesAndAabbIndicies, set - // m_key == morton code (value to sort by) - // m_value == small AABB index - { - B3_PROFILE("Assign morton codes"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_mergedAabb.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - // - { - B3_PROFILE("Sort leaves by morton codes"); - - m_radixSorter.execute(m_mortonCodesAndAabbIndicies); - clFinish(m_queue); - } - - // - constructBinaryRadixTree(); - - - //Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices. - //The root node contains leaf node indices in the range [0, numLeafNodes - 1]. - //The child nodes of each node split their parent's index range into 2 contiguous halves. - // - //For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31]. - //The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31]. - // - //This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice - { - B3_PROFILE("m_findLeafIndexRangesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } -} - -void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs) -{ - int maxPairs = out_overlappingPairs.size(); - b3OpenCLArray<int>& numPairsGpu = m_temp; - - int reset = 0; - numPairsGpu.copyFromHostPointer(&reset, 1); - - // - if( m_leafNodeAabbs.size() > 1 ) - { - B3_PROFILE("PLBVH small-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - - b3BufferInfoCL( numPairsGpu.getBufferCL() ), - b3BufferInfoCL( out_overlappingPairs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxPairs); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if( numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0 ) - { - B3_PROFILE("PLBVH large-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_largeAabbs.getBufferCL() ), - - b3BufferInfoCL( numPairsGpu.getBufferCL() ), - b3BufferInfoCL( out_overlappingPairs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxPairs); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - - // - int numPairs = -1; - numPairsGpu.copyToHostPointer(&numPairs, 1); - if(numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - numPairsGpu.copyFromHostPointer(&maxPairs, 1); - } - - out_overlappingPairs.resize(numPairs); -} - - -void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, - b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs) -{ - B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()"); - - int numRays = rays.size(); - int maxRayRigidPairs = out_rayRigidPairs.size(); - - int reset = 0; - out_numRayRigidPairs.copyFromHostPointer(&reset, 1); - - // - if( m_leafNodeAabbs.size() > 0 ) - { - B3_PROFILE("PLBVH ray test small AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_internalNodeLeafIndexRanges.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - - b3BufferInfoCL( rays.getBufferCL() ), - - b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), - b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if(numLargeAabbRigids > 0) - { - B3_PROFILE("PLBVH ray test large AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_largeAabbs.getBufferCL() ), - b3BufferInfoCL( rays.getBufferCL() ), - - b3BufferInfoCL( out_numRayRigidPairs.getBufferCL() ), - b3BufferInfoCL( out_rayRigidPairs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - // - int numRayRigidPairs = -1; - out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1); - - if(numRayRigidPairs > maxRayRigidPairs) - b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs); - -} - -void b3GpuParallelLinearBvh::constructBinaryRadixTree() -{ - B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()"); - - int numLeaves = m_leafNodeAabbs.size(); - int numInternalNodes = numLeaves - 1; - - //Each internal node is placed in between 2 leaf nodes. - //By using this arrangement and computing the common prefix between - //these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree. - { - B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - b3BufferInfoCL( m_commonPrefixes.getBufferCL() ), - b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //For each leaf node, select its parent node by - //comparing the 2 nearest internal nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), - b3BufferInfoCL( m_leafNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - //For each internal node, perform 2 binary searches among the other internal nodes - //to its left and right to find its potential parent nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_commonPrefixes.getBufferCL() ), - b3BufferInfoCL( m_commonPrefixLengths.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Find the number of nodes seperating each internal node and the root node - //so that the AABBs can be set using the next kernel. - //Also determine the maximum number of nodes separating an internal node and the root node. - { - B3_PROFILE("m_findDistanceFromRootKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_rootNodeIndex.getBufferCL() ), - b3BufferInfoCL( m_internalNodeParentNodes.getBufferCL() ), - b3BufferInfoCL( m_maxDistanceFromRoot.getBufferCL() ), - b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Starting from the internal nodes nearest to the leaf nodes, recursively move up - //the tree towards the root to set the AABBs of each internal node; each internal node - //checks its children and merges their AABBs - { - B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel"); - - int maxDistanceFromRoot = -1; - { - B3_PROFILE("copy maxDistanceFromRoot to CPU"); - m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1); - clFinish(m_queue); - } - - for(int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_distanceFromRoot.getBufferCL() ), - b3BufferInfoCL( m_mortonCodesAndAabbIndicies.getBufferCL() ), - b3BufferInfoCL( m_internalNodeChildNodes.getBufferCL() ), - b3BufferInfoCL( m_leafNodeAabbs.getBufferCL() ), - b3BufferInfoCL( m_internalNodeAabbs.getBufferCL() ) - }; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(maxDistanceFromRoot); - launcher.setConst(distanceFromRoot); - launcher.setConst(numInternalNodes); - - //It may seem inefficent to launch a thread for each internal node when a - //much smaller number of nodes is actually processed, but this is actually - //faster than determining the exact nodes that are ready to merge their child AABBs. - launcher.launch1D(numInternalNodes); - } - - clFinish(m_queue); - } -} - -
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h deleted file mode 100644 index effe617b7b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H -#define B3_GPU_PARALLEL_LINEAR_BVH_H - -//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" - -#define b3Int64 cl_long - -///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame -///@remarks -///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details. -///@par -///Related papers: \n -///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n -///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n -///@par -///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages: -/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid) -/// - [fully parallel] Sort morton codes -/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH) -/// - [somewhat parallel] Set internal node AABBs -///@par -///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages. -///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree. -///Instead of searching for the child nodes of each internal node, we search for the parent node of each node. -///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs. -class b3GpuParallelLinearBvh -{ - cl_command_queue m_queue; - - cl_program m_parallelLinearBvhProgram; - - cl_kernel m_separateAabbsKernel; - cl_kernel m_findAllNodesMergedAabbKernel; - cl_kernel m_assignMortonCodesAndAabbIndiciesKernel; - - //Binary radix tree construction kernels - cl_kernel m_computeAdjacentPairCommonPrefixKernel; - cl_kernel m_buildBinaryRadixTreeLeafNodesKernel; - cl_kernel m_buildBinaryRadixTreeInternalNodesKernel; - cl_kernel m_findDistanceFromRootKernel; - cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel; - - cl_kernel m_findLeafIndexRangesKernel; - - //Traversal kernels - cl_kernel m_plbvhCalculateOverlappingPairsKernel; - cl_kernel m_plbvhRayTraverseKernel; - cl_kernel m_plbvhLargeAabbAabbTestKernel; - cl_kernel m_plbvhLargeAabbRayTestKernel; - - b3RadixSort32CL m_radixSorter; - - //1 element - b3OpenCLArray<int> m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node - b3OpenCLArray<int> m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node - b3OpenCLArray<int> m_temp; //Used to hold the number of pairs in calculateOverlappingPairs() - - //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1) - b3OpenCLArray<b3SapAabb> m_internalNodeAabbs; - b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index - b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node - b3OpenCLArray<int> m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - - //1 element per internal node; for binary radix tree construction - b3OpenCLArray<b3Int64> m_commonPrefixes; - b3OpenCLArray<int> m_commonPrefixLengths; - b3OpenCLArray<int> m_distanceFromRoot; //Number of internal nodes between this node and the root - - //1 element per leaf node (leaf nodes only include small AABBs) - b3OpenCLArray<int> m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs - b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes - b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs - - //1 element per large AABB, which is not stored in the BVH - b3OpenCLArray<b3SapAabb> m_largeAabbs; - -public: - b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvh(); - - ///Must be called before any other function - void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices, - const b3OpenCLArray<int>& largeAabbIndices); - - ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. - ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs. - ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized. - void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs); - - ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; - ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough. - ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index. - ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded. - void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, - b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs); - -private: - void constructBinaryRadixTree(); -}; - -#endif diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp deleted file mode 100644 index d2618024ac..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#include "b3GpuParallelLinearBvhBroadphase.h" - -b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) : - m_plbvh(context, device, queue), - - m_overlappingPairsGpu(context, queue), - - m_aabbsGpu(context, queue), - m_smallAabbsMappingGpu(context, queue), - m_largeAabbsMappingGpu(context, queue) -{ -} - -void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_smallAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} -void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_largeAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} - -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs) -{ - //Reconstruct BVH - m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu); - - // - m_overlappingPairsGpu.resize(maxPairs); - m_plbvh.calculateOverlappingPairs(m_overlappingPairsGpu); -} -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - b3Assert(0); //CPU version not implemented -} - -void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu() -{ - m_aabbsGpu.copyFromHost(m_aabbsCpu); - m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu); - m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu); -} - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h deleted file mode 100644 index e518500637..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H -#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H - -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" - -#include "b3GpuParallelLinearBvh.h" - -class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface -{ - b3GpuParallelLinearBvh m_plbvh; - - b3OpenCLArray<b3Int4> m_overlappingPairsGpu; - - b3OpenCLArray<b3SapAabb> m_aabbsGpu; - b3OpenCLArray<int> m_smallAabbsMappingGpu; - b3OpenCLArray<int> m_largeAabbsMappingGpu; - - b3AlignedObjectArray<b3SapAabb> m_aabbsCpu; - b3AlignedObjectArray<int> m_smallAabbsMappingCpu; - b3AlignedObjectArray<int> m_largeAabbsMappingCpu; - -public: - b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvhBroadphase() {} - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); } - virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); } - - virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); } - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; } - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() { return m_overlappingPairsGpu; } - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; } - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; } - - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; } - - static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue) - { - return new b3GpuParallelLinearBvhBroadphase(context, device, queue); - } -}; - -#endif diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp deleted file mode 100644 index c45fbbdcaa..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ /dev/null @@ -1,1366 +0,0 @@ - -bool searchIncremental3dSapOnGpu = true; -#include <limits.h> -#include "b3GpuSapBroadphase.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h" - - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/sapKernels.h" - -#include "Bullet3Common/b3MinMax.h" - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" - -/* - - - - - - - b3OpenCLArray<int> m_pairCount; - - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray<b3Vector3> m_sum; - b3OpenCLArray<b3Vector3> m_sum2; - b3OpenCLArray<b3Vector3> m_dst; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - - b3OpenCLArray<b3Int4> m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray<b3SortData> m_gpuSmallSortData; - b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - */ - -b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType) -:m_context(ctx), -m_device(device), -m_queue(q), - -m_objectMinMaxIndexGPUaxis0(ctx,q), -m_objectMinMaxIndexGPUaxis1(ctx,q), -m_objectMinMaxIndexGPUaxis2(ctx,q), -m_objectMinMaxIndexGPUaxis0prev(ctx,q), -m_objectMinMaxIndexGPUaxis1prev(ctx,q), -m_objectMinMaxIndexGPUaxis2prev(ctx,q), -m_sortedAxisGPU0(ctx,q), -m_sortedAxisGPU1(ctx,q), -m_sortedAxisGPU2(ctx,q), -m_sortedAxisGPU0prev(ctx,q), -m_sortedAxisGPU1prev(ctx,q), -m_sortedAxisGPU2prev(ctx,q), -m_addedHostPairsGPU(ctx,q), -m_removedHostPairsGPU(ctx,q), -m_addedCountGPU(ctx,q), -m_removedCountGPU(ctx,q), -m_currentBuffer(-1), -m_pairCount(ctx,q), -m_allAabbsGPU(ctx,q), -m_sum(ctx,q), -m_sum2(ctx,q), -m_dst(ctx,q), -m_smallAabbsMappingGPU(ctx,q), -m_largeAabbsMappingGPU(ctx,q), -m_overlappingPairs(ctx,q), -m_gpuSmallSortData(ctx,q), -m_gpuSmallSortedAabbs(ctx,q) -{ - const char* sapSrc = sapCL; - - - cl_int errNum=0; - - b3Assert(m_context); - b3Assert(m_device); - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH); - b3Assert(errNum==CL_SUCCESS); - - - b3Assert(errNum==CL_SUCCESS); -#ifndef __APPLE__ - m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context,m_device,m_queue); -#else - m_prefixScanFloat4 = 0; -#endif - m_sapKernel = 0; - - switch (kernelType) - { - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU: - { - m_sapKernel=0; - break; - } - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBruteForce",&errNum,sapProg ); - break; - } - - case B3_GPU_SAP_KERNEL_ORIGINAL: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg ); - break; - } - case B3_GPU_SAP_KERNEL_BARRIER: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg ); - break; - } - case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg ); - break; - } - - default: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg ); - b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory"); - } - }; - - - - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg ); - b3Assert(errNum==CL_SUCCESS); - - m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "prepareSumVarianceKernel",&errNum,sapProg ); - b3Assert(errNum==CL_SUCCESS); - - - m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg ); - - m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg ); - - m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg ); - - m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue); -} - -b3GpuSapBroadphase::~b3GpuSapBroadphase() -{ - delete m_sorter; - delete m_prefixScanFloat4; - - clReleaseKernel(m_scatterKernel); - clReleaseKernel(m_flipFloatKernel); - clReleaseKernel(m_copyAabbsKernel); - clReleaseKernel(m_sapKernel); - clReleaseKernel(m_sap2Kernel); - clReleaseKernel(m_prepareSumVarianceKernel); - - -} - -/// conservative test for overlap between two aabbs -static bool TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1, - const b3Vector3 &aabbMin2, const b3Vector3 &aabbMax2) -{ - bool overlap = true; - overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap; - overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap; - overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap; - return overlap; -} - - - -//http://stereopsis.com/radix.html -static unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -}; - -void b3GpuSapBroadphase::init3dSap() -{ - if (m_currentBuffer<0) - { - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - m_currentBuffer = 0; - for (int axis=0;axis<3;axis++) - { - for (int buf=0;buf<2;buf++) - { - int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = 2*totalNumAabbs; - m_sortedAxisCPU[axis][buf].resize(numEndPoints); - - if (buf==m_currentBuffer) - { - for (int i=0;i<totalNumAabbs;i++) - { - m_sortedAxisCPU[axis][buf][i*2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis])-1; - m_sortedAxisCPU[axis][buf][i*2].m_value = i*2; - m_sortedAxisCPU[axis][buf][i*2+1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis])+1; - m_sortedAxisCPU[axis][buf][i*2+1].m_value = i*2+1; - } - } - } - } - - for (int axis=0;axis<3;axis++) - { - m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - - for (int axis=0;axis<3;axis++) - { - //int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); - m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints); - for (int i=0;i<numEndPoints;i++) - { - int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - int newDest = destIndex/2; - if (destIndex&1) - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i; - } else - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i; - } - } - } - - } -} - - -static bool b3PairCmp(const b3Int4& p, const b3Int4& q) -{ - return ((p.x<q.x) || ((p.x==q.x) && (p.y<q.y))); -} - - -static bool operator==(const b3Int4& a,const b3Int4& b) -{ - return a.x == b.x && a.y == b.y; -}; - -static bool operator<(const b3Int4& a,const b3Int4& b) -{ - return a.x < b.x || (a.x == b.x && a.y < b.y); -}; - -static bool operator>(const b3Int4& a,const b3Int4& b) -{ - return a.x > b.x || (a.x == b.x && a.y > b.y); -}; - -b3AlignedObjectArray<b3Int4> addedHostPairs; -b3AlignedObjectArray<b3Int4> removedHostPairs; - -b3AlignedObjectArray<b3SapAabb> preAabbs; - -void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() -{ - //static int framepje = 0; - //printf("framepje=%d\n",framepje++); - - - B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap"); - - addedHostPairs.resize(0); - removedHostPairs.resize(0); - - b3Assert(m_currentBuffer>=0); - - { - preAabbs.resize(m_allAabbsCPU.size()); - for (int i=0;i<preAabbs.size();i++) - { - preAabbs[i]=m_allAabbsCPU[i]; - } - } - - - if (m_currentBuffer<0) - return; - { - B3_PROFILE("m_allAabbsGPU.copyToHost"); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - } - - b3AlignedObjectArray<b3Int4> allPairs; - { - B3_PROFILE("m_overlappingPairs.copyToHost"); - m_overlappingPairs.copyToHost(allPairs); - } - if (0) - { - { - printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n", - m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1],m_allAabbsCPU[40].m_min[2], - m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1],m_allAabbsCPU[40].m_max[2]); - } - - { - printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n", - m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1],m_allAabbsCPU[53].m_min[2], - m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1],m_allAabbsCPU[53].m_max[2]); - } - - - { - b3Int4 newPair; - newPair.x = 40; - newPair.y = 53; - int index = allPairs.findBinarySearch(newPair); - printf("hasPair(40,53)=%d out of %d\n",index, allPairs.size()); - - { - int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max,(const b3Vector3&)m_allAabbsCPU[53].m_min,(const b3Vector3&)m_allAabbsCPU[53].m_max); - printf("overlap=%d\n",overlap); - } - - if (preAabbs.size()) - { - int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max,(const b3Vector3&)preAabbs[53].m_min,(const b3Vector3&)preAabbs[53].m_max); - printf("prevoverlap=%d\n",prevOverlap); - } else - { - printf("unknown prevoverlap\n"); - } - - } - } - - - if (0) - { - for (int i=0;i<m_allAabbsCPU.size();i++) - { - //printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]); - - - } - - for (int axis=0;axis<3;axis++) - { - for (int buf=0;buf<2;buf++) - { - b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size()*2); - } - } - } - - - - m_currentBuffer = 1-m_currentBuffer; - - - - int totalNumAabbs = m_allAabbsCPU.size(); - - { - B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)"); - for (int i=0;i<totalNumAabbs;i++) - { - - - unsigned int keyMin[3]; - unsigned int keyMax[3]; - for (int axis=0;axis<3;axis++) - { - float vmin=m_allAabbsCPU[i].m_min[axis]; - float vmax = m_allAabbsCPU[i].m_max[axis]; - keyMin[axis] = FloatFlip(vmin); - keyMax[axis] = FloatFlip(vmax); - - m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_key = keyMin[axis]-1; - m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_value = i*2; - m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_key = keyMax[axis]+1; - m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_value = i*2+1; - } - //printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]); - - } - } - - - - { - B3_PROFILE("sort m_sortedAxisCPU"); - for (int axis=0;axis<3;axis++) - m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - -#if 0 - if (0) - { - for (int axis=0;axis<3;axis++) - { - //printf("axis %d\n",axis); - for (int i=0;i<m_sortedAxisCPU[axis][m_currentBuffer].size();i++) - { - //int key = m_sortedAxisCPU[axis][m_currentBuffer][i].m_key; - //int value = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - //printf("[%d]=%d\n",i,value); - } - - } - } -#endif - - { - B3_PROFILE("assign m_objectMinMaxIndexCPU"); - for (int axis=0;axis<3;axis++) - { - int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); - m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs); - for (int i=0;i<numEndPoints;i++) - { - int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - int newDest = destIndex/2; - if (destIndex&1) - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i; - } else - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i; - } - } - } - } - -#if 0 - if (0) - { - printf("==========================\n"); - for (int axis=0;axis<3;axis++) - { - unsigned int curMinIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].x; - unsigned int curMaxIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].y; - unsigned int prevMaxIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].y; - unsigned int prevMinIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].x; - - int dmin40 = curMinIndex40 - prevMinIndex40; - int dmax40 = curMinIndex40 - prevMinIndex40; - printf("axis %d curMinIndex40=%d prevMinIndex40=%d\n",axis,curMinIndex40, prevMinIndex40); - printf("axis %d curMaxIndex40=%d prevMaxIndex40=%d\n",axis,curMaxIndex40, prevMaxIndex40); - } - printf(".........................\n"); - for (int axis=0;axis<3;axis++) - { - unsigned int curMinIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].x; - unsigned int curMaxIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].y; - unsigned int prevMaxIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].y; - unsigned int prevMinIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].x; - - int dmin40 = curMinIndex53 - prevMinIndex53; - int dmax40 = curMinIndex53 - prevMinIndex53; - printf("axis %d curMinIndex53=%d prevMinIndex53=%d\n",axis,curMinIndex53, prevMinIndex53); - printf("axis %d curMaxIndex53=%d prevMaxIndex53=%d\n",axis,curMaxIndex53, prevMaxIndex53); - } - - } -#endif - - - int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size(); - int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size(); - b3Assert(a==b); - b3Assert(b==c); - /* - if (searchIncremental3dSapOnGpu) - { - B3_PROFILE("computePairsIncremental3dSapKernelGPU"); - int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - int maxCapacity = 1024*1024; - { - B3_PROFILE("copy from host"); - m_objectMinMaxIndexGPUaxis0.copyFromHost(m_objectMinMaxIndexCPU[0][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis1.copyFromHost(m_objectMinMaxIndexCPU[1][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis2.copyFromHost(m_objectMinMaxIndexCPU[2][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis0prev.copyFromHost(m_objectMinMaxIndexCPU[0][1-m_currentBuffer]); - m_objectMinMaxIndexGPUaxis1prev.copyFromHost(m_objectMinMaxIndexCPU[1][1-m_currentBuffer]); - m_objectMinMaxIndexGPUaxis2prev.copyFromHost(m_objectMinMaxIndexCPU[2][1-m_currentBuffer]); - - m_sortedAxisGPU0.copyFromHost(m_sortedAxisCPU[0][m_currentBuffer]); - m_sortedAxisGPU1.copyFromHost(m_sortedAxisCPU[1][m_currentBuffer]); - m_sortedAxisGPU2.copyFromHost(m_sortedAxisCPU[2][m_currentBuffer]); - m_sortedAxisGPU0prev.copyFromHost(m_sortedAxisCPU[0][1-m_currentBuffer]); - m_sortedAxisGPU1prev.copyFromHost(m_sortedAxisCPU[1][1-m_currentBuffer]); - m_sortedAxisGPU2prev.copyFromHost(m_sortedAxisCPU[2][1-m_currentBuffer]); - - - m_addedHostPairsGPU.resize(maxCapacity); - m_removedHostPairsGPU.resize(maxCapacity); - - m_addedCountGPU.resize(0); - m_addedCountGPU.push_back(0); - m_removedCountGPU.resize(0); - m_removedCountGPU.push_back(0); - } - - { - B3_PROFILE("launch1D"); - b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel"); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis0prev.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis1prev.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis2prev.getBufferCL()); - - launcher.setBuffer(m_sortedAxisGPU0.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU1.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU2.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU0prev.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU1prev.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU2prev.getBufferCL()); - - - launcher.setBuffer(m_addedHostPairsGPU.getBufferCL()); - launcher.setBuffer(m_removedHostPairsGPU.getBufferCL()); - launcher.setBuffer(m_addedCountGPU.getBufferCL()); - launcher.setBuffer(m_removedCountGPU.getBufferCL()); - launcher.setConst(maxCapacity); - launcher.setConst( numObjects); - launcher.launch1D( numObjects); - clFinish(m_queue); - } - - { - B3_PROFILE("copy to host"); - int addedCountGPU = m_addedCountGPU.at(0); - m_addedHostPairsGPU.resize(addedCountGPU); - m_addedHostPairsGPU.copyToHost(addedHostPairs); - - //printf("addedCountGPU=%d\n",addedCountGPU); - int removedCountGPU = m_removedCountGPU.at(0); - m_removedHostPairsGPU.resize(removedCountGPU); - m_removedHostPairsGPU.copyToHost(removedHostPairs); - //printf("removedCountGPU=%d\n",removedCountGPU); - - } - - - - } - else - */ - { - int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - - B3_PROFILE("actual search"); - for (int i=0;i<numObjects;i++) - { - //int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size(); - //int checkObjects[]={40,53}; - //int numCheckObjects = sizeof(checkObjects)/sizeof(int); - - //for (int a=0;a<numCheckObjects ;a++) - - for (int axis=0;axis<3;axis++) - { - //int i = checkObjects[a]; - - unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x; - unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y; - unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].x; - int dmin = curMinIndex - prevMinIndex; - - unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].y; - - - - int dmax = curMaxIndex - prevMaxIndex; - if (dmin!=0) - { - //printf("for object %d, dmin=%d\n",i,dmin); - } - if (dmax!=0) - { - //printf("for object %d, dmax=%d\n",i,dmax); - } - for (int otherbuffer = 0;otherbuffer<2;otherbuffer++) - { - if (dmin!=0) - { - int stepMin = dmin<0 ? -1 : 1; - for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin) - { - int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; - int otherIndex = otherIndex2/2; - if (otherIndex!=i) - { - bool otherIsMax = ((otherIndex2&1)!=0); - - if (otherIsMax) - { - //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); - //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); - - bool overlap = true; - - for (int ax=0;ax<3;ax++) - { - if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap=false; - } - - // b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax=0;ax<3;ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) - prevOverlap=false; - } - - - //b3Assert(overlap==overlap2); - - - - if (dmin<0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i<=otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - } - } - else - { - if (!overlap && prevOverlap) - { - - //remove a pair - b3Int4 removedPair; - if (i<=otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - } - }//otherisMax - }//if (dmin<0) - }//if (otherIndex!=i) - }//for (int j= - } - - if (dmax!=0) - { - int stepMax = dmax<0 ? -1 : 1; - for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax) - { - int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; - int otherIndex = otherIndex2/2; - if (otherIndex!=i) - { - //bool otherIsMin = ((otherIndex2&1)==0); - //if (otherIsMin) - { - //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); - //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); - - bool overlap = true; - - for (int ax=0;ax<3;ax++) - { - if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap=false; - } - //b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax=0;ax<3;ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) - prevOverlap=false; - } - - - if (dmax>0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i<=otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - - } - } - else - { - if (!overlap && prevOverlap) - { - //if (otherIndex2&1==0) -> min? - //remove a pair - b3Int4 removedPair; - if (i<=otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - - } - } - - }//if (dmin<0) - }//if (otherIndex!=i) - }//for (int j= - } - }//for (int otherbuffer - }//for (int axis=0; - }//for (int i=0;i<numObjects - } - - //remove duplicates and add/remove then to existing m_overlappingPairs - - - - { - { - B3_PROFILE("sort allPairs"); - allPairs.quickSort(b3PairCmp); - } - { - B3_PROFILE("sort addedHostPairs"); - addedHostPairs.quickSort(b3PairCmp); - } - { - B3_PROFILE("sort removedHostPairs"); - removedHostPairs.quickSort(b3PairCmp); - } - } - - b3Int4 prevPair; - prevPair.x = -1; - prevPair.y = -1; - - int uniqueRemovedPairs = 0; - - b3AlignedObjectArray<int> removedPositions; - - { - B3_PROFILE("actual removing"); - for (int i=0;i<removedHostPairs.size();i++) - { - b3Int4 removedPair = removedHostPairs[i]; - if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y)) - { - - int index1 = allPairs.findBinarySearch(removedPair); - - //#ifdef _DEBUG - - - - int index2 = allPairs.findLinearSearch(removedPair); - b3Assert(index1==index2); - - //b3Assert(index1!=allPairs.size()); - if (index1<allPairs.size()) - //#endif//_DEBUG - { - uniqueRemovedPairs++; - removedPositions.push_back(index1); - { - //printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y); - } - } - } - prevPair = removedPair; - } - - if (uniqueRemovedPairs) - { - for (int i=0;i<removedPositions.size();i++) - { - allPairs[removedPositions[i]].x = INT_MAX ; - allPairs[removedPositions[i]].y = INT_MAX ; - } - allPairs.quickSort(b3PairCmp); - allPairs.resize(allPairs.size()-uniqueRemovedPairs); - } - } - //if (uniqueRemovedPairs) - // printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs); - //printf("removedHostPairs.size = %d\n",removedHostPairs.size()); - - prevPair.x = -1; - prevPair.y = -1; - - int uniqueAddedPairs=0; - b3AlignedObjectArray<b3Int4> actualAddedPairs; - - { - B3_PROFILE("actual adding"); - for (int i=0;i<addedHostPairs.size();i++) - { - b3Int4 newPair = addedHostPairs[i]; - if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y)) - { -//#ifdef _DEBUG - int index1 = allPairs.findBinarySearch(newPair); - - - int index2 = allPairs.findLinearSearch(newPair); - b3Assert(index1==index2); - - - b3Assert(index1==allPairs.size()); - if (index1!=allPairs.size()) - { - printf("??\n"); - } - - if (index1==allPairs.size()) -//#endif //_DEBUG - { - uniqueAddedPairs++; - actualAddedPairs.push_back(newPair); - } - } - prevPair = newPair; - } - for (int i=0;i<actualAddedPairs.size();i++) - { - //printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y); - allPairs.push_back(actualAddedPairs[i]); - } - } - - //if (uniqueAddedPairs) - // printf("uniqueAddedPairs=%d\n", uniqueAddedPairs); - - - { - B3_PROFILE("m_overlappingPairs.copyFromHost"); - m_overlappingPairs.copyFromHost(allPairs); - } - - -} - - - - -void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - //test -// if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - - - int axis=0; - { - B3_PROFILE("CPU compute best variance axis"); - b3Vector3 s=b3MakeVector3(0,0,0),s2=b3MakeVector3(0,0,0); - int numRigidBodies = m_smallAabbsMappingCPU.size(); - - for(int i=0;i<numRigidBodies;i++) - { - b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - b3Vector3 maxAabb=b3MakeVector3(aabb.m_max[0],aabb.m_max[1],aabb.m_max[2]); - b3Vector3 minAabb=b3MakeVector3(aabb.m_min[0],aabb.m_min[1],aabb.m_min[2]); - b3Vector3 centerAabb=(maxAabb+minAabb)*0.5f; - - s += centerAabb; - s2 += centerAabb*centerAabb; - } - b3Vector3 v = s2 - (s*s) / (float)numRigidBodies; - - if(v[1] > v[0]) - axis = 1; - if(v[2] > v[axis]) - axis = 2; - } - - - - - b3AlignedObjectArray<b3Int4> hostPairs; - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i=0;i<numSmallAabbs;i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - //float reference = smallAabbi.m_max[axis]; - - for (int j=i+1;j<numSmallAabbs;j++) - { - - b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]]; - - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)smallAabbj.m_min,(b3Vector3&)smallAabbj.m_max)) - { - b3Int4 pair; - int a = smallAabbi.m_minIndices[3]; - int b = smallAabbj.m_minIndices[3]; - if (a<=b) - { - pair.x = a;//store the original index in the unsorted aabb array - pair.y = b; - } else - { - pair.x = b;//store the original index in the unsorted aabb array - pair.y = a; - } - hostPairs.push_back(pair); - } - } - } - } - - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i=0;i<numSmallAabbs;i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - //float reference = smallAabbi.m_max[axis]; - int numLargeAabbs = m_largeAabbsMappingCPU.size(); - - for (int j=0;j<numLargeAabbs;j++) - { - b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]]; - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)largeAabbj.m_min,(b3Vector3&)largeAabbj.m_max)) - { - b3Int4 pair; - int a = largeAabbj.m_minIndices[3]; - int b = smallAabbi.m_minIndices[3]; - if (a<=b) - { - pair.x = a; - pair.y = b;//store the original index in the unsorted aabb array - } else - { - pair.x = b; - pair.y = a;//store the original index in the unsorted aabb array - } - - hostPairs.push_back(pair); - } - } - } - } - - if (hostPairs.size() > maxPairs) - { - hostPairs.resize(maxPairs); - } - - if (hostPairs.size()) - { - m_overlappingPairs.copyFromHost(hostPairs); - } else - { - m_overlappingPairs.resize(0); - } - - //init3dSap(); - -} - -void b3GpuSapBroadphase::reset() -{ - m_allAabbsGPU.resize(0); - m_allAabbsCPU.resize(0); - - - m_smallAabbsMappingGPU.resize(0); - m_smallAabbsMappingCPU.resize(0); - - m_pairCount.resize(0); - - m_largeAabbsMappingGPU.resize(0); - m_largeAabbsMappingCPU.resize(0); - -} - - -void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) -{ - if (m_sapKernel==0) - { - calculateOverlappingPairsHost(maxPairs); - return; - } - - //if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - //calculateOverlappingPairsHost(maxPairs); - - B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); - - int axis = 0; - - { - - //bool syncOnHost = false; - - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - if (m_prefixScanFloat4 && numSmallAabbs) - { - B3_PROFILE("GPU compute best variance axis"); - - if (m_dst.size()!=(numSmallAabbs+1)) - { - m_dst.resize(numSmallAabbs+128); - m_sum.resize(numSmallAabbs+128); - m_sum2.resize(numSmallAabbs+128); - m_sum.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? - m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? - } - - b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel"); - launcher.setBuffer(m_allAabbsGPU.getBufferCL()); - - launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launcher.setBuffer(m_sum.getBufferCL()); - launcher.setBuffer(m_sum2.getBufferCL()); - launcher.setConst( numSmallAabbs ); - int num = numSmallAabbs; - launcher.launch1D( num); - - - b3Vector3 s; - b3Vector3 s2; - m_prefixScanFloat4->execute(m_sum,m_dst,numSmallAabbs+1,&s); - m_prefixScanFloat4->execute(m_sum2,m_dst,numSmallAabbs+1,&s2); - - b3Vector3 v = s2 - (s*s) / (float)numSmallAabbs; - - if(v[1] > v[0]) - axis = 1; - if(v[2] > v[axis]) - axis = 2; - } - - - - m_gpuSmallSortData.resize(numSmallAabbs); - - -#if 1 - if (m_smallAabbsMappingGPU.size()) - { - - B3_PROFILE("flipFloatKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), - b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numSmallAabbs ); - launcher.setConst( axis ); - - int num = numSmallAabbs; - launcher.launch1D( num); - clFinish(m_queue); - } - - if (m_gpuSmallSortData.size()) - { - B3_PROFILE("gpu radix sort"); - m_sorter->execute(m_gpuSmallSortData); - clFinish(m_queue); - } - - m_gpuSmallSortedAabbs.resize(numSmallAabbs); - if (numSmallAabbs) - { - B3_PROFILE("scatterKernel"); - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), - b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true), - b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel "); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numSmallAabbs); - int num = numSmallAabbs; - launcher.launch1D( num); - clFinish(m_queue); - - } - - - m_overlappingPairs.resize(maxPairs); - - m_pairCount.resize(0); - m_pairCount.push_back(0); - int numPairs=0; - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - //@todo - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ), - b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), - b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), - b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), - b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numLargeAabbs ); - launcher.setConst( numSmallAabbs); - launcher.setConst( axis ); - launcher.setConst( maxPairs ); -//@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); - - numPairs = m_pairCount.at(0); - if (numPairs >maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs =maxPairs; - } - } - } - if (m_gpuSmallSortedAabbs.size()) - { - B3_PROFILE("sapKernel"); - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sapKernel,"m_sapKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numSmallAabbs ); - launcher.setConst( axis ); - launcher.setConst( maxPairs ); - - - int num = numSmallAabbs; -#if 0 - int buffSize = launcher.getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; - for (int i=0;i<buffSize+1;i++) - { - unsigned char* ptr = (unsigned char*)&buf[i]; - *ptr = 0xff; - } - int actualWrite = launcher.serializeArguments(buf,buffSize); - - unsigned char* cptr = (unsigned char*)&buf[buffSize]; - // printf("buf[buffSize] = %d\n",*cptr); - - assert(buf[buffSize]==0xff);//check for buffer overrun - int* ptr = (int*)&buf[buffSize]; - - *ptr = num; - - FILE* f = fopen("m_sapKernelArgs.bin","wb"); - fwrite(buf,buffSize+sizeof(int),1,f); - fclose(f); -#endif// - - launcher.launch1D( num); - clFinish(m_queue); - - numPairs = m_pairCount.at(0); - if (numPairs>maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - m_pairCount.resize(0); - m_pairCount.push_back(maxPairs); - } - } - -#else - int numPairs = 0; - - - b3LauncherCL launcher(m_queue, m_sapKernel); - - const char* fileName = "m_sapKernelArgs.bin"; - FILE* f = fopen(fileName,"rb"); - if (f) - { - int sizeInBytes=0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*) malloc(sizeInBytes); - fread(buf,sizeInBytes,1,f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); - int num = *(int*)&buf[serializedBytes]; - launcher.launch1D( num); - - b3OpenCLArray<int> pairCount(m_context, m_queue); - int numElements = launcher.m_arrays[2]->size()/sizeof(int); - pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements); - numPairs = pairCount.at(0); - //printf("overlapping pairs = %d\n",numPairs); - b3AlignedObjectArray<b3Int4> hostOoverlappingPairs; - b3OpenCLArray<b3Int4> tmpGpuPairs(m_context,m_queue); - tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs ); - - tmpGpuPairs.copyToHost(hostOoverlappingPairs); - m_overlappingPairs.copyFromHost(hostOoverlappingPairs); - //printf("hello %d\n", m_overlappingPairs.size()); - free(buf); - fclose(f); - - } else { - printf("error: cannot find file %s\n",fileName); - } - - clFinish(m_queue); - - -#endif - - - m_overlappingPairs.resize(numPairs); - - }//B3_PROFILE("GPU_RADIX SORT"); - //init3dSap(); -} - -void b3GpuSapBroadphase::writeAabbsToGpu() -{ - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); - - m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this - - - -} - -void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i=0;i<4;i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - m_allAabbsCPU.push_back(aabb); -} - -void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i=0;i<4;i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - - m_allAabbsCPU.push_back(aabb); -} - -cl_mem b3GpuSapBroadphase::getAabbBufferWS() -{ - return m_allAabbsGPU.getBufferCL(); -} - -int b3GpuSapBroadphase::getNumOverlap() -{ - return m_overlappingPairs.size(); -} -cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer() -{ - return m_overlappingPairs.getBufferCL(); -} - -b3OpenCLArray<b3Int4>& b3GpuSapBroadphase::getOverlappingPairsGPU() -{ - return m_overlappingPairs; -} -b3OpenCLArray<int>& b3GpuSapBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray<int>& b3GpuSapBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h deleted file mode 100644 index 8d36ac78f2..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ /dev/null @@ -1,151 +0,0 @@ -#ifndef B3_GPU_SAP_BROADPHASE_H -#define B3_GPU_SAP_BROADPHASE_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" - -#include "b3GpuBroadphaseInterface.h" - - -class b3GpuSapBroadphase : public b3GpuBroadphaseInterface -{ - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_flipFloatKernel; - cl_kernel m_scatterKernel ; - cl_kernel m_copyAabbsKernel; - cl_kernel m_sapKernel; - cl_kernel m_sap2Kernel; - cl_kernel m_prepareSumVarianceKernel; - - - class b3RadixSort32CL* m_sorter; - - ///test for 3d SAP - b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2]; - b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2]; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev; - - b3OpenCLArray<b3SortData> m_sortedAxisGPU0; - b3OpenCLArray<b3SortData> m_sortedAxisGPU1; - b3OpenCLArray<b3SortData> m_sortedAxisGPU2; - b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev; - b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev; - b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev; - - - b3OpenCLArray<b3Int4> m_addedHostPairsGPU; - b3OpenCLArray<b3Int4> m_removedHostPairsGPU; - b3OpenCLArray<int> m_addedCountGPU; - b3OpenCLArray<int> m_removedCountGPU; - - int m_currentBuffer; - -public: - - b3OpenCLArray<int> m_pairCount; - - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray<b3Vector3> m_sum; - b3OpenCLArray<b3Vector3> m_sum2; - b3OpenCLArray<b3Vector3> m_dst; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - - b3OpenCLArray<b3Int4> m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray<b3SortData> m_gpuSmallSortData; - b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - - enum b3GpuSapKernelType - { - B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1, - B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU, - B3_GPU_SAP_KERNEL_ORIGINAL, - B3_GPU_SAP_KERNEL_BARRIER, - B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY - }; - - b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - virtual ~b3GpuSapBroadphase(); - - static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU); - } - - static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU); - } - - static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL); - } - static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER); - } - static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - } - - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - void reset(); - - void init3dSap(); - virtual void calculateOverlappingPairsHostIncremental3Sap(); - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU(); - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU(); -}; - -#endif //B3_GPU_SAP_BROADPHASE_H
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h deleted file mode 100644 index ea6550fede..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef B3_SAP_AABB_H -#define B3_SAP_AABB_H - -#include "Bullet3Common/b3Scalar.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -///just make sure that the b3Aabb is 16-byte aligned -B3_ATTRIBUTE_ALIGNED16(struct) b3SapAabb : public b3Aabb -{ - -}; - - -#endif //B3_SAP_AABB_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl deleted file mode 100644 index ded4796d33..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl +++ /dev/null @@ -1,216 +0,0 @@ - - -int getPosHash(int4 gridPos, __global float4* pParams) -{ - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x &= gridDim.x - 1; - gridPos.y &= gridDim.y - 1; - gridPos.z &= gridDim.z - 1; - int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; - return hash; -} - -int4 getGridPos(float4 worldPos, __global float4* pParams) -{ - int4 gridPos; - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1); - gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1); - gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1); - return gridPos; -} - - -// calculate grid hash value for each body using its AABB -__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams ) -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - float4 bbMin = allpAABB[smallAabbMapping[index]*2]; - float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - pos.w = 0.f; - // get address in grid - int4 gridPos = getGridPos(pos, pParams); - int gridHash = getPosHash(gridPos, pParams); - // store grid hash and body index - int2 hashVal; - hashVal.x = gridHash; - hashVal.y = index; - pHash[index] = hashVal; -} - -__kernel void kClearCellStart( int numCells, - __global int* pCellStart ) -{ - int index = get_global_id(0); - if(index >= numCells) - { - return; - } - pCellStart[index] = -1; -} - -__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart ) -{ - __local int sharedHash[513]; - int index = get_global_id(0); - int2 sortedData; - - if(index < numObjects) - { - sortedData = pHash[index]; - // Load hash data into shared memory so that we can look - // at neighboring body's hash value without loading - // two hash values per thread - sharedHash[get_local_id(0) + 1] = sortedData.x; - if((index > 0) && (get_local_id(0) == 0)) - { - // first thread in block must load neighbor body hash - sharedHash[0] = pHash[index-1].x; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - if(index < numObjects) - { - if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) - { - cellStart[sortedData.x] = index; - } - } -} - -int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} - - - - -//search for AABB 'index' against other AABBs' in this cell -void findPairsInCell( int numObjects, - int4 gridPos, - int index, - __global int2* pHash, - __global int* pCellStart, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global float4* pParams, - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) -{ - int4 pGridDim = *((__global int4*)(pParams + 1)); - int maxBodiesPerCell = pGridDim.w; - int gridHash = getPosHash(gridPos, pParams); - // get start of bucket for this cell - int bucketStart = pCellStart[gridHash]; - if (bucketStart == -1) - { - return; // cell empty - } - // iterate over bodies in this cell - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - int handleIndex = as_int(min0.w); - - int bucketEnd = bucketStart + maxBodiesPerCell; - bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd; - for(int index2 = bucketStart; index2 < bucketEnd; index2++) - { - int2 cellData = pHash[index2]; - if (cellData.x != gridHash) - { - break; // no longer in same bucket - } - int unsorted_indx2 = cellData.y; - //if (unsorted_indx2 < unsorted_indx) // check not colliding with self - if (unsorted_indx2 != unsorted_indx) // check not colliding with self - { - float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0]; - float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1]; - if(testAABBOverlap(min0, max0, min1, max1)) - { - if (pairCount) - { - int handleIndex2 = as_int(min1.w); - if (handleIndex<handleIndex2) - { - int curPair = atomic_add(pairCount,1); - if (curPair<maxPairs) - { - int4 newpair; - newpair.x = handleIndex; - newpair.y = handleIndex2; - newpair.z = -1; - newpair.w = -1; - pPairBuff2[curPair] = newpair; - } - } - - } - } - } - } -} - -__kernel void kFindOverlappingPairs( int numObjects, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global int2* pHash, - __global int* pCellStart, - __global float4* pParams , - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) - -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - // get address in grid - int4 gridPosA = getGridPos(pos, pParams); - int4 gridPosB; - // examine only neighbouring cells - for(int z=-1; z<=1; z++) - { - gridPosB.z = gridPosA.z + z; - for(int y=-1; y<=1; y++) - { - gridPosB.y = gridPosA.y + y; - for(int x=-1; x<=1; x++) - { - gridPosB.x = gridPosA.x + x; - findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs); - } - } - } -} - - - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h deleted file mode 100644 index dad42477c3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h +++ /dev/null @@ -1,199 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* gridBroadphaseCL= \ -"int getPosHash(int4 gridPos, __global float4* pParams)\n" -"{\n" -" int4 gridDim = *((__global int4*)(pParams + 1));\n" -" gridPos.x &= gridDim.x - 1;\n" -" gridPos.y &= gridDim.y - 1;\n" -" gridPos.z &= gridDim.z - 1;\n" -" int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n" -" return hash;\n" -"} \n" -"int4 getGridPos(float4 worldPos, __global float4* pParams)\n" -"{\n" -" int4 gridPos;\n" -" int4 gridDim = *((__global int4*)(pParams + 1));\n" -" gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n" -" gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n" -" gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n" -" return gridPos;\n" -"}\n" -"// calculate grid hash value for each body using its AABB\n" -"__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n" -"{\n" -" int index = get_global_id(0);\n" -" if(index >= numObjects)\n" -" {\n" -" return;\n" -" }\n" -" float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n" -" float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n" -" float4 pos;\n" -" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" -" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" -" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" -" pos.w = 0.f;\n" -" // get address in grid\n" -" int4 gridPos = getGridPos(pos, pParams);\n" -" int gridHash = getPosHash(gridPos, pParams);\n" -" // store grid hash and body index\n" -" int2 hashVal;\n" -" hashVal.x = gridHash;\n" -" hashVal.y = index;\n" -" pHash[index] = hashVal;\n" -"}\n" -"__kernel void kClearCellStart( int numCells, \n" -" __global int* pCellStart )\n" -"{\n" -" int index = get_global_id(0);\n" -" if(index >= numCells)\n" -" {\n" -" return;\n" -" }\n" -" pCellStart[index] = -1;\n" -"}\n" -"__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n" -"{\n" -" __local int sharedHash[513];\n" -" int index = get_global_id(0);\n" -" int2 sortedData;\n" -" if(index < numObjects)\n" -" {\n" -" sortedData = pHash[index];\n" -" // Load hash data into shared memory so that we can look \n" -" // at neighboring body's hash value without loading\n" -" // two hash values per thread\n" -" sharedHash[get_local_id(0) + 1] = sortedData.x;\n" -" if((index > 0) && (get_local_id(0) == 0))\n" -" {\n" -" // first thread in block must load neighbor body hash\n" -" sharedHash[0] = pHash[index-1].x;\n" -" }\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" if(index < numObjects)\n" -" {\n" -" if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n" -" {\n" -" cellStart[sortedData.x] = index;\n" -" }\n" -" }\n" -"}\n" -"int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n" -"{\n" -" return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n" -" (min0.y <= max1.y)&& (min1.y <= max0.y) && \n" -" (min0.z <= max1.z)&& (min1.z <= max0.z); \n" -"}\n" -"//search for AABB 'index' against other AABBs' in this cell\n" -"void findPairsInCell( int numObjects,\n" -" int4 gridPos,\n" -" int index,\n" -" __global int2* pHash,\n" -" __global int* pCellStart,\n" -" __global float4* allpAABB, \n" -" __global const int* smallAabbMapping,\n" -" __global float4* pParams,\n" -" volatile __global int* pairCount,\n" -" __global int4* pPairBuff2,\n" -" int maxPairs\n" -" )\n" -"{\n" -" int4 pGridDim = *((__global int4*)(pParams + 1));\n" -" int maxBodiesPerCell = pGridDim.w;\n" -" int gridHash = getPosHash(gridPos, pParams);\n" -" // get start of bucket for this cell\n" -" int bucketStart = pCellStart[gridHash];\n" -" if (bucketStart == -1)\n" -" {\n" -" return; // cell empty\n" -" }\n" -" // iterate over bodies in this cell\n" -" int2 sortedData = pHash[index];\n" -" int unsorted_indx = sortedData.y;\n" -" float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n" -" float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" -" int handleIndex = as_int(min0.w);\n" -" \n" -" int bucketEnd = bucketStart + maxBodiesPerCell;\n" -" bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n" -" for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n" -" {\n" -" int2 cellData = pHash[index2];\n" -" if (cellData.x != gridHash)\n" -" {\n" -" break; // no longer in same bucket\n" -" }\n" -" int unsorted_indx2 = cellData.y;\n" -" //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n" -" if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n" -" { \n" -" float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n" -" float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n" -" if(testAABBOverlap(min0, max0, min1, max1))\n" -" {\n" -" if (pairCount)\n" -" {\n" -" int handleIndex2 = as_int(min1.w);\n" -" if (handleIndex<handleIndex2)\n" -" {\n" -" int curPair = atomic_add(pairCount,1);\n" -" if (curPair<maxPairs)\n" -" {\n" -" int4 newpair;\n" -" newpair.x = handleIndex;\n" -" newpair.y = handleIndex2;\n" -" newpair.z = -1;\n" -" newpair.w = -1;\n" -" pPairBuff2[curPair] = newpair;\n" -" }\n" -" }\n" -" \n" -" }\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void kFindOverlappingPairs( int numObjects,\n" -" __global float4* allpAABB, \n" -" __global const int* smallAabbMapping,\n" -" __global int2* pHash, \n" -" __global int* pCellStart, \n" -" __global float4* pParams ,\n" -" volatile __global int* pairCount,\n" -" __global int4* pPairBuff2,\n" -" int maxPairs\n" -" )\n" -"{\n" -" int index = get_global_id(0);\n" -" if(index >= numObjects)\n" -" {\n" -" return;\n" -" }\n" -" int2 sortedData = pHash[index];\n" -" int unsorted_indx = sortedData.y;\n" -" float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n" -" float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" -" float4 pos;\n" -" pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" -" pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" -" pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" -" // get address in grid\n" -" int4 gridPosA = getGridPos(pos, pParams);\n" -" int4 gridPosB; \n" -" // examine only neighbouring cells\n" -" for(int z=-1; z<=1; z++) \n" -" {\n" -" gridPosB.z = gridPosA.z + z;\n" -" for(int y=-1; y<=1; y++) \n" -" {\n" -" gridPosB.y = gridPosA.y + y;\n" -" for(int x=-1; x<=1; x++) \n" -" {\n" -" gridPosB.x = gridPosA.x + x;\n" -" findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n" -" }\n" -" }\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl deleted file mode 100644 index c375b9bf37..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ /dev/null @@ -1,767 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -typedef float b3Scalar; -typedef float4 b3Vector3; -#define b3Max max -#define b3Min min -#define b3Sqrt sqrt - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} b3AabbCL; - - -unsigned int interleaveBits(unsigned int x) -{ - //........ ........ ......12 3456789A //x - //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits - - //......12 3456789A ......12 3456789A //x ^ (x << 16) - //11111111 ........ ........ 11111111 //0x FF 00 00 FF - //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF; - - //......12 ........ 3456789A 3456789A //x ^ (x << 8) - //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F - //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F; - - //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4) - //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3 - //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3; - - //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2) - //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49 - //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249; - - //........ ........ ......11 11111111 //0x000003FF - x &= 0x000003FF; //Clear all bits above bit 10 - - x = (x ^ (x << 16)) & 0xFF0000FF; - x = (x ^ (x << 8)) & 0x0300F00F; - x = (x ^ (x << 4)) & 0x030C30C3; - x = (x ^ (x << 2)) & 0x09249249; - - return x; -} -unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z) -{ - return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2; -} - -__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate) -{ - int separatedAabbIndex = get_global_id(0); - if(separatedAabbIndex >= numAabbsToSeparate) return; - - int unseparatedAabbIndex = aabbIndices[separatedAabbIndex]; - out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex]; -} - -//Should replace with an optimized parallel reduction -__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge) -{ - //Each time this kernel is added to the command queue, - //the number of AABBs needing to be merged is halved - // - //Example with 159 AABBs: - // numRemainingAabbs == 159 / 2 + 159 % 2 == 80 - // numMergedAabbs == 159 - 80 == 79 - //So, indices [0, 78] are merged with [0 + 80, 78 + 80] - - int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2; - int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs; - - int aabbIndex = get_global_id(0); - if(aabbIndex >= numMergedAabbs) return; - - int otherAabbIndex = aabbIndex + numRemainingAabbs; - - b3AabbCL aabb = out_mergedAabb[aabbIndex]; - b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min); - mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max); - out_mergedAabb[aabbIndex] = mergedAabb; -} - -__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, - __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs) -{ - int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index - if(leafNodeIndex >= numAabbs) return; - - b3AabbCL mergedAabb = mergedAabbOfAllNodes[0]; - b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f; - b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024; - - b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex]; - b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f; - b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter; - - //Quantize into integer coordinates - //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size - b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize; - - int4 discretePosition; - discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) ); - discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) ); - discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) ); - - //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023] - discretePosition = b3Max( -512, b3Min(discretePosition, 511) ); - discretePosition += 512; - - //Interleave bits(assign a morton code, also known as a z-curve) - unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z); - - // - SortDataCL mortonCodeIndexPair; - mortonCodeIndexPair.m_key = mortonCode; - mortonCodeIndexPair.m_value = leafNodeIndex; - - out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair; -} - -#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128 - -//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes. -//If it is set, then the index is for an internal node; otherwise, it is a leaf node. -//In both cases, the bit should be cleared to access the actual node index. -int isLeafNode(int index) { return (index >> 31 == 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } - -//From sap.cl -#define NEW_PAIR_MARKER -1 - -bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -//From sap.cl - -__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - - __global SortDataCL* mortonCodesAndAabbIndices, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numQueryAabbs) -{ - //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since - //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent) - int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(queryBvhNodeIndex >= numQueryAabbs) return; - - int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value; - b3AabbCL queryAabb = rigidAabbs[queryRigidIndex]; - - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //Optimization - if the BVH is structured as a binary radix tree, then - //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]). - //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself. - { - int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y; - if(highestLeafIndex <= queryBvhNodeIndex) continue; - } - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) ) - { - if(isLeaf) - { - int4 pair; - pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3]; - pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - - } -} - - -//From rayCastKernels.cl -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; -//From rayCastKernels.cl - -b3Vector3 b3Vector3_normalize(b3Vector3 v) -{ - b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f}; - return normalize(normal); //OpenCL normalize == vector4 normalize -} -b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } -b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } - -int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) -{ - //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ). - //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane. - // - //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane - //and min.x will be the far plane; otherwise, it is reversed. - // - //In order for there to be a collision, the t_min and t_max of each pair must overlap. - //This can be tested for by selecting the highest t_min and lowest t_max and comparing them. - - int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y) - - //When using vector types, the select() function checks the most signficant bit, - //but isless() sets the least significant bit. - isNegative <<= 31; - - //select(b, a, condition) == condition ? a : b - //When using select() with vector types, (condition[i]) is true if its most significant bit is 1 - b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection; - b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection; - - b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = rayLength; - - //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. - //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) - //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN. - t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); - t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); - - return (t_min_final <= t_max_final); -} - -__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - __global SortDataCL* mortonCodesAndAabbIndices, - - __global b3RayInfo* rays, - - __global int* out_numRayRigidPairs, - __global int2* out_rayRigidPairs, - int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - // - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - // - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) ) - { - if(isLeaf) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - } -} - -__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids) -{ - int smallAabbIndex = get_global_id(0); - if(smallAabbIndex >= numSmallAabbRigids) return; - - b3AabbCL smallAabb = smallAabbs[smallAabbIndex]; - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL largeAabb = largeAabbs[i]; - if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) ) - { - int4 pair; - pair.x = largeAabb.m_minIndices[3]; - pair.y = smallAabb.m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - } -} -__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays, - __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs, - int numLargeAabbRigids, int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL rigidAabb = largeRigidAabbs[i]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) ) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabb.m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - } -} - - -//Set so that it is always greater than the actual common prefixes, and never selected as a parent node. -//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve. -//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node. -//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve). -#define B3_PLBVH_INVALID_COMMON_PREFIX 128 - -#define B3_PLBVH_ROOT_NODE_MARKER -1 - -#define b3Int64 long - -int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); } -b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) -{ - //This function only needs to return (i & j) in order for the algorithm to work, - //but it may help with debugging to mask out the lower bits. - - b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j); - - b3Int64 sharedBits = i & j; - b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0 - - return sharedBits & bitmask; -} - -//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths -int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB) -{ - return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) ); -} - -__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices, - __global b3Int64* out_commonPrefixes, - __global int* out_commonPrefixLengths, - int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if (internalNodeIndex >= numInternalNodes) return; - - //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index, - //and the number of internal nodes is always numLeafNodes - 1 - int leftLeafIndex = internalNodeIndex; - int rightLeafIndex = internalNodeIndex + 1; - - int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key; - int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key; - - //Binary radix tree construction algorithm does not work if there are duplicate morton codes. - //Append the index of each leaf node to each morton code so that there are no duplicates. - //The algorithm also requires that the morton codes are sorted in ascending order; this requirement - //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true. - // - //upsample(a, b) == ( ((b3Int64)a) << 32) | b - b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex); - b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex); - - out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); - out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); -} - - -__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes, - __global int2* out_childNodes, int numLeafNodes) -{ - int leafNodeIndex = get_global_id(0); - if (leafNodeIndex >= numLeafNodes) return; - - int numInternalNodes = numLeafNodes - 1; - - int leftSplitIndex = leafNodeIndex - 1; - int rightSplitIndex = leafNodeIndex; - - int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - //Parent node is the highest adjacent common prefix that is lower than the node's common prefix - //Leaf nodes are considered as having the highest common prefix - int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); - - //Handle cases for the edge nodes; the first and last node - //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX - if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; - if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; - - int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex; - out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex; - - int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - int isLeaf = 1; - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex); -} - -__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths, - __global int2* out_childNodes, - __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex, - int numInternalNodes) -{ - int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - b3Int64 nodePrefix = commonPrefixes[internalNodeIndex]; - int nodePrefixLength = commonPrefixLengths[internalNodeIndex]; - -//#define USE_LINEAR_SEARCH -#ifdef USE_LINEAR_SEARCH - int leftIndex = -1; - int rightIndex = -1; - - //Find nearest element to left with a lower common prefix - for(int i = internalNodeIndex - 1; i >= 0; --i) - { - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - leftIndex = i; - break; - } - } - - //Find nearest element to right with a lower common prefix - for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i) - { - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - rightIndex = i; - break; - } - } - -#else //Use binary search - - //Find nearest element to left with a lower common prefix - int leftIndex = -1; - { - int lower = 0; - int upper = internalNodeIndex - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int right = mid + 1; - if(right < internalNodeIndex) - { - b3Int64 rightPrefix = commonPrefixes[right]; - int rightPrefixLength = commonPrefixLengths[right]; - - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - lower = right; - leftIndex = right; - } - else - { - leftIndex = mid; - break; - } - } - else - { - leftIndex = mid; - break; - } - } - else upper = mid - 1; - } - } - - //Find nearest element to right with a lower common prefix - int rightIndex = -1; - { - int lower = internalNodeIndex + 1; - int upper = numInternalNodes - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int left = mid - 1; - if(left > internalNodeIndex) - { - b3Int64 leftPrefix = commonPrefixes[left]; - int leftPrefixLength = commonPrefixLengths[left]; - - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - upper = left; - rightIndex = left; - } - else - { - rightIndex = mid; - break; - } - } - else - { - rightIndex = mid; - break; - } - } - else lower = mid + 1; - } - } -#endif - - //Select parent - { - int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength); - - if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false; - else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true; - - int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex; - - int isRootNode = (leftIndex == -1 && rightIndex == -1); - out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER; - - int isLeaf = 0; - if(!isRootNode) - { - int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } - else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } -} - -__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes, - __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes) -{ - if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0); - - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - // - int distanceFromRoot = 0; - { - int parentIndex = internalNodeParentNodes[internalNodeIndex]; - while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER) - { - parentIndex = internalNodeParentNodes[parentIndex]; - ++distanceFromRoot; - } - } - out_distanceFromRoot[internalNodeIndex] = distanceFromRoot; - - // - __local int localMaxDistanceFromRoot; - if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - atomic_max(&localMaxDistanceFromRoot, distanceFromRoot); - barrier(CLK_LOCAL_MEM_FENCE); - - if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot); -} - -__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices, - __global int2* childNodes, - __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs, - int maxDistanceFromRoot, int processedDistance, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int distance = distanceFromRoot[internalNodeIndex]; - - if(distance == processedDistance) - { - int leftChildIndex = childNodes[internalNodeIndex].x; - int rightChildIndex = childNodes[internalNodeIndex].y; - - int isLeftChildLeaf = isLeafNode(leftChildIndex); - int isRightChildLeaf = isLeafNode(rightChildIndex); - - leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex); - rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex); - - //leftRigidIndex/rightRigidIndex is not used if internal node - int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1; - int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1; - - b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex]; - b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min); - mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max); - internalNodeAabbs[internalNodeIndex] = mergedAabb; - } -} - -__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int numLeafNodes = numInternalNodes + 1; - - int2 childNodes = internalNodeChildNodes[internalNodeIndex]; - - int2 leafIndexRange; //x == min leaf index, y == max leaf index - - //Find lowest leaf index covered by this internal node - { - int lowestIndex = childNodes.x; //childNodes.x == Left child - while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x; - leafIndexRange.x = lowestIndex; - } - - //Find highest leaf index covered by this internal node - { - int highestIndex = childNodes.y; //childNodes.y == Right child - while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y; - leafIndexRange.y = highestIndex; - } - - // - out_leafIndexRanges[internalNodeIndex] = leafIndexRange; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h deleted file mode 100644 index 5eb8f45b16..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ /dev/null @@ -1,729 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* parallelLinearBvhCL= \ -"/*\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Initial Author Jackson Lee, 2014\n" -"typedef float b3Scalar;\n" -"typedef float4 b3Vector3;\n" -"#define b3Max max\n" -"#define b3Min min\n" -"#define b3Sqrt sqrt\n" -"typedef struct\n" -"{\n" -" unsigned int m_key;\n" -" unsigned int m_value;\n" -"} SortDataCL;\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} b3AabbCL;\n" -"unsigned int interleaveBits(unsigned int x)\n" -"{\n" -" //........ ........ ......12 3456789A //x\n" -" //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n" -" \n" -" //......12 3456789A ......12 3456789A //x ^ (x << 16)\n" -" //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n" -" //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n" -" \n" -" //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n" -" //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n" -" //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n" -" \n" -" //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)\n" -" //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n" -" //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n" -" \n" -" //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n" -" //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n" -" //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n" -" \n" -" //........ ........ ......11 11111111 //0x000003FF\n" -" x &= 0x000003FF; //Clear all bits above bit 10\n" -" \n" -" x = (x ^ (x << 16)) & 0xFF0000FF;\n" -" x = (x ^ (x << 8)) & 0x0300F00F;\n" -" x = (x ^ (x << 4)) & 0x030C30C3;\n" -" x = (x ^ (x << 2)) & 0x09249249;\n" -" \n" -" return x;\n" -"}\n" -"unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n" -"{\n" -" return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n" -"}\n" -"__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n" -"{\n" -" int separatedAabbIndex = get_global_id(0);\n" -" if(separatedAabbIndex >= numAabbsToSeparate) return;\n" -" int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n" -" out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n" -"}\n" -"//Should replace with an optimized parallel reduction\n" -"__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n" -"{\n" -" //Each time this kernel is added to the command queue, \n" -" //the number of AABBs needing to be merged is halved\n" -" //\n" -" //Example with 159 AABBs:\n" -" // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n" -" // numMergedAabbs == 159 - 80 == 79\n" -" //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n" -" \n" -" int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n" -" int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n" -" \n" -" int aabbIndex = get_global_id(0);\n" -" if(aabbIndex >= numMergedAabbs) return;\n" -" \n" -" int otherAabbIndex = aabbIndex + numRemainingAabbs;\n" -" \n" -" b3AabbCL aabb = out_mergedAabb[aabbIndex];\n" -" b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n" -" \n" -" b3AabbCL mergedAabb;\n" -" mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n" -" mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n" -" out_mergedAabb[aabbIndex] = mergedAabb;\n" -"}\n" -"__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n" -" __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n" -"{\n" -" int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n" -" if(leafNodeIndex >= numAabbs) return;\n" -" \n" -" b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n" -" b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n" -" b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n" -" \n" -" b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n" -" b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n" -" b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n" -" \n" -" //Quantize into integer coordinates\n" -" //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n" -" b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n" -" \n" -" int4 discretePosition;\n" -" discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n" -" discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n" -" discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n" -" \n" -" //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n" -" discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n" -" discretePosition += 512;\n" -" \n" -" //Interleave bits(assign a morton code, also known as a z-curve)\n" -" unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n" -" \n" -" //\n" -" SortDataCL mortonCodeIndexPair;\n" -" mortonCodeIndexPair.m_key = mortonCode;\n" -" mortonCodeIndexPair.m_value = leafNodeIndex;\n" -" \n" -" out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n" -"}\n" -"#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n" -"//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n" -"//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n" -"//In both cases, the bit should be cleared to access the actual node index.\n" -"int isLeafNode(int index) { return (index >> 31 == 0); }\n" -"int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n" -"int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n" -"//From sap.cl\n" -"#define NEW_PAIR_MARKER -1\n" -"bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"//From sap.cl\n" -"__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n" -" __global int* rootNodeIndex, \n" -" __global int2* internalNodeChildIndices, \n" -" __global b3AabbCL* internalNodeAabbs,\n" -" __global int2* internalNodeLeafIndexRanges,\n" -" \n" -" __global SortDataCL* mortonCodesAndAabbIndices,\n" -" __global int* out_numPairs, __global int4* out_overlappingPairs, \n" -" int maxPairs, int numQueryAabbs)\n" -"{\n" -" //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n" -" //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n" -" int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" -" if(queryBvhNodeIndex >= numQueryAabbs) return;\n" -" \n" -" int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n" -" b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n" -" \n" -" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" -" \n" -" int stackSize = 1;\n" -" stack[0] = *rootNodeIndex;\n" -" \n" -" while(stackSize)\n" -" {\n" -" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" -" --stackSize;\n" -" \n" -" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" -" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" -" \n" -" //Optimization - if the BVH is structured as a binary radix tree, then\n" -" //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n" -" //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n" -" {\n" -" int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" -" if(highestLeafIndex <= queryBvhNodeIndex) continue;\n" -" }\n" -" \n" -" //bvhRigidIndex is not used if internal node\n" -" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" -" \n" -" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" -" if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n" -" {\n" -" if(isLeaf)\n" -" {\n" -" int4 pair;\n" -" pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n" -" pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" -" pair.z = NEW_PAIR_MARKER;\n" -" pair.w = NEW_PAIR_MARKER;\n" -" \n" -" int pairIndex = atomic_inc(out_numPairs);\n" -" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" -" }\n" -" \n" -" if(!isLeaf) //Internal node\n" -" {\n" -" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" -" {\n" -" //Error\n" -" }\n" -" else\n" -" {\n" -" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" -" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" -" }\n" -" }\n" -" }\n" -" \n" -" }\n" -"}\n" -"//From rayCastKernels.cl\n" -"typedef struct\n" -"{\n" -" float4 m_from;\n" -" float4 m_to;\n" -"} b3RayInfo;\n" -"//From rayCastKernels.cl\n" -"b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" -"{\n" -" b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n" -" return normalize(normal); //OpenCL normalize == vector4 normalize\n" -"}\n" -"b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" -"b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" -"int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" -"{\n" -" //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n" -" //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n" -" //\n" -" //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n" -" //and min.x will be the far plane; otherwise, it is reversed.\n" -" //\n" -" //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n" -" //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n" -" \n" -" int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)\n" -" \n" -" //When using vector types, the select() function checks the most signficant bit, \n" -" //but isless() sets the least significant bit.\n" -" isNegative <<= 31;\n" -" //select(b, a, condition) == condition ? a : b\n" -" //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n" -" b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" -" b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" -" \n" -" b3Scalar t_min_final = 0.0f;\n" -" b3Scalar t_max_final = rayLength;\n" -" \n" -" //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" -" //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" -" //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n" -" t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" -" t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" -" \n" -" return (t_min_final <= t_max_final);\n" -"}\n" -"__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" -" __global int* rootNodeIndex, \n" -" __global int2* internalNodeChildIndices, \n" -" __global b3AabbCL* internalNodeAabbs,\n" -" __global int2* internalNodeLeafIndexRanges,\n" -" __global SortDataCL* mortonCodesAndAabbIndices,\n" -" \n" -" __global b3RayInfo* rays,\n" -" \n" -" __global int* out_numRayRigidPairs, \n" -" __global int2* out_rayRigidPairs,\n" -" int maxRayRigidPairs, int numRays)\n" -"{\n" -" int rayIndex = get_global_id(0);\n" -" if(rayIndex >= numRays) return;\n" -" \n" -" //\n" -" b3Vector3 rayFrom = rays[rayIndex].m_from;\n" -" b3Vector3 rayTo = rays[rayIndex].m_to;\n" -" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" -" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" -" \n" -" //\n" -" int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" -" \n" -" int stackSize = 1;\n" -" stack[0] = *rootNodeIndex;\n" -" \n" -" while(stackSize)\n" -" {\n" -" int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" -" --stackSize;\n" -" \n" -" int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" -" int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" -" \n" -" //bvhRigidIndex is not used if internal node\n" -" int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" -" \n" -" b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" -" if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n" -" {\n" -" if(isLeaf)\n" -" {\n" -" int2 rayRigidPair;\n" -" rayRigidPair.x = rayIndex;\n" -" rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" -" \n" -" int pairIndex = atomic_inc(out_numRayRigidPairs);\n" -" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" -" }\n" -" \n" -" if(!isLeaf) //Internal node\n" -" {\n" -" if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" -" {\n" -" //Error\n" -" }\n" -" else\n" -" {\n" -" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" -" stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" -" }\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n" -" __global int* out_numPairs, __global int4* out_overlappingPairs, \n" -" int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n" -"{\n" -" int smallAabbIndex = get_global_id(0);\n" -" if(smallAabbIndex >= numSmallAabbRigids) return;\n" -" \n" -" b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n" -" for(int i = 0; i < numLargeAabbRigids; ++i)\n" -" {\n" -" b3AabbCL largeAabb = largeAabbs[i];\n" -" if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n" -" {\n" -" int4 pair;\n" -" pair.x = largeAabb.m_minIndices[3];\n" -" pair.y = smallAabb.m_minIndices[3];\n" -" pair.z = NEW_PAIR_MARKER;\n" -" pair.w = NEW_PAIR_MARKER;\n" -" \n" -" int pairIndex = atomic_inc(out_numPairs);\n" -" if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" -" }\n" -" }\n" -"}\n" -"__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n" -" __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n" -" int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n" -"{\n" -" int rayIndex = get_global_id(0);\n" -" if(rayIndex >= numRays) return;\n" -" \n" -" b3Vector3 rayFrom = rays[rayIndex].m_from;\n" -" b3Vector3 rayTo = rays[rayIndex].m_to;\n" -" b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" -" b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" -" \n" -" for(int i = 0; i < numLargeAabbRigids; ++i)\n" -" {\n" -" b3AabbCL rigidAabb = largeRigidAabbs[i];\n" -" if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n" -" {\n" -" int2 rayRigidPair;\n" -" rayRigidPair.x = rayIndex;\n" -" rayRigidPair.y = rigidAabb.m_minIndices[3];\n" -" \n" -" int pairIndex = atomic_inc(out_numRayRigidPairs);\n" -" if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" -" }\n" -" }\n" -"}\n" -"//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n" -"//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n" -"//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n" -"//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n" -"#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n" -"#define B3_PLBVH_ROOT_NODE_MARKER -1\n" -"#define b3Int64 long\n" -"int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n" -"b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n" -"{\n" -" //This function only needs to return (i & j) in order for the algorithm to work,\n" -" //but it may help with debugging to mask out the lower bits.\n" -" b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n" -" b3Int64 sharedBits = i & j;\n" -" b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n" -" \n" -" return sharedBits & bitmask;\n" -"}\n" -"//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n" -"int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n" -"{\n" -" return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n" -"}\n" -"__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n" -" __global b3Int64* out_commonPrefixes,\n" -" __global int* out_commonPrefixLengths,\n" -" int numInternalNodes)\n" -"{\n" -" int internalNodeIndex = get_global_id(0);\n" -" if (internalNodeIndex >= numInternalNodes) return;\n" -" \n" -" //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n" -" //and the number of internal nodes is always numLeafNodes - 1\n" -" int leftLeafIndex = internalNodeIndex;\n" -" int rightLeafIndex = internalNodeIndex + 1;\n" -" \n" -" int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n" -" int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n" -" \n" -" //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n" -" //Append the index of each leaf node to each morton code so that there are no duplicates.\n" -" //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n" -" //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n" -" //\n" -" //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n" -" b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n" -" b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n" -" \n" -" out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" -" out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" -"}\n" -"__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n" -" __global int2* out_childNodes, int numLeafNodes)\n" -"{\n" -" int leafNodeIndex = get_global_id(0);\n" -" if (leafNodeIndex >= numLeafNodes) return;\n" -" \n" -" int numInternalNodes = numLeafNodes - 1;\n" -" \n" -" int leftSplitIndex = leafNodeIndex - 1;\n" -" int rightSplitIndex = leafNodeIndex;\n" -" \n" -" int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" -" int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" -" \n" -" //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n" -" //Leaf nodes are considered as having the highest common prefix\n" -" int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" -" \n" -" //Handle cases for the edge nodes; the first and last node\n" -" //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n" -" if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" -" if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" -" \n" -" int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n" -" out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n" -" \n" -" int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n" -" \n" -" //out_childNodesAsInt[0] == int2.x == left child\n" -" //out_childNodesAsInt[1] == int2.y == right child\n" -" int isLeaf = 1;\n" -" __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" -" out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n" -"}\n" -"__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n" -" __global int2* out_childNodes,\n" -" __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n" -" int numInternalNodes)\n" -"{\n" -" int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" -" if(internalNodeIndex >= numInternalNodes) return;\n" -" \n" -" b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n" -" int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n" -" \n" -"//#define USE_LINEAR_SEARCH\n" -"#ifdef USE_LINEAR_SEARCH\n" -" int leftIndex = -1;\n" -" int rightIndex = -1;\n" -" \n" -" //Find nearest element to left with a lower common prefix\n" -" for(int i = internalNodeIndex - 1; i >= 0; --i)\n" -" {\n" -" int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" -" if(nodeLeftSharedPrefixLength < nodePrefixLength)\n" -" {\n" -" leftIndex = i;\n" -" break;\n" -" }\n" -" }\n" -" \n" -" //Find nearest element to right with a lower common prefix\n" -" for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n" -" {\n" -" int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" -" if(nodeRightSharedPrefixLength < nodePrefixLength)\n" -" {\n" -" rightIndex = i;\n" -" break;\n" -" }\n" -" }\n" -" \n" -"#else //Use binary search\n" -" //Find nearest element to left with a lower common prefix\n" -" int leftIndex = -1;\n" -" {\n" -" int lower = 0;\n" -" int upper = internalNodeIndex - 1;\n" -" \n" -" while(lower <= upper)\n" -" {\n" -" int mid = (lower + upper) / 2;\n" -" b3Int64 midPrefix = commonPrefixes[mid];\n" -" int midPrefixLength = commonPrefixLengths[mid];\n" -" \n" -" int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" -" if(nodeMidSharedPrefixLength < nodePrefixLength) \n" -" {\n" -" int right = mid + 1;\n" -" if(right < internalNodeIndex)\n" -" {\n" -" b3Int64 rightPrefix = commonPrefixes[right];\n" -" int rightPrefixLength = commonPrefixLengths[right];\n" -" \n" -" int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n" -" if(nodeRightSharedPrefixLength < nodePrefixLength) \n" -" {\n" -" lower = right;\n" -" leftIndex = right;\n" -" }\n" -" else \n" -" {\n" -" leftIndex = mid;\n" -" break;\n" -" }\n" -" }\n" -" else \n" -" {\n" -" leftIndex = mid;\n" -" break;\n" -" }\n" -" }\n" -" else upper = mid - 1;\n" -" }\n" -" }\n" -" \n" -" //Find nearest element to right with a lower common prefix\n" -" int rightIndex = -1;\n" -" {\n" -" int lower = internalNodeIndex + 1;\n" -" int upper = numInternalNodes - 1;\n" -" \n" -" while(lower <= upper)\n" -" {\n" -" int mid = (lower + upper) / 2;\n" -" b3Int64 midPrefix = commonPrefixes[mid];\n" -" int midPrefixLength = commonPrefixLengths[mid];\n" -" \n" -" int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" -" if(nodeMidSharedPrefixLength < nodePrefixLength) \n" -" {\n" -" int left = mid - 1;\n" -" if(left > internalNodeIndex)\n" -" {\n" -" b3Int64 leftPrefix = commonPrefixes[left];\n" -" int leftPrefixLength = commonPrefixLengths[left];\n" -" \n" -" int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n" -" if(nodeLeftSharedPrefixLength < nodePrefixLength) \n" -" {\n" -" upper = left;\n" -" rightIndex = left;\n" -" }\n" -" else \n" -" {\n" -" rightIndex = mid;\n" -" break;\n" -" }\n" -" }\n" -" else \n" -" {\n" -" rightIndex = mid;\n" -" break;\n" -" }\n" -" }\n" -" else lower = mid + 1;\n" -" }\n" -" }\n" -"#endif\n" -" \n" -" //Select parent\n" -" {\n" -" int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" -" int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" -" \n" -" int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n" -" \n" -" if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n" -" else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n" -" \n" -" int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n" -" \n" -" int isRootNode = (leftIndex == -1 && rightIndex == -1);\n" -" out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n" -" \n" -" int isLeaf = 0;\n" -" if(!isRootNode)\n" -" {\n" -" int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n" -" \n" -" //out_childNodesAsInt[0] == int2.x == left child\n" -" //out_childNodesAsInt[1] == int2.y == right child\n" -" __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" -" out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" -" }\n" -" else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" -" }\n" -"}\n" -"__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n" -" __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n" -"{\n" -" if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n" -" int internalNodeIndex = get_global_id(0);\n" -" if(internalNodeIndex >= numInternalNodes) return;\n" -" \n" -" //\n" -" int distanceFromRoot = 0;\n" -" {\n" -" int parentIndex = internalNodeParentNodes[internalNodeIndex];\n" -" while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n" -" {\n" -" parentIndex = internalNodeParentNodes[parentIndex];\n" -" ++distanceFromRoot;\n" -" }\n" -" }\n" -" out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n" -" \n" -" //\n" -" __local int localMaxDistanceFromRoot;\n" -" if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n" -"}\n" -"__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n" -" __global int2* childNodes,\n" -" __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n" -" int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n" -"{\n" -" int internalNodeIndex = get_global_id(0);\n" -" if(internalNodeIndex >= numInternalNodes) return;\n" -" \n" -" int distance = distanceFromRoot[internalNodeIndex];\n" -" \n" -" if(distance == processedDistance)\n" -" {\n" -" int leftChildIndex = childNodes[internalNodeIndex].x;\n" -" int rightChildIndex = childNodes[internalNodeIndex].y;\n" -" \n" -" int isLeftChildLeaf = isLeafNode(leftChildIndex);\n" -" int isRightChildLeaf = isLeafNode(rightChildIndex);\n" -" \n" -" leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n" -" rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n" -" \n" -" //leftRigidIndex/rightRigidIndex is not used if internal node\n" -" int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n" -" int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n" -" \n" -" b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n" -" b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n" -" \n" -" b3AabbCL mergedAabb;\n" -" mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n" -" mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n" -" internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" -" }\n" -"}\n" -"__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n" -"{\n" -" int internalNodeIndex = get_global_id(0);\n" -" if(internalNodeIndex >= numInternalNodes) return;\n" -" \n" -" int numLeafNodes = numInternalNodes + 1;\n" -" \n" -" int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n" -" \n" -" int2 leafIndexRange; //x == min leaf index, y == max leaf index\n" -" \n" -" //Find lowest leaf index covered by this internal node\n" -" {\n" -" int lowestIndex = childNodes.x; //childNodes.x == Left child\n" -" while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n" -" leafIndexRange.x = lowestIndex;\n" -" }\n" -" \n" -" //Find highest leaf index covered by this internal node\n" -" {\n" -" int highestIndex = childNodes.y; //childNodes.y == Right child\n" -" while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n" -" leafIndexRange.y = highestIndex;\n" -" }\n" -" \n" -" //\n" -" out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl deleted file mode 100644 index 93f77a6433..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ /dev/null @@ -1,389 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define NEW_PAIR_MARKER -1 - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -/// conservative test for overlap between two aabbs -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2); -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - - -__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numUnsortedAabbs) - return; - - int j = get_global_id(1); - if (j>=numUnSortedAabbs2) - return; - - - __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]]; - __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]]; - - if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2)) - { - int4 myPair; - - int xIndex = unsortedAabbPtr[0].m_minIndices[3]; - int yIndex = unsortedAabbPtr2[0].m_minIndices[3]; - if (xIndex>yIndex) - { - int tmp = xIndex; - xIndex=yIndex; - yIndex=tmp; - } - - myPair.x = xIndex; - myPair.y = yIndex; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } -} - - - -__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - -__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - break; - } - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - - - - -__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); -} - - -__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - __local btAabbCL localAabbs[128];// = aabbs[i]; - - btAabbCL myAabb; - - myAabb = (i<numObjects)? aabbs[i]:aabbs[0]; - float testValue = myAabb.m_maxElems[axis]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - int localCount=0; - int block=0; - localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0]; - - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1])) - { - int4 myPair; - myPair.x = myAabb.m_minIndices[3]; - myPair.y = localAabbs[localCount+localId+1].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - localCount++; - if (localCount==64) - { - localCount = 0; - block+=64; - localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0]; - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); - -} - - - - -//http://stereopsis.com/radix.html -unsigned int FloatFlip(float fl); -unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -} -float IFloatFlip(unsigned int f); -float IFloatFlip(unsigned int f) -{ - unsigned int mask = ((f >> 31) - 1) | 0x80000000; - unsigned int fl = f ^ mask; - return *(float*)&fl; -} - - - - -__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - int src = destAabbs[i].m_maxIndices[3]; - destAabbs[i] = allAabbs[src]; - destAabbs[i].m_maxIndices[3] = src; -} - - -__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - - sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]); - sortData[i].y = i; - -} - - -__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]]; -} - - - -__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs) -{ - int i = get_global_id(0); - if (i>=numAabbs) - return; - - btAabbCL smallAabb = allAabbs[smallAabbMapping[i]]; - - float4 s; - s = (smallAabb.m_max+smallAabb.m_min)*0.5f; - sum[i]=s; - sum2[i]=s*s; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h deleted file mode 100644 index 04d40fcf26..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ /dev/null @@ -1,342 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* sapCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#define NEW_PAIR_MARKER -1\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"/// conservative test for overlap between two aabbs\n" -"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numUnsortedAabbs)\n" -" return;\n" -" int j = get_global_id(1);\n" -" if (j>=numUnSortedAabbs2)\n" -" return;\n" -" __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" -" __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" -" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" -" {\n" -" int4 myPair;\n" -" \n" -" int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" -" int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" -" if (xIndex>yIndex)\n" -" {\n" -" int tmp = xIndex;\n" -" xIndex=yIndex;\n" -" yIndex=tmp;\n" -" }\n" -" \n" -" myPair.x = xIndex;\n" -" myPair.y = yIndex;\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" for (int j=i+1;j<numObjects;j++)\n" -" {\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" for (int j=i+1;j<numObjects;j++)\n" -" {\n" -" if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" -" {\n" -" break;\n" -" }\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int localId = get_local_id(0);\n" -" __local int numActiveWgItems[1];\n" -" __local int breakRequest[1];\n" -" if (localId==0)\n" -" {\n" -" numActiveWgItems[0] = 0;\n" -" breakRequest[0] = 0;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" atomic_inc(numActiveWgItems);\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" int localBreak = 0;\n" -" int j=i+1;\n" -" do\n" -" {\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j<numObjects)\n" -" {\n" -" if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" -" {\n" -" if (!localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j>=numObjects && !localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (!localBreak)\n" -" {\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -" j++;\n" -" } while (breakRequest[0]<numActiveWgItems[0]);\n" -"}\n" -"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int localId = get_local_id(0);\n" -" __local int numActiveWgItems[1];\n" -" __local int breakRequest[1];\n" -" __local btAabbCL localAabbs[128];// = aabbs[i];\n" -" \n" -" btAabbCL myAabb;\n" -" \n" -" myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n" -" float testValue = myAabb.m_maxElems[axis];\n" -" \n" -" if (localId==0)\n" -" {\n" -" numActiveWgItems[0] = 0;\n" -" breakRequest[0] = 0;\n" -" }\n" -" int localCount=0;\n" -" int block=0;\n" -" localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n" -" localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" atomic_inc(numActiveWgItems);\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" int localBreak = 0;\n" -" \n" -" int j=i+1;\n" -" do\n" -" {\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j<numObjects)\n" -" {\n" -" if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n" -" {\n" -" if (!localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j>=numObjects && !localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (!localBreak)\n" -" {\n" -" if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = myAabb.m_minIndices[3];\n" -" myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" localCount++;\n" -" if (localCount==64)\n" -" {\n" -" localCount = 0;\n" -" block+=64; \n" -" localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n" -" localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n" -" }\n" -" j++;\n" -" \n" -" } while (breakRequest[0]<numActiveWgItems[0]);\n" -" \n" -"}\n" -"//http://stereopsis.com/radix.html\n" -"unsigned int FloatFlip(float fl);\n" -"unsigned int FloatFlip(float fl)\n" -"{\n" -" unsigned int f = *(unsigned int*)&fl;\n" -" unsigned int mask = -(int)(f >> 31) | 0x80000000;\n" -" return f ^ mask;\n" -"}\n" -"float IFloatFlip(unsigned int f);\n" -"float IFloatFlip(unsigned int f)\n" -"{\n" -" unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" -" unsigned int fl = f ^ mask;\n" -" return *(float*)&fl;\n" -"}\n" -"__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" int src = destAabbs[i].m_maxIndices[3];\n" -" destAabbs[i] = allAabbs[src];\n" -" destAabbs[i].m_maxIndices[3] = src;\n" -"}\n" -"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" \n" -" \n" -" sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" -" sortData[i].y = i;\n" -" \n" -"}\n" -"__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" \n" -" sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" -"}\n" -"__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numAabbs)\n" -" return;\n" -" \n" -" btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n" -" \n" -" float4 s;\n" -" s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" -" sum[i]=s;\n" -" sum2[i]=s*s; \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/CMakeLists.txt b/thirdparty/bullet/src/Bullet3OpenCL/CMakeLists.txt deleted file mode 100644 index 1da58d4a99..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/CMakeLists.txt +++ /dev/null @@ -1,77 +0,0 @@ -INCLUDE_DIRECTORIES( ${BULLET_PHYSICS_SOURCE_DIR}/src ) - -ADD_DEFINITIONS(-DB3_USE_CLEW) - -SET(Bullet3OpenCL_clew_SRCS - ../clew/clew.c - BroadphaseCollision/b3GpuGridBroadphase.cpp - BroadphaseCollision/b3GpuSapBroadphase.cpp - BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp - BroadphaseCollision/b3GpuParallelLinearBvh.cpp - Initialize/b3OpenCLUtils.cpp - NarrowphaseCollision/b3ContactCache.cpp - NarrowphaseCollision/b3ConvexHullContact.cpp - NarrowphaseCollision/b3GjkEpa.cpp - NarrowphaseCollision/b3OptimizedBvh.cpp - NarrowphaseCollision/b3QuantizedBvh.cpp - NarrowphaseCollision/b3StridingMeshInterface.cpp - NarrowphaseCollision/b3TriangleCallback.cpp - NarrowphaseCollision/b3TriangleIndexVertexArray.cpp - NarrowphaseCollision/b3VoronoiSimplexSolver.cpp - ParallelPrimitives/b3BoundSearchCL.cpp - ParallelPrimitives/b3FillCL.cpp - ParallelPrimitives/b3LauncherCL.cpp - ParallelPrimitives/b3PrefixScanCL.cpp - ParallelPrimitives/b3PrefixScanFloat4CL.cpp - ParallelPrimitives/b3RadixSort32CL.cpp - Raycast/b3GpuRaycast.cpp - RigidBody/b3GpuGenericConstraint.cpp - RigidBody/b3GpuJacobiContactSolver.cpp - RigidBody/b3GpuNarrowPhase.cpp - RigidBody/b3GpuPgsConstraintSolver.cpp - RigidBody/b3GpuPgsContactSolver.cpp - RigidBody/b3GpuRigidBodyPipeline.cpp - RigidBody/b3Solver.cpp -) - - -SET(Bullet3OpenCL_clew_HDRS -# ${Root_HDRS} -) - - -ADD_LIBRARY(Bullet3OpenCL_clew ${Bullet3OpenCL_clew_SRCS} ${Bullet3OpenCL_clew_HDRS}) -SET_TARGET_PROPERTIES(Bullet3OpenCL_clew PROPERTIES VERSION ${BULLET_VERSION}) -SET_TARGET_PROPERTIES(Bullet3OpenCL_clew PROPERTIES SOVERSION ${BULLET_VERSION}) -IF (BUILD_SHARED_LIBS) - TARGET_LINK_LIBRARIES(Bullet3OpenCL_clew LinearMath Bullet3Dynamics ${CMAKE_DL_LIBS}) -ENDIF (BUILD_SHARED_LIBS) - - -IF (INSTALL_LIBS) - IF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) - #INSTALL of other files requires CMake 2.6 - IF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS Bullet3OpenCL_clew DESTINATION .) - ELSE (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - INSTALL(TARGETS Bullet3OpenCL_clew RUNTIME DESTINATION bin - LIBRARY DESTINATION lib${LIB_SUFFIX} - ARCHIVE DESTINATION lib${LIB_SUFFIX}) - INSTALL(DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} -DESTINATION ${INCLUDE_INSTALL_DIR} FILES_MATCHING PATTERN "*.h" PATTERN ".svn" EXCLUDE PATTERN "CMakeFiles" EXCLUDE) -# INSTALL(FILES ../btBullet3OpenCL_clewCommon.h -#DESTINATION ${INCLUDE_INSTALL_DIR}/Bullet3OpenCL_clew) - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} GREATER 2.5) - - IF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - SET_TARGET_PROPERTIES(Bullet3OpenCL_clew PROPERTIES FRAMEWORK true) - - SET_TARGET_PROPERTIES(Bullet3OpenCL_clew PROPERTIES PUBLIC_HEADER "${Root_HDRS}") - # Have to list out sub-directories manually: - SET_PROPERTY(SOURCE ${BroadphaseCollision_HDRS} PROPERTY MACOSX_PACKAGE_LOCATION Headers/BroadphaseCollision) - - ENDIF (APPLE AND BUILD_SHARED_LIBS AND FRAMEWORK) - ENDIF (NOT INTERNAL_CREATE_DISTRIBUTABLE_MSVC_PROJECTFILES) -ENDIF (INSTALL_LIBS) diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLInclude.h b/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLInclude.h deleted file mode 100644 index e79182d7cb..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLInclude.h +++ /dev/null @@ -1,48 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_OPENCL_INCLUDE_H -#define B3_OPENCL_INCLUDE_H - -#ifdef B3_USE_CLEW - #include "clew/clew.h" -#else - -#ifdef __APPLE__ -#ifdef USE_MINICL -#include <MiniCL/cl.h> -#else -#include <OpenCL/cl.h> -#include <OpenCL/cl_ext.h> //clLogMessagesToStderrAPPLE -#endif -#else -#ifdef USE_MINICL -#include <MiniCL/cl.h> -#else -#include <CL/cl.h> -#ifdef _WIN32 -#include "CL/cl_gl.h" -#endif //_WIN32 -#endif -#endif //__APPLE__ -#endif //B3_USE_CLEW - -#include <assert.h> -#include <stdio.h> -#define oclCHECKERROR(a, b) if((a)!=(b)) { printf("OCL Error : %d\n", (a)); assert((a) == (b)); } - - -#endif //B3_OPENCL_INCLUDE_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp b/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp deleted file mode 100644 index dd194fc7ba..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp +++ /dev/null @@ -1,1011 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//Original author: Roman Ponomarev -//Mostly Reimplemented by Erwin Coumans - - -bool gDebugForceLoadingFromSource = false; -bool gDebugSkipLoadingBinary = false; - -#include "Bullet3Common/b3Logging.h" - -#include <string.h> - -#ifdef _WIN32 -#pragma warning (disable:4996) -#endif -#include "b3OpenCLUtils.h" -//#include "b3OpenCLInclude.h" - -#include <stdio.h> -#include <stdlib.h> - -#define B3_MAX_CL_DEVICES 16 //who needs 16 devices? - -#ifdef _WIN32 -#include <windows.h> -#endif - -#include <assert.h> -#define b3Assert assert -#ifndef _WIN32 -#include <sys/stat.h> - -#endif - -static const char* sCachedBinaryPath="cache"; - - -//Set the preferred platform vendor using the OpenCL SDK -static const char* spPlatformVendor = -#if defined(CL_PLATFORM_MINI_CL) -"MiniCL, SCEA"; -#elif defined(CL_PLATFORM_AMD) -"Advanced Micro Devices, Inc."; -#elif defined(CL_PLATFORM_NVIDIA) -"NVIDIA Corporation"; -#elif defined(CL_PLATFORM_INTEL) -"Intel(R) Corporation"; -#elif defined(B3_USE_CLEW) -"clew (OpenCL Extension Wrangler library)"; -#else -"Unknown Vendor"; -#endif - -#ifndef CL_PLATFORM_MINI_CL -#ifdef _WIN32 -#ifndef B3_USE_CLEW -#include "CL/cl_gl.h" -#endif //B3_USE_CLEW -#endif //_WIN32 -#endif - - -void MyFatalBreakAPPLE( const char * errstr , - const void * private_info , - size_t cb , - void * user_data ) -{ - - - const char* patloc = strstr(errstr, "Warning"); - //find out if it is a warning or error, exit if error - - if (patloc) - { - b3Warning("Warning: %s\n", errstr); - } else - { - b3Error("Error: %s\n", errstr); - b3Assert(0); - } - -} - -#ifdef B3_USE_CLEW - -int b3OpenCLUtils_clewInit() -{ - int result = -1; - -#ifdef _WIN32 - const char* cl = "OpenCL.dll"; -#elif defined __APPLE__ - const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"; -#else//presumable Linux? - //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so - const char* cl = "libOpenCL.so.1"; - result = clewInit(cl); - if (result != CLEW_SUCCESS) - { - cl = "libOpenCL.so"; - } else - { - clewExit(); - } -#endif - result = clewInit(cl); - if (result!=CLEW_SUCCESS) - { - b3Error("clewInit failed with error code %d\n",result); - } - else - { - b3Printf("clewInit succesfull using %s\n",cl); - } - return result; -} -#endif - -int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id pPlatforms[10] = { 0 }; - - cl_uint numPlatforms = 0; - cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms); - //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if(ciErrNum != CL_SUCCESS) - { - if(pErrNum != NULL) - *pErrNum = ciErrNum; - } - return numPlatforms; - -} - -const char* b3OpenCLUtils_getSdkVendorName() -{ - return spPlatformVendor; -} - -void b3OpenCLUtils_setCachePath(const char* path) -{ - sCachedBinaryPath = path; -} - -cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id platform = 0; - unsigned int platformIndex = (unsigned int )platformIndex0; - cl_uint numPlatforms; - cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if (platformIndex<numPlatforms) - { - cl_platform_id* platforms = (cl_platform_id*) malloc (sizeof(cl_platform_id)*numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if(ciErrNum != CL_SUCCESS) - { - if(pErrNum != NULL) - *pErrNum = ciErrNum; - return platform; - } - - platform = platforms[platformIndex]; - - free (platforms); - } - - return platform; -} - -void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo) -{ - b3Assert(platform); - cl_int ciErrNum; - ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VENDOR,B3_MAX_STRING_LENGTH,platformInfo->m_platformVendor,NULL); - oclCHECKERROR(ciErrNum,CL_SUCCESS); - ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_NAME,B3_MAX_STRING_LENGTH,platformInfo->m_platformName,NULL); - oclCHECKERROR(ciErrNum,CL_SUCCESS); - ciErrNum = clGetPlatformInfo( platform,CL_PLATFORM_VERSION,B3_MAX_STRING_LENGTH,platformInfo->m_platformVersion,NULL); - oclCHECKERROR(ciErrNum,CL_SUCCESS); -} - -void b3OpenCLUtils_printPlatformInfo( cl_platform_id platform) -{ - b3OpenCLPlatformInfo platformInfo; - b3OpenCLUtils::getPlatformInfo (platform, &platformInfo); - b3Printf("Platform info:\n"); - b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n",platformInfo.m_platformVendor); - b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n",platformInfo.m_platformName); - b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n",platformInfo.m_platformVersion); -} - - - -cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex) -{ - cl_context retContext = 0; - cl_int ciErrNum=0; - cl_uint num_entries; - cl_device_id devices[B3_MAX_CL_DEVICES]; - cl_uint num_devices; - cl_context_properties* cprops; - - /* - * If we could find our platform, use it. Otherwise pass a NULL and get whatever the - * implementation thinks we should be using. - */ - cl_context_properties cps[7] = {0,0,0,0,0,0,0}; - cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties)platform; -#ifdef _WIN32 -#ifndef B3_USE_CLEW - if (pGLContext && pGLDC) - { - cps[2] = CL_GL_CONTEXT_KHR; - cps[3] = (cl_context_properties)pGLContext; - cps[4] = CL_WGL_HDC_KHR; - cps[5] = (cl_context_properties)pGLDC; - } -#endif //B3_USE_CLEW -#endif //_WIN32 - num_entries = B3_MAX_CL_DEVICES; - - - num_devices=-1; - - ciErrNum = clGetDeviceIDs( - platform, - deviceType, - num_entries, - devices, - &num_devices); - - if (ciErrNum<0) - { - b3Printf("clGetDeviceIDs returned %d\n",ciErrNum); - return 0; - } - cprops = (NULL == platform) ? NULL : cps; - - if (!num_devices) - return 0; - - if (pGLContext) - { - //search for the GPU that relates to the OpenCL context - unsigned int i; - for (i=0;i<num_devices;i++) - { - retContext = clCreateContext(cprops,1,&devices[i],NULL,NULL,&ciErrNum); - if (ciErrNum==CL_SUCCESS) - break; - } - } - else - { - if (preferredDeviceIndex>=0 && (unsigned int)preferredDeviceIndex<num_devices) - { - //create a context of the preferred device index - retContext = clCreateContext(cprops,1,&devices[preferredDeviceIndex],NULL,NULL,&ciErrNum); - } else - { - //create a context of all devices -#if defined (__APPLE__) - retContext = clCreateContext(cprops,num_devices,devices,MyFatalBreakAPPLE,NULL,&ciErrNum); -#else - b3Printf("numDevices=%d\n",num_devices); - - retContext = clCreateContext(cprops,num_devices,devices,NULL,NULL,&ciErrNum); -#endif - } - } - if(pErrNum != NULL) - { - *pErrNum = ciErrNum; - }; - - return retContext; -} - -cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC , int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - - cl_uint numPlatforms; - cl_context retContext = 0; - unsigned int i; - - cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - if(ciErrNum != CL_SUCCESS) - { - if(pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - if(numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id*) malloc (sizeof(cl_platform_id)*numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if(ciErrNum != CL_SUCCESS) - { - if(pErrNum != NULL) - *pErrNum = ciErrNum; - free(platforms); - return NULL; - } - - - - for ( i = 0; i < numPlatforms; ++i) - { - char pbuf[128]; - ciErrNum = clGetPlatformInfo( platforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuf), - pbuf, - NULL); - if(ciErrNum != CL_SUCCESS) - { - if(pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - - if (preferredPlatformIndex>=0 && i==preferredPlatformIndex) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - break; - } else - { - if(!strcmp(pbuf, spPlatformVendor)) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - } - } - } - - for (i = 0; i < numPlatforms; ++i) - { - cl_platform_id platform = platforms[i]; - assert(platform); - - retContext = b3OpenCLUtils_createContextFromPlatform(platform,deviceType,pErrNum,pGLContext,pGLDC,preferredDeviceIndex,preferredPlatformIndex); - - if (retContext) - { -// printf("OpenCL platform details:\n"); - b3OpenCLPlatformInfo platformInfo; - - b3OpenCLUtils::getPlatformInfo(platform, &platformInfo); - - if (retPlatformId) - *retPlatformId = platform; - - break; - } - } - - free (platforms); - } - return retContext; -} - - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the nth device from the context -//! -//! @return the id or -1 when out of range -//! @param cxMainContext OpenCL context -//! @param device_idx index of the device of interest -////////////////////////////////////////////////////////////////////////////// -cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex) -{ - assert(cxMainContext); - - size_t szParmDataBytes; - cl_device_id* cdDevices; - cl_device_id device ; - - // get the list of devices associated with context - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); - - if( szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex ) { - return (cl_device_id)-1; - } - - cdDevices = (cl_device_id*) malloc(szParmDataBytes); - - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); - - device = cdDevices[deviceIndex]; - free(cdDevices); - - return device; -} - -int b3OpenCLUtils_getNumDevices(cl_context cxMainContext) -{ - size_t szParamDataBytes; - int device_count; - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes); - device_count = (int) szParamDataBytes/ sizeof(cl_device_id); - return device_count; -} - - - -void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info) -{ - // CL_DEVICE_NAME - clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL); - - // CL_DEVICE_VENDOR - clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL); - - // CL_DRIVER_VERSION - clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL); - - // CL_DEVICE_INFO - clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL); - - // CL_DEVICE_MAX_COMPUTE_UNITS - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_SIZES - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL); - - // CL_DEVICE_MAX_WORK_GROUP_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL); - - // CL_DEVICE_MAX_CLOCK_FREQUENCY - clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL); - - // CL_DEVICE_ADDRESS_BITS - clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL); - - // CL_DEVICE_MAX_MEM_ALLOC_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL); - - // CL_DEVICE_GLOBAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL); - - // CL_DEVICE_ERROR_CORRECTION_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL); - - // CL_DEVICE_LOCAL_MEM_TYPE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL); - - // CL_DEVICE_LOCAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL); - - // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL); - - // CL_DEVICE_QUEUE_PROPERTIES - clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL); - - // CL_DEVICE_IMAGE_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL); - - // CL_DEVICE_MAX_READ_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL); - - // CL_DEVICE_MAX_WRITE_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL); - - // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL); - - // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines - clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL); - - // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type> - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL); -} - - -void b3OpenCLUtils_printDeviceInfo(cl_device_id device) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device,&info); - b3Printf("Device Info:\n"); - b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", info.m_deviceName); - b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor); - b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion); - - if( info.m_deviceType & CL_DEVICE_TYPE_CPU ) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); - if( info.m_deviceType & CL_DEVICE_TYPE_GPU ) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); - if( info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR ) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); - if( info.m_deviceType & CL_DEVICE_TYPE_DEFAULT ) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); - - b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]); - b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize); - b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency); - b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits); - b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize/ (1024 * 1024))); - b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize/ (1024 * 1024))); - b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport== CL_TRUE ? "yes" : "no"); - b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? "local" : "global"); - b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024)); - b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024)); - if( info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE ) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); - if( info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE ) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); - - b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport); - - b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs); - b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs); - b3Printf("\n CL_DEVICE_IMAGE <dim>"); - b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth); - b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth); - b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth); - if (*info.m_deviceExtensions != 0) - { - b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n",info.m_deviceExtensions); - } - else - { - b3Printf(" CL_DEVICE_EXTENSIONS: None\n"); - } - b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t"); - b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n", - info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong,info.m_vecWidthFloat, info.m_vecWidthDouble); - - -} - - -static const char* strip2(const char* name, const char* pattern) -{ - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char * oriptr; - const char * patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; -} - -cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg , const char* clFileNameForCaching, bool disableBinaryCaching) -{ - const char* additionalMacros = additionalMacrosArg?additionalMacrosArg:""; - - if (disableBinaryCaching) - { - //kernelSourceOrg = 0; - } - - cl_program m_cpProgram=0; - cl_int status; - - char binaryFileName[B3_MAX_STRING_LENGTH]; - - char deviceName[256]; - char driverVersion[256]; - const char* strippedName; - int fileUpToDate = 0; -#ifdef _WIN32 - int binaryFileValid=0; -#endif - if (!disableBinaryCaching && clFileNameForCaching) - { - clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL); - clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL); - - strippedName = strip2(clFileNameForCaching,"\\"); - strippedName = strip2(strippedName,"/"); - -#ifdef _MSVC_VER - sprintf_s(binaryFileName,B3_MAX_STRING_LENGTH,"%s/%s.%s.%s.bin",sCachedBinaryPath,strippedName, deviceName,driverVersion ); -#else - sprintf(binaryFileName,"%s/%s.%s.%s.bin",sCachedBinaryPath,strippedName, deviceName,driverVersion ); -#endif - } - if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary||gDebugForceLoadingFromSource) ) - { - -#ifdef _WIN32 - char* bla=0; - - - - //printf("searching for %s\n", binaryFileName); - - - FILETIME modtimeBinary; - CreateDirectoryA(sCachedBinaryPath,0); - { - - HANDLE binaryFileHandle = CreateFileA(binaryFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); - if (binaryFileHandle ==INVALID_HANDLE_VALUE) - { - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nCached file not found %s\n", binaryFileName); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nCached file path not found %s\n", binaryFileName); - break; - } - default: - { - b3Warning("\nFailed reading cached file with errorCode = %d\n", errorCode); - } - } - } else - { - if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary)==0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } else - { - binaryFileValid = 1; - } - CloseHandle(binaryFileHandle); - } - - if (binaryFileValid) - { - HANDLE srcFileHandle = CreateFileA(clFileNameForCaching,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); - - if (srcFileHandle==INVALID_HANDLE_VALUE) - { - const char* prefix[]={"./","../","../../","../../../","../../../../"}; - for (int i=0;(srcFileHandle==INVALID_HANDLE_VALUE) && i<5;i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName,"%s%s",prefix[i],clFileNameForCaching); - srcFileHandle = CreateFileA(relativeFileName,GENERIC_READ,0,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0); - } - - } - - - if (srcFileHandle!=INVALID_HANDLE_VALUE) - { - FILETIME modtimeSrc; - if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc)==0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } - if ( ( modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) - ||(( modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime)&&(modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime))) - { - fileUpToDate=1; - } else - { - b3Warning("\nCached binary file out-of-date (%s)\n",binaryFileName); - } - CloseHandle(srcFileHandle); - } - else - { -#ifdef _DEBUG - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nSrc file not found %s\n", clFileNameForCaching); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nSrc path not found %s\n", clFileNameForCaching); - break; - } - default: - { - b3Warning("\nnSrc file reading errorCode = %d\n", errorCode); - } - } - - //we should make sure the src file exists so we can verify the timestamp with binary -// assert(0); - b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n",clFileNameForCaching, binaryFileName); - fileUpToDate = true; -#else - //if we cannot find the source, assume it is OK in release builds - fileUpToDate = true; -#endif - } - } - - - } - - - -#else - fileUpToDate = true; - if (mkdir(sCachedBinaryPath,0777) == -1) - { - } - else - { - b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath); - } -#endif //_WIN32 - } - - - if( fileUpToDate) - { -#ifdef _MSC_VER - FILE* file; - if (fopen_s(&file,binaryFileName, "rb")!=0) - file=0; -#else - FILE* file = fopen(binaryFileName, "rb"); -#endif - - if (file) - { - size_t binarySize=0; - char* binary =0; - - fseek( file, 0L, SEEK_END ); - binarySize = ftell( file ); - rewind( file ); - binary = (char*)malloc(sizeof(char)*binarySize); - int bytesRead; - bytesRead = fread( binary, sizeof(char), binarySize, file ); - fclose( file ); - - m_cpProgram = clCreateProgramWithBinary( clContext, 1,&device, &binarySize, (const unsigned char**)&binary, 0, &status ); - b3Assert( status == CL_SUCCESS ); - status = clBuildProgram( m_cpProgram, 1, &device, additionalMacros, 0, 0 ); - b3Assert( status == CL_SUCCESS ); - - if( status != CL_SUCCESS ) - { - char *build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*)malloc(sizeof(char)*(ret_val_size+1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - build_log[ret_val_size] = '\0'; - b3Error("%s\n", build_log); - free (build_log); - b3Assert(0); - m_cpProgram = 0; - - b3Warning("clBuildProgram reported failure on cached binary: %s\n",binaryFileName); - - } else - { - b3Printf("clBuildProgram successfully compiled cached binary: %s\n",binaryFileName); - } - free (binary); - - } else - { - b3Warning("Cannot open cached binary: %s\n",binaryFileName); - } - } - - - - - - - - - - if (!m_cpProgram) - { - - cl_int localErrNum; - char* compileFlags; - int flagsize; - - - - const char* kernelSource = kernelSourceOrg; - - if (!kernelSourceOrg || gDebugForceLoadingFromSource) - { - if (clFileNameForCaching) - { - - FILE* file = fopen(clFileNameForCaching, "rb"); - //in many cases the relative path is a few levels up the directory hierarchy, so try it - if (!file) - { - const char* prefix[]={"../","../../","../../../","../../../../"}; - for (int i=0;!file && i<3;i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName,"%s%s",prefix[i],clFileNameForCaching); - file = fopen(relativeFileName, "rb"); - } - } - - if (file) - { - char* kernelSrc=0; - fseek( file, 0L, SEEK_END ); - int kernelSize = ftell( file ); - rewind( file ); - kernelSrc = (char*)malloc(kernelSize+1); - int readBytes; - readBytes = fread((void*)kernelSrc,1,kernelSize, file); - kernelSrc[kernelSize] = 0; - fclose(file); - kernelSource = kernelSrc; - } - } - } - - size_t program_length = kernelSource ? strlen(kernelSource) : 0; -#ifdef MAC //or __APPLE__? - char* flags = "-cl-mad-enable -DMAC "; -#else - const char* flags = ""; -#endif - - - m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum); - if (localErrNum!= CL_SUCCESS) - { - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - // Build the program with 'mad' Optimization option - - - - flagsize = sizeof(char)*(strlen(additionalMacros) + strlen(flags) + 5); - compileFlags = (char*) malloc(flagsize); -#ifdef _MSC_VER - sprintf_s(compileFlags,flagsize, "%s %s", flags, additionalMacros); -#else - sprintf(compileFlags, "%s %s", flags, additionalMacros); -#endif - localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL); - if (localErrNum!= CL_SUCCESS) - { - char *build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*) malloc(sizeof(char)*(ret_val_size+1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - - // to be carefully, terminate with \0 - // there's no information in the reference whether the string is 0 terminated or not - build_log[ret_val_size] = '\0'; - - - b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log); - free (build_log); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - - if( !disableBinaryCaching && clFileNameForCaching ) - { // write to binary - - cl_uint numAssociatedDevices; - status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0 ); - b3Assert( status == CL_SUCCESS ); - if (numAssociatedDevices==1) - { - - size_t binarySize; - char* binary ; - - status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0 ); - b3Assert( status == CL_SUCCESS ); - - binary = (char*)malloc(sizeof(char)*binarySize); - - status = clGetProgramInfo( m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), &binary, 0 ); - b3Assert( status == CL_SUCCESS ); - - { - FILE* file=0; -#ifdef _MSC_VER - if (fopen_s(&file,binaryFileName, "wb")!=0) - file=0; -#else - file = fopen(binaryFileName, "wb"); -#endif - if (file) - { - fwrite( binary, sizeof(char), binarySize, file ); - fclose( file ); - } else - { - b3Warning("cannot write file %s\n", binaryFileName); - } - } - - free (binary); - } - } - - free(compileFlags); - - } - return m_cpProgram; -} - - -cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros ) -{ - - cl_kernel kernel; - cl_int localErrNum; - - cl_program m_cpProgram = prog; - - b3Printf("compiling kernel %s ",kernelName); - - if (!m_cpProgram) - { - m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext,device,kernelSource,pErrNum, additionalMacros,0, false); - } - - - // Create the kernel - kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum); - if (localErrNum != CL_SUCCESS) - { - b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName); - assert(0); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - if (!prog && m_cpProgram) - { - clReleaseProgram(m_cpProgram); - } - b3Printf("ready. \n"); - - - if (pErrNum) - *pErrNum = CL_SUCCESS; - return kernel; - -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h b/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h deleted file mode 100644 index db6466e76b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Initialize/b3OpenCLUtils.h +++ /dev/null @@ -1,194 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//original author: Roman Ponomarev -//cleanup by Erwin Coumans - -#ifndef B3_OPENCL_UTILS_H -#define B3_OPENCL_UTILS_H - -#include "b3OpenCLInclude.h" - -#ifdef __cplusplus -extern "C" { -#endif - - -///C API for OpenCL utilities: convenience functions, see below for C++ API - -/// CL Context optionally takes a GL context. This is a generic type because we don't really want this code -/// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. -cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC , int preferredDeviceIndex , int preferredPlatformIndex, cl_platform_id* platformId); - -int b3OpenCLUtils_getNumDevices(cl_context cxMainContext); - -cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int nr); - -void b3OpenCLUtils_printDeviceInfo(cl_device_id device); - -cl_kernel b3OpenCLUtils_compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog,const char* additionalMacros); - -//optional -cl_program b3OpenCLUtils_compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum,const char* additionalMacros , const char* srcFileNameForCaching, bool disableBinaryCaching); - -//the following optional APIs provide access using specific platform information -int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum); - -///get the nr'th platform, where nr is in the range [0..getNumPlatforms) -cl_platform_id b3OpenCLUtils_getPlatform(int nr, cl_int* pErrNum); - - -void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform); - -const char* b3OpenCLUtils_getSdkVendorName(); - -///set the path (directory/folder) where the compiled OpenCL kernel are stored -void b3OpenCLUtils_setCachePath(const char* path); - -cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx , void* pGLDC ,int preferredDeviceIndex , int preferredPlatformIndex); - -#ifdef __cplusplus -} - -#define B3_MAX_STRING_LENGTH 1024 - -typedef struct -{ - char m_deviceName[B3_MAX_STRING_LENGTH]; - char m_deviceVendor[B3_MAX_STRING_LENGTH]; - char m_driverVersion[B3_MAX_STRING_LENGTH]; - char m_deviceExtensions[B3_MAX_STRING_LENGTH]; - - cl_device_type m_deviceType; - cl_uint m_computeUnits; - size_t m_workitemDims; - size_t m_workItemSize[3]; - size_t m_image2dMaxWidth; - size_t m_image2dMaxHeight; - size_t m_image3dMaxWidth; - size_t m_image3dMaxHeight; - size_t m_image3dMaxDepth; - size_t m_workgroupSize; - cl_uint m_clockFrequency; - cl_ulong m_constantBufferSize; - cl_ulong m_localMemSize; - cl_ulong m_globalMemSize; - cl_bool m_errorCorrectionSupport; - cl_device_local_mem_type m_localMemType; - cl_uint m_maxReadImageArgs; - cl_uint m_maxWriteImageArgs; - - - - cl_uint m_addressBits; - cl_ulong m_maxMemAllocSize; - cl_command_queue_properties m_queueProperties; - cl_bool m_imageSupport; - cl_uint m_vecWidthChar; - cl_uint m_vecWidthShort; - cl_uint m_vecWidthInt; - cl_uint m_vecWidthLong; - cl_uint m_vecWidthFloat; - cl_uint m_vecWidthDouble; - -} b3OpenCLDeviceInfo; - -struct b3OpenCLPlatformInfo -{ - char m_platformVendor[B3_MAX_STRING_LENGTH]; - char m_platformName[B3_MAX_STRING_LENGTH]; - char m_platformVersion[B3_MAX_STRING_LENGTH]; - - b3OpenCLPlatformInfo() - { - m_platformVendor[0]=0; - m_platformName[0]=0; - m_platformVersion[0]=0; - } -}; - - -///C++ API for OpenCL utilities: convenience functions -struct b3OpenCLUtils -{ - /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code - /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. - static inline cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex= - 1, cl_platform_id* platformId=0) - { - return b3OpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx , pGLDC , preferredDeviceIndex, preferredPlatformIndex, platformId); - } - - static inline int getNumDevices(cl_context cxMainContext) - { - return b3OpenCLUtils_getNumDevices(cxMainContext); - } - static inline cl_device_id getDevice(cl_context cxMainContext, int nr) - { - return b3OpenCLUtils_getDevice(cxMainContext,nr); - } - - static void getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info); - - static inline void printDeviceInfo(cl_device_id device) - { - b3OpenCLUtils_printDeviceInfo(device); - } - - static inline cl_kernel compileCLKernelFromString( cl_context clContext,cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum=0, cl_program prog=0,const char* additionalMacros = "" ) - { - return b3OpenCLUtils_compileCLKernelFromString(clContext,device, kernelSource, kernelName, pErrNum, prog,additionalMacros); - } - - //optional - static inline cl_program compileCLProgramFromString( cl_context clContext,cl_device_id device, const char* kernelSource, cl_int* pErrNum=0,const char* additionalMacros = "" , const char* srcFileNameForCaching=0, bool disableBinaryCaching=false) - { - return b3OpenCLUtils_compileCLProgramFromString(clContext,device, kernelSource, pErrNum,additionalMacros, srcFileNameForCaching, disableBinaryCaching); - } - - //the following optional APIs provide access using specific platform information - static inline int getNumPlatforms(cl_int* pErrNum=0) - { - return b3OpenCLUtils_getNumPlatforms(pErrNum); - } - ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) - static inline cl_platform_id getPlatform(int nr, cl_int* pErrNum=0) - { - return b3OpenCLUtils_getPlatform(nr,pErrNum); - } - - static void getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo); - - static inline void printPlatformInfo(cl_platform_id platform) - { - b3OpenCLUtils_printPlatformInfo(platform); - } - - static inline const char* getSdkVendorName() - { - return b3OpenCLUtils_getSdkVendorName(); - } - static inline cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0,int preferredDeviceIndex = -1, int preferredPlatformIndex= -1) - { - return b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx,pGLDC,preferredDeviceIndex, preferredPlatformIndex); - } - static void setCachePath(const char* path) - { - b3OpenCLUtils_setCachePath(path); - } -}; - -#endif //__cplusplus - -#endif // B3_OPENCL_UTILS_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h deleted file mode 100644 index 872f039506..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef B3_BVH_INFO_H -#define B3_BVH_INFO_H - -#include "Bullet3Common/b3Vector3.h" - -struct b3BvhInfo -{ - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - b3Vector3 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -}; - -#endif //B3_BVH_INFO_H
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp deleted file mode 100644 index cb30ee939b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp +++ /dev/null @@ -1,258 +0,0 @@ - -#if 0 -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include "b3ContactCache.h" -#include "Bullet3Common/b3Transform.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -b3Scalar gContactBreakingThreshold = b3Scalar(0.02); - -///gContactCalcArea3Points will approximate the convex hull area using 3 points -///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower -bool gContactCalcArea3Points = true; - - - - -static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) -{ - // It calculates possible 3 area constructed from random 4 points and returns the biggest one. - - b3Vector3 a[3],b[3]; - a[0] = p0 - p1; - a[1] = p0 - p2; - a[2] = p0 - p3; - b[0] = p2 - p3; - b[1] = p1 - p3; - b[2] = p1 - p2; - - //todo: Following 3 cross production can be easily optimized by SIMD. - b3Vector3 tmp0 = a[0].cross(b[0]); - b3Vector3 tmp1 = a[1].cross(b[1]); - b3Vector3 tmp2 = a[2].cross(b[2]); - - return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); -} -#if 0 - -//using localPointA for all points -int b3ContactCache::sortCachedPoints(const b3Vector3& pt) -{ - //calculate 4 possible cases areas, and take biggest area - //also need to keep 'deepest' - - int maxPenetrationIndex = -1; -#define KEEP_DEEPEST_POINT 1 -#ifdef KEEP_DEEPEST_POINT - b3Scalar maxPenetration = pt.getDistance(); - for (int i=0;i<4;i++) - { - if (m_pointCache[i].getDistance() < maxPenetration) - { - maxPenetrationIndex = i; - maxPenetration = m_pointCache[i].getDistance(); - } - } -#endif //KEEP_DEEPEST_POINT - - b3Scalar res0(b3Scalar(0.)),res1(b3Scalar(0.)),res2(b3Scalar(0.)),res3(b3Scalar(0.)); - - if (gContactCalcArea3Points) - { - if (maxPenetrationIndex != 0) - { - b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a0.cross(b0); - res0 = cross.length2(); - } - if (maxPenetrationIndex != 1) - { - b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a1.cross(b1); - res1 = cross.length2(); - } - - if (maxPenetrationIndex != 2) - { - b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = a2.cross(b2); - res2 = cross.length2(); - } - - if (maxPenetrationIndex != 3) - { - b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = a3.cross(b3); - res3 = cross.length2(); - } - } - else - { - if(maxPenetrationIndex != 0) { - res0 = calcArea4Points(pt.m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 1) { - res1 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 2) { - res2 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 3) { - res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA); - } - } - b3Vector4 maxvec(res0,res1,res2,res3); - int biggestarea = maxvec.closestAxis4(); - return biggestarea; - -} - - -int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const -{ - b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold(); - int size = getNumContacts(); - int nearestPoint = -1; - for( int i = 0; i < size; i++ ) - { - const b3Vector3 &mp = m_pointCache[i]; - - b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA; - const b3Scalar distToManiPoint = diffA.dot(diffA); - if( distToManiPoint < shortestDist ) - { - shortestDist = distToManiPoint; - nearestPoint = i; - } - } - return nearestPoint; -} - -int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint) -{ - b3Assert(validContactDistance(newPoint)); - - int insertIndex = getNumContacts(); - if (insertIndex == MANIFOLD_CACHE_SIZE) - { -#if MANIFOLD_CACHE_SIZE >= 4 - //sort cache so best points come first, based on area - insertIndex = sortCachedPoints(newPoint); -#else - insertIndex = 0; -#endif - clearUserCache(m_pointCache[insertIndex]); - - } else - { - m_cachedPoints++; - - - } - if (insertIndex<0) - insertIndex=0; - - //b3Assert(m_pointCache[insertIndex].m_userPersistentData==0); - m_pointCache[insertIndex] = newPoint; - return insertIndex; -} - -#endif - -bool b3ContactCache::validContactDistance(const b3Vector3& pt) -{ - return pt.w <= gContactBreakingThreshold; -} - -void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i) -{ - int numContacts = b3Contact4Data_getNumPoints(&newContactCache); - if (i!=(numContacts-1)) - { - b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]); - b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]); - b3Swap(newContactCache.m_worldPosB[i],newContactCache.m_worldPosB[numContacts-1]); - } - b3Contact4Data_setNumPoints(&newContactCache,numContacts-1); - -} - - -void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts) -{ - - int numContacts = b3Contact4Data_getNumPoints(&contacts); - - - int i; - /// first refresh worldspace positions and distance - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - contacts.m_worldPosB[i] = worldPosB; - float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormalOnB); - contacts.m_worldPosB[i].w = distance; - } - - /// then - b3Scalar distance2d; - b3Vector3 projectedDifference,projectedPoint; - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - b3Vector3&pt = contacts.m_worldPosB[i]; - //contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction) - if (!validContactDistance(pt)) - { - removeContactPoint(contacts,i); - } else - { - //contact also becomes invalid when relative movement orthogonal to normal exceeds margin - projectedPoint = worldPosA - contacts.m_worldNormalOnB * contacts.m_worldPosB[i].w; - projectedDifference = contacts.m_worldPosB[i] - projectedPoint; - distance2d = projectedDifference.dot(projectedDifference); - if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold ) - { - removeContactPoint(contacts,i); - } else - { - ////contact point processed callback - //if (gContactProcessedCallback) - // (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); - } - } - } - - -} - - - - - -#endif diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h deleted file mode 100644 index d6c9b0a07e..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h +++ /dev/null @@ -1,80 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_CONTACT_CACHE_H -#define B3_CONTACT_CACHE_H - - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedAllocator.h" - - -///maximum contact breaking and merging threshold -extern b3Scalar gContactBreakingThreshold; - - - -#define MANIFOLD_CACHE_SIZE 4 - -///b3ContactCache is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase. -///Those contact points are created by the collision narrow phase. -///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time. -///updates/refreshes old contact points, and throw them away if necessary (distance becomes too large) -///reduces the cache to 4 points, when more then 4 points are added, using following rules: -///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points -///note that some pairs of objects might have more then one contact manifold. -B3_ATTRIBUTE_ALIGNED16( class) b3ContactCache -{ - - - - - /// sort cached points so most isolated points come first - int sortCachedPoints(const b3Vector3& pt); - - - -public: - - B3_DECLARE_ALIGNED_ALLOCATOR(); - - - - int addManifoldPoint( const b3Vector3& newPoint); - - /*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex) - { - b3Assert(validContactDistance(newPoint)); - m_pointCache[insertIndex] = newPoint; - } - */ - - - - static bool validContactDistance(const b3Vector3& pt); - - /// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin - static void refreshContactPoints( const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& newContactCache); - - static void removeContactPoint(struct b3Contact4Data& newContactCache,int i); - - -}; - - - -#endif //B3_CONTACT_CACHE_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp deleted file mode 100644 index fb435aa7fd..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ /dev/null @@ -1,4733 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -bool findSeparatingAxisOnGpu = true; -bool splitSearchSepAxisConcave = false; -bool splitSearchSepAxisConvex = true; -bool useMprGpu = true;//use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... -bool bvhTraversalKernelGPU = true; -bool findConcaveSeparatingAxisKernelGPU = true; -bool clipConcaveFacesAndFindContactsCPU = false;//false;//true; -bool clipConvexFacesAndFindContactsCPU = false;//false;//true; -bool reduceConcaveContactsOnGPU = true;//false; -bool reduceConvexContactsOnGPU = true;//false; -bool findConvexClippingFacesGPU = true; -bool useGjk = false;///option for CPU/host testing, when findSeparatingAxisOnGpu = false -bool useGjkContacts = false;//////option for CPU/host testing when findSeparatingAxisOnGpu = false - - -static int myframecount=0;///for testing - -///This file was written by Erwin Coumans -///Separating axis rest based on work from Pierre Terdiman, see -///And contact clipping based on work from Simon Hobbs - -//#define B3_DEBUG_SAT_FACE - -//#define CHECK_ON_HOST - -#ifdef CHECK_ON_HOST -//#define PERSISTENT_CONTACTS_HOST -#endif - -int b3g_actualSATPairTests=0; - -#include "b3ConvexHullContact.h" -#include <string.h>//memcpy -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; - - -#include <float.h> //for FLT_MAX -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -//#include "AdlQuaternion.h" - -#include "kernels/satKernels.h" -#include "kernels/mprKernels.h" - -#include "kernels/satConcaveKernels.h" - -#include "kernels/satClipHullContacts.h" -#include "kernels/bvhTraversal.h" -#include "kernels/primitiveContacts.h" - - -#include "Bullet3Geometry/b3AabbUtil.h" - -#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" -#define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" - -#define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" - - -#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" -#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" -#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" - - -#ifndef __global -#define __global -#endif - -#ifndef __kernel -#define __kernel -#endif - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" - - - -#define dot3F4 b3Dot - -GpuSatCollision::GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ) -:m_context(ctx), -m_device(device), -m_queue(q), - -m_findSeparatingAxisKernel(0), -m_findSeparatingAxisVertexFaceKernel(0), -m_findSeparatingAxisEdgeEdgeKernel(0), -m_unitSphereDirections(m_context,m_queue), - -m_totalContactsOut(m_context, m_queue), -m_sepNormals(m_context, m_queue), -m_dmins(m_context,m_queue), - -m_hasSeparatingNormals(m_context, m_queue), -m_concaveSepNormals(m_context, m_queue), -m_concaveHasSeparatingNormals(m_context,m_queue), -m_numConcavePairsOut(m_context, m_queue), - - -m_gpuCompoundPairs(m_context, m_queue), - - -m_gpuCompoundSepNormals(m_context, m_queue), -m_gpuHasCompoundSepNormals(m_context, m_queue), - -m_numCompoundPairsOut(m_context, m_queue) -{ - m_totalContactsOut.push_back(0); - - cl_int errNum=0; - - if (1) - { - const char* mprSrc = mprKernelsCL; - - const char* srcConcave = satConcaveKernelsCL; - char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); -//#endif - m_mprPenetrationKernel = 0; - m_findSeparatingAxisUnitSphereKernel = 0; - - if (useMprGpu) - { - cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,mprSrc,&errNum,flags,BT_NARROWPHASE_MPR_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "mprPenetrationKernel",&errNum,mprProg ); - b3Assert(m_mprPenetrationKernel); - b3Assert(errNum==CL_SUCCESS); - - m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,mprSrc, "findSeparatingAxisUnitSphereKernel",&errNum,mprProg ); - b3Assert(m_findSeparatingAxisUnitSphereKernel); - b3Assert(errNum==CL_SUCCESS); - - - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); - m_unitSphereDirections.resize(numDirections); - m_unitSphereDirections.copyFromHostPointer(unitSphere162,numDirections,0,true); - - - } - - - cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,satKernelsCL,&errNum,flags,BT_NARROWPHASE_SAT_PATH); - b3Assert(errNum==CL_SUCCESS); - - cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcConcave,&errNum,flags,BT_NARROWPHASE_SAT_CONCAVE_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisKernel",&errNum,satProg ); - b3Assert(m_findSeparatingAxisKernel); - b3Assert(errNum==CL_SUCCESS); - - - m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisVertexFaceKernel",&errNum,satProg ); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findSeparatingAxisEdgeEdgeKernel",&errNum,satProg ); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - - m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findConcaveSeparatingAxisKernel",&errNum,satProg ); - b3Assert(m_findConcaveSeparatingAxisKernel); - b3Assert(errNum==CL_SUCCESS); - - m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisVertexFaceKernel",&errNum,satConcaveProg ); - b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); - b3Assert(errNum==CL_SUCCESS); - - m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel",&errNum,satConcaveProg ); - b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); - b3Assert(errNum==CL_SUCCESS); - - - - - m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "findCompoundPairsKernel",&errNum,satProg ); - b3Assert(m_findCompoundPairsKernel); - b3Assert(errNum==CL_SUCCESS); - m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,satKernelsCL, "processCompoundPairsKernel",&errNum,satProg ); - b3Assert(m_processCompoundPairsKernel); - b3Assert(errNum==CL_SUCCESS); - } - - if (1) - { - const char* srcClip = satClipKernelsCL; - - char flags[1024]={0}; -//#ifdef CL_PLATFORM_INTEL -// sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); -//#endif - - cl_program satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcClip,&errNum,flags,BT_NARROWPHASE_CLIPHULL_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - - m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipCompoundsHullHullKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - - - m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "findClippingFacesKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - - m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipFacesAndFindContactsKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - - m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "clipHullHullConcaveConvexKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - -// m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); - // b3Assert(errNum==CL_SUCCESS); - - m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, - "newContactReductionKernel",&errNum,satClipContactsProg); - b3Assert(errNum==CL_SUCCESS); - } - else - { - m_clipHullHullKernel=0; - m_clipCompoundsHullHullKernel = 0; - m_findClippingFacesKernel = 0; - m_newContactReductionKernel=0; - m_clipFacesAndFindContacts = 0; - m_clipHullHullConcaveConvexKernel = 0; -// m_extractManifoldAndAddContactKernel = 0; - } - - if (1) - { - const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,srcBvh,&errNum,"",BT_NARROWPHASE_BVH_TRAVERSAL_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcBvh, "bvhTraversalKernel",&errNum,bvhTraversalProg,""); - b3Assert(errNum==CL_SUCCESS); - - } - - { - const char* primitiveContactsSrc = primitiveContactsKernelsCL; - cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,primitiveContactsSrc,&errNum,"",BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); - b3Assert(errNum==CL_SUCCESS); - - m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "primitiveContactsKernel",&errNum,primitiveContactsProg,""); - b3Assert(errNum==CL_SUCCESS); - - m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "findConcaveSphereContactsKernel",&errNum,primitiveContactsProg ); - b3Assert(errNum==CL_SUCCESS); - b3Assert(m_findConcaveSphereContactsKernel); - - m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,primitiveContactsSrc, "processCompoundPairsPrimitivesKernel",&errNum,primitiveContactsProg,""); - b3Assert(errNum==CL_SUCCESS); - b3Assert(m_processCompoundPairsPrimitivesKernel); - - } - - -} - -GpuSatCollision::~GpuSatCollision() -{ - - if (m_findSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); - - if (m_findSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); - - if (m_findSeparatingAxisUnitSphereKernel) - clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); - - if (m_mprPenetrationKernel) - clReleaseKernel(m_mprPenetrationKernel); - - - if (m_findSeparatingAxisKernel) - clReleaseKernel(m_findSeparatingAxisKernel); - - if (m_findConcaveSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); - - - if (m_findConcaveSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); - - if (m_findConcaveSeparatingAxisKernel) - clReleaseKernel(m_findConcaveSeparatingAxisKernel); - - if (m_findCompoundPairsKernel) - clReleaseKernel(m_findCompoundPairsKernel); - - if (m_processCompoundPairsKernel) - clReleaseKernel(m_processCompoundPairsKernel); - - if (m_findClippingFacesKernel) - clReleaseKernel(m_findClippingFacesKernel); - - if (m_clipFacesAndFindContacts) - clReleaseKernel(m_clipFacesAndFindContacts); - if (m_newContactReductionKernel) - clReleaseKernel(m_newContactReductionKernel); - if (m_primitiveContactsKernel) - clReleaseKernel(m_primitiveContactsKernel); - - if (m_findConcaveSphereContactsKernel) - clReleaseKernel(m_findConcaveSphereContactsKernel); - - if (m_processCompoundPairsPrimitivesKernel) - clReleaseKernel(m_processCompoundPairsPrimitivesKernel); - - if (m_clipHullHullKernel) - clReleaseKernel(m_clipHullHullKernel); - if (m_clipCompoundsHullHullKernel) - clReleaseKernel(m_clipCompoundsHullHullKernel); - - if (m_clipHullHullConcaveConvexKernel) - clReleaseKernel(m_clipHullHullConcaveConvexKernel); -// if (m_extractManifoldAndAddContactKernel) - // clReleaseKernel(m_extractManifoldAndAddContactKernel); - - if (m_bvhTraversalKernel) - clReleaseKernel(m_bvhTraversalKernel); - -} - -struct MyTriangleCallback : public b3NodeOverlapCallback -{ - int m_bodyIndexA; - int m_bodyIndexB; - - virtual void processNode(int subPart, int triangleIndex) - { - printf("bodyIndexA %d, bodyIndexB %d\n",m_bodyIndexA,m_bodyIndexB); - printf("triangleIndex %d\n", triangleIndex); - } -}; - - -#define float4 b3Vector3 -#define make_float4(x,y,z,w) b3MakeVector3(x,y,z,w) - -float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) -{ - float4 n = planeEqn; - n[3] = 0.f; - float dist = dot3F4(n, point) + planeEqn[3]; - *closestPointOnFace = point - dist * n; - return dist; -} - - - -#define cross3(a,b) (a.cross(b)) -b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) -{ - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(*pos); - tr.setRotation(*orn); - b3Vector3 res = tr(*v); - return res; -} - - -inline bool IsPointInPolygon(const float4& p, - const b3GpuFace* face, - const float4* baseVertex, - const int* convexIndices, - float4* out) -{ - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; - - float4 plane = b3MakeVector3(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); - - if (face->m_numIndices<2) - return false; - - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; - b = v0; - - for(unsigned i=0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b-a; - ap = p-a; - v = cross3(ab,plane); - - if (b3Dot(ap, v) > 0.f) - { - float ab_m2 = b3Dot(ab, ab); - float rt = ab_m2 != 0.f ? b3Dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - -#define normalize3(a) (a.normalize()) - - -int extractManifoldSequentialGlobal( const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = b3MakeVector3(0,0,0,0); - { - - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - - - -#define MAX_VERTS 1024 - - -inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) -{ - min = FLT_MAX; - max = -FLT_MAX; - int numVerts = hull.m_numVertices; - - const float4 localDir = b3QuatRotate(orn.inverse(),dir); - - b3Scalar offset = dot3F4(pos,dir); - - for(int i=0;i<numVerts;i++) - { - //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; - //b3Scalar dp = pt.dot(dir); - //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; - b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset+i],localDir); - //b3Assert(dp==dpL); - if(dp < min) min = dp; - if(dp > max) max = dp; - } - if(min>max) - { - b3Scalar tmp = min; - min = max; - max = tmp; - } - min += offset; - max += offset; -} - - -static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA,const b3Quaternion& ornA, - const float4& posB,const b3Quaternion& ornB, - const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB,b3Scalar& depth) -{ - b3Scalar Min0,Max0; - b3Scalar Min1,Max1; - project(hullA,posA,ornA,sep_axis,verticesA, Min0, Max0); - project(hullB,posB,ornB, sep_axis,verticesB, Min1, Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - b3Scalar d0 = Max0 - Min1; - assert(d0>=0.0f); - b3Scalar d1 = Max1 - Min0; - assert(d1>=0.0f); - depth = d0<d1 ? d0:d1; - return true; -} - -inline bool IsAlmostZero(const b3Vector3& v) -{ - if(fabsf(v.x)>1e-6 || fabsf(v.y)>1e-6 || fabsf(v.z)>1e-6) return false; - return true; -} - - -static bool findSeparatingAxis( const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA1, - const b3Quaternion& ornA, - const float4& posB1, - const b3Quaternion& ornB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - b3Vector3& sep) -{ - B3_PROFILE("findSeparatingAxis"); - - b3g_actualSATPairTests++; - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; -//#ifdef TEST_INTERNAL_OBJECTS - float4 c0local = (float4&)hullA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = (float4&)hullB.m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 deltaC2 = c0 - c1; -//#endif - - b3Scalar dmin = FLT_MAX; - int curPlaneTests=0; - - int numFacesA = hullA.m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4& normal = (float4&)facesA[hullA.m_faceOffset+i].m_plane; - float4 faceANormalWS = b3QuatRotate(ornA,normal); - - if (dot3F4(deltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - - b3Scalar d; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,faceANormalWS, verticesA, verticesB,d)) - return false; - - if(d<dmin) - { - dmin = d; - sep = (b3Vector3&)faceANormalWS; - } - } - - int numFacesB = hullB.m_numFaces; - // Test normals from hullB - for(int i=0;i<numFacesB;i++) - { - float4 normal = (float4&)facesB[hullB.m_faceOffset+i].m_plane; - float4 WorldNormal = b3QuatRotate(ornB, normal); - - if (dot3F4(deltaC2,WorldNormal)<0) - { - WorldNormal*=-1.f; - } - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, WorldNormal, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if(!TestSepAxis(hullA, hullB,posA,ornA,posB,ornB,WorldNormal,verticesA,verticesB,d)) - return false; - - if(d<dmin) - { - dmin = d; - sep = (b3Vector3&)WorldNormal; - } - } - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA.m_numUniqueEdges;e0++) - { - const float4& edge0 = (float4&) uniqueEdgesA[hullA.m_uniqueEdgesOffset+e0]; - float4 edge0World = b3QuatRotate(ornA,(float4&)edge0); - - for(int e1=0;e1<hullB.m_numUniqueEdges;e1++) - { - const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset+e1]; - float4 edge1World = b3QuatRotate(ornB,(float4&)edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero((b3Vector3&)crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(deltaC2,crossje)<0) - crossje*=-1.f; - - -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if(gUseInternalObject && !TestInternalObjects(transA,transB,DeltaC2, Cross, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar dist; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,crossje, verticesA,verticesB,dist)) - return false; - - if(dist<dmin) - { - dmin = dist; - sep = (b3Vector3&)crossje; - } - } - } - - } - - - if((dot3F4(-deltaC2,(float4&)sep))>0.0f) - sep = -sep; - - return true; -} - - -bool findSeparatingAxisEdgeEdge( __global const b3ConvexPolyhedronData* hullA, __global const b3ConvexPolyhedronData* hullB, - const b3Float4& posA1, - const b3Quat& ornA, - const b3Float4& posB1, - const b3Quat& ornB, - const b3Float4& DeltaC2, - __global const b3AlignedObjectArray<float4>& vertices, - __global const b3AlignedObjectArray<float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - float4* sep, - float* dmin) -{ -// int i = get_global_id(0); - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - //int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = b3QuatRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = b3QuatRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje*=-1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - project(*hullA,posA,ornA,crossje,vertices, Min0, Max0); - project(*hullB,posB,ornB,crossje,vertices, Min1, Max1); - - if(Max0<Min1 || Max1<Min0) - result = false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -__inline float4 lerp3(const float4& a,const float4& b, float t) -{ - return b3MakeVector3( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS,float planeEqWS, float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - -int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, - const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = b3MakeVector3( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = b3QuatRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace polyA = facesA[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face -// int numContacts = numWorldVertsB1; - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; - const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = b3QuatRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = b3QuatRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = b3QuatRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;i<numVertsIn;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - if (numContactsOut<contactCapacity) - { - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - //printf("depth=%f\n",depth); - } - } else - { - b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut,contactCapacity); - } - } - } - - return numContactsOut; -} - - - -static int clipHullAgainstHull(const float4& separatingNormal, - const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA,const float4& posB, const b3Quaternion& ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, - - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - B3_PROFILE("clipHullAgainstHull"); - -// float curMaxDist=maxDist; - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - //B3_PROFILE("closestFaceB"); - if (hullB.m_numFaces!=1) - { - //printf("wtf\n"); - } - static bool once = true; - //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); - - for(int face=0;face<hullB.m_numFaces;face++) - { -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("face %d\n",face); - const b3GpuFace* faceB = &facesB[hullB.m_faceOffset+face]; - if (once) - { - for (int i=0;i<faceB->m_numIndices;i++) - { - float4 vert = verticesB[hullB.m_vertexOffset+indicesB[faceB->m_indexOffset+i]]; - printf("vert[%d] = %f,%f,%f\n",i,vert.x,vert.y,vert.z); - } - } -#endif //BT_DEBUG_SAT_FACE - //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) - { - const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset+face].m_plane.x, - facesB[hullB.m_faceOffset+face].m_plane.y, facesB[hullB.m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = b3QuatRotate(ornB, Normal); -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("faceNormal = %f,%f,%f\n",Normal.x,Normal.y,Normal.z); -#endif - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - once = false; - } - - - b3Assert(closestFaceB>=0); - { - //B3_PROFILE("worldVertsB1"); - const b3GpuFace& polyB = facesB[hullB.m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4& b = verticesB[hullB.m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - //B3_PROFILE("clipFaceAgainstHull"); - numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, - verticesA, facesA, indicesA, - contactsOut,contactCapacity); - } - - return numContactsOut; -} - - - - - - -#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} -#define REDUCE_MAX(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } - -int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0,0,0,0); - { - - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - - - - -int clipHullHullSingle( - int bodyIndexA, int bodyIndexB, - const float4& posA, - const b3Quaternion& ornA, - const float4& posB, - const b3Quaternion& ornB, - - int collidableIndexA, int collidableIndexB, - - const b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, - b3AlignedObjectArray<b3Contact4>* globalContactOut, - int& nContacts, - - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, - - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, - const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, - const b3Vector3& sepNormalWorldSpace, - int maxContactCapacity ) -{ - int contactIndex = -1; - b3ConvexPolyhedronData hullA, hullB; - - b3Collidable colA = hostCollidablesA[collidableIndexA]; - hullA = hostConvexDataA[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - - b3Collidable colB = hostCollidablesB[collidableIndexB]; - hullB = hostConvexDataB[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - - float4 contactsOut[MAX_VERTS]; - int localContactCapacity = MAX_VERTS; - -#ifdef _WIN32 - b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); - b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); -#endif - - - { - - float4 worldVertsB1[MAX_VERTS]; - float4 worldVertsB2[MAX_VERTS]; - int capacityWorldVerts = MAX_VERTS; - - float4 hostNormal = make_float4(sepNormalWorldSpace.x,sepNormalWorldSpace.y,sepNormalWorldSpace.z,0.f); - int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; - int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; - - b3Scalar minDist = -1; - b3Scalar maxDist = 0.; - - - - b3Transform trA,trB; - { - //B3_PROFILE("transform computation"); - //trA.setIdentity(); - trA.setOrigin(b3MakeVector3(posA.x,posA.y,posA.z)); - trA.setRotation(b3Quaternion(ornA.x,ornA.y,ornA.z,ornA.w)); - - //trB.setIdentity(); - trB.setOrigin(b3MakeVector3(posB.x,posB.y,posB.z)); - trB.setRotation(b3Quaternion(ornB.x,ornB.y,ornB.z,ornB.w)); - } - - b3Quaternion trAorn = trA.getRotation(); - b3Quaternion trBorn = trB.getRotation(); - - int numContactsOut = clipHullAgainstHull(hostNormal, - hostConvexDataA.at(shapeA), - hostConvexDataB.at(shapeB), - (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, - (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - verticesA, facesA,indicesA, - verticesB, facesB,indicesB, - - contactsOut,localContactCapacity); - - if (numContactsOut>0) - { - B3_PROFILE("overlap"); - - float4 normalOnSurfaceB = (float4&)hostNormal; - - b3Int4 contactIdx; - contactIdx.x = 0; - contactIdx.y = 1; - contactIdx.z = 2; - contactIdx.w = 3; - - int numPoints = 0; - - { - // B3_PROFILE("extractManifold"); - numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); - } - - b3Assert(numPoints); - - if (nContacts<maxContactCapacity) - { - contactIndex = nContacts; - globalContactOut->expand(); - b3Contact4& contact = globalContactOut->at(nContacts); - contact.m_batchIdx = 0;//i; - contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; - contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; - - contact.m_frictionCoeffCmp = 45874; - contact.m_restituitionCoeffCmp = 0; - - // float distance = 0.f; - for (int p=0;p<numPoints;p++) - { - contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]];//check if it is actually on B - contact.m_worldNormalOnB = normalOnSurfaceB; - } - //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); - contact.m_worldNormalOnB.w = (b3Scalar)numPoints; - nContacts++; - } else - { - b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts,maxContactCapacity); - } - } - } - return contactIndex; -} - - - - - -void computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; - - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - -// int numContactsOut = 0; -// int numWorldVertsB1= 0; - - b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); - float planeConstant = planeEq.w; - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(posB); - convexWorldTransform.setRotation(ornB); - b3Transform planeTransform; - planeTransform.setIdentity(); - planeTransform.setOrigin(posA); - planeTransform.setRotation(ornA); - - b3Transform planeInConvex; - planeInConvex= convexWorldTransform.inverse() * planeTransform; - b3Transform convexInPlane; - convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; - float maxDot = -1e30; - int hitVertex=-1; - b3Vector3 hitVtx; - -#define MAX_PLANE_CONVEX_POINTS 64 - - b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - b3Int4 contactIdx; - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - for (int i=0;i<hullB->m_numVertices;i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; - float curDot = vtx.dot(planeNormalInConvex); - - - if (curDot>maxDot) - { - hitVertex=i; - maxDot=curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints<MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform*vtx; - b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; - float dist = planeNormal.dot(vtxInPlane)-planeConstant; - if (dist<0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints>4) - { - numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; -// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints>0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx=nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - for (int i=0;i<numReducedPoints;i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - }//if (dstIdx < numPairs) - } - - - -// printf("computeContactPlaneConvex\n"); -} - - - -B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) - { - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (quantization.x), - (b3Scalar)(vecIn[1]) / (quantization.y), - (b3Scalar)(vecIn[2]) / (quantization.z)); - vecOut += bvhAabbMin; - return vecOut; - } - -void traverseTreeTree() -{ - -} - -#include "Bullet3Common/shared/b3Mat3x3.h" - -int numAabbChecks = 0; -int maxNumAabbChecks = 0; -int maxDepth = 0; - -// work-in-progress -__kernel void findCompoundPairsKernel( - int pairIndex, - int bodyIndexA, - int bodyIndexB, - int collidableIndexA, - int collidableIndexB, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, - __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace, - __global const b3GpuChildShape* gpuChildShapes, - __global b3Int4* gpuCompoundPairsOut, - __global int* numCompoundPairsOut, - int maxNumCompoundPairsCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU - ) -{ - numAabbChecks=0; - maxNumAabbChecks=0; -// int i = pairIndex; - { - - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; - int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; - int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; - int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; - int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; - - - int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(posA); - transA.setRotation(ornA); - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(posB); - transB.setRotation(ornB); - - - - for (int p=0;p<numSubTreesA;p++) - { - b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA+p]; - //bvhInfoCPU[bvhA].m_quantization - b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 aabbAMinOut,aabbAMaxOut; - float margin=0.f; - b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); - - for (int q=0;q<numSubTreesB;q++) - { - b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB+q]; - - b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - - b3Vector3 aabbBMinOut,aabbBMaxOut; - float margin=0.f; - b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); - - - numAabbChecks=0; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (aabbOverlap) - { - - int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfoCPU[bvhA].m_nodeOffset; - // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - - int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfoCPU[bvhB].m_nodeOffset; - // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; - - b3AlignedObjectArray<b3Int2> nodeStack; - b3Int2 node0; - node0.x = startNodeIndexA; - node0.y = startNodeIndexB; - - int maxStackDepth = 1024; - nodeStack.resize(maxStackDepth); - int depth=0; - nodeStack[depth++]=node0; - - do - { - if (depth > maxDepth) - { - maxDepth=depth; - printf("maxDepth=%d\n",maxDepth); - } - b3Int2 node = nodeStack[--depth]; - - b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax,bvhInfoCPU[bvhA].m_quantization,bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax,bvhInfoCPU[bvhB].m_quantization,bvhInfoCPU[bvhB].m_aabbMin); - - float margin=0.f; - b3Vector3 aabbAMinOut,aabbAMaxOut; - b3TransformAabb2(aMinLocal,aMaxLocal, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); - - b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2(bMinLocal,bMaxLocal, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); - - numAabbChecks++; - bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (nodeOverlap) - { - bool isLeafA = treeNodesCPU[node.x].isLeafNode(); - bool isLeafB = treeNodesCPU[node.y].isLeafNode(); - bool isInternalA = !isLeafA; - bool isInternalB = !isLeafB; - - //fail, even though it might hit two leaf nodes - if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) - { - b3Error("Error: traversal exceeded maxStackDepth\n"); - continue; - } - - if(isInternalA) - { - int nodeAleftChild = node.x+1; - bool isNodeALeftChildLeaf = treeNodesCPU[node.x+1].isLeafNode(); - int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + treeNodesCPU[node.x+1].getEscapeIndex(); - - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); - } - } - else - { - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y+1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + treeNodesCPU[node.y+1].getEscapeIndex(); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); - } - else - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex(); - int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - } - } - } - } while (depth); - maxNumAabbChecks = b3Max(numAabbChecks,maxNumAabbChecks); - } - } - } - - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - - if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c=0;c<numChildrenA;c++) - { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - - - - b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; - - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(newPosA); - transA.setRotation(newOrnA); - b3Scalar margin=0.0f; - - b3Vector3 aabbAMinOut,aabbAMaxOut; - - b3TransformAabb2((const b3Float4&)aabbA.m_min,(const b3Float4&)aabbA.m_max, margin,transA.getOrigin(),transA.getRotation(),&aabbAMinOut,&aabbAMaxOut); - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - - - - b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(newPosB); - transB.setRotation(newOrnB); - - b3Vector3 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2((const b3Float4&)aabbB.m_min,(const b3Float4&)aabbB.m_max, margin,transB.getOrigin(),transB.getRotation(),&aabbBMinOut,&aabbBMaxOut); - - numAabbChecks++; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (aabbOverlap) - { - /* - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - */ - {// - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - }// - }//fi(1) - } //for (int b=0 - }//if (collidables[collidableIndexB]. - else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - if (1) - { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0; - c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 c1; - c1 = transform(&c1local,&posB,&ornB); - // const float4 DeltaC2 = c0 - c1; - - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,childShapeIndexA,-1); - }//if (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - - ////////////////////////////////////// - - if (1) - { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0; - c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB =newOrnB; - float4 c1; - c1 = transform(&c1local,&posB,&ornB); - // const float4 DeltaC2 = c0 - c1; - {// - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA,bodyIndexB,-1,childShapeIndexB); - }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - return; - }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - }//i<numPairs -} - - - -__kernel void processCompoundPairsKernel( __global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global b3Aabb* aabbs, - __global const b3GpuChildShape* gpuChildShapes, - __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - int numCompoundPairs, - int i - ) -{ - -// int i = get_global_id(0); - if (i<numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1,0,0,0); -// bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); - - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxis( convexShapes[shapeIndexB],convexShapes[shapeIndexA],posB,ornB,posA,ornA,vertices,uniqueEdges,faces,indices,vertices,uniqueEdges,faces,indices,sepNormal);//,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else//(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - }//sepEE - }//(!sepB) - }//(!sepA) - - - } - -} - - -__kernel void clipCompoundsHullHullKernel( __global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global const b3GpuChildShape* gpuChildShapes, - __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - __global struct b3Contact4Data* globalContactsOut, - int* nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity, int i) -{ - -// int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.0f; - - if (i<numCompoundPairs) - { - - if (gpuHasCompoundSepNormalsOut[i]) - { - - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB,childPosB)+posB; - b3Quat newOrnB = b3QuatMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - convexShapes[shapeIndexA], convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - b3Int4 contactIdx;// = {-1,-1,-1,-1}; - - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int dstIdx; - dstIdx = b3AtomicInc( nGlobalContactsOut); - if ((dstIdx+nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx.s[i]]; - } - b3Contact4Data_setNumPoints(c,nReducedContacts); - } - - }// if (numContactsOut>0) - }// if (gpuHasCompoundSepNormalsOut[i]) - }// if (i<numCompoundPairs) - -} - - -void computeContactCompoundCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, - const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, - - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU - ) -{ - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - - b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - int numCompoundPairsOut=0; - int maxNumCompoundPairsCapacity = 8192;//1024; - cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); - - // work-in-progress - findCompoundPairsKernel( - pairIndex, - bodyIndexA,bodyIndexB, - collidableIndexA,collidableIndexB, - rigidBodies, - collidables, - convexShapes, - convexVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - cpuChildShapes, - &cpuCompoundPairsOut[0], - &numCompoundPairsOut, - maxNumCompoundPairsCapacity , - treeNodesCPU, - subTreesCPU, - bvhInfoCPU - ); - - printf("maxNumAabbChecks=%d\n",maxNumAabbChecks); - if (numCompoundPairsOut>maxNumCompoundPairsCapacity) - { - b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n",maxNumCompoundPairsCapacity); - numCompoundPairsOut=maxNumCompoundPairsCapacity; - } - b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut; - b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut; - cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); - cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); - - for (int i=0;i<numCompoundPairsOut;i++) - { - - processCompoundPairsKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,0,cpuChildShapes, - cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,numCompoundPairsOut,i); - } - - for (int i=0;i<numCompoundPairsOut;i++) - { - clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0],rigidBodies,collidables,convexShapes,convexVertices,hostUniqueEdges,faces,convexIndices,cpuChildShapes, - cpuCompoundSepNormalsOut,cpuHasCompoundSepNormalsOut,globalContactsOut,&nGlobalContactsOut,numCompoundPairsOut,maxContactCapacity,i); - } - /* - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - - - bool foundSepAxis = findSeparatingAxis(hullA,hullB, - posA, - ornA, - posB, - ornB, - - convexVertices,uniqueEdges,faces,convexIndices, - convexVertices,uniqueEdges,faces,convexIndices, - - sepNormalWorldSpace - ); - */ - - - /* - if (foundSepAxis) - { - - - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA,ornA, - posB,ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - - } - */ - -// return contactIndex; - - /* - - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c=0;c<numChildrenB;c++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; - int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; - - float4 rootPosB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; - b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; - float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; - b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; - - } - */ - -} - -void computeContactPlaneCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - - - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c=0;c<numChildrenB;c++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; - int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; - - float4 rootPosB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; - b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; - float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; - b3Quaternion ornB = rootOrnB*childOrnB;//b3QuatMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; - - - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - - // int numContactsOut = 0; - // int numWorldVertsB1= 0; - - b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal=b3MakeVector3(planeEq.x,planeEq.y,planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA,planeNormal); - float planeConstant = planeEq.w; - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(posB); - convexWorldTransform.setRotation(ornB); - b3Transform planeTransform; - planeTransform.setIdentity(); - planeTransform.setOrigin(posA); - planeTransform.setRotation(ornA); - - b3Transform planeInConvex; - planeInConvex= convexWorldTransform.inverse() * planeTransform; - b3Transform convexInPlane; - convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis()*-planeNormal; - float maxDot = -1e30; - int hitVertex=-1; - b3Vector3 hitVtx; - - #define MAX_PLANE_CONVEX_POINTS 64 - - b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - b3Int4 contactIdx; - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - for (int i=0;i<hullB->m_numVertices;i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset+i]; - float curDot = vtx.dot(planeNormalInConvex); - - - if (curDot>maxDot) - { - hitVertex=i; - maxDot=curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints<MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform*vtx; - b3Vector3 vtxInPlane = planeTransform.inverse()*vtxWorld; - float dist = planeNormal.dot(vtxInPlane)-planeConstant; - if (dist<0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints>4) - { - numReducedPoints = extractManifoldSequentialGlobal( contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints>0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx=nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - for (int i=0;i<numReducedPoints;i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - }//if (dstIdx < numPairs) - } - - } - - -} - - - - - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - - float radius = collidables[collidableIndexA].m_radius; - float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; - b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - - - - float4 pos = rigidBodies[bodyIndexB].m_pos; - - - b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; - - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(pos); - tr.setRotation(quat); - b3Transform trInv = tr.inverse(); - - float4 spherePos = trInv(spherePos1); - - int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = b3MakeVector3(0, 0, 0, 0); -// float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); - float minDist = -1000000.f; // TODO: What is the largest/smallest float? - bool bCollide = true; - int region = -1; - float4 localHitNormal; - for ( int f = 0; f < numFaces; f++ ) - { - b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; - float4 planeEqn; - float4 localPlaneNormal = b3MakeVector3(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - float4 n1 = localPlaneNormal;//quatRotate(quat,localPlaneNormal); - planeEqn = n1; - planeEqn[3] = face.m_plane.w; - - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - if ( dist > radius) - { - bCollide = false; - break; - } - - if ( dist > 0 ) - { - //might hit an edge or vertex - b3Vector3 out; - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist>minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region=1; - } - } else - { - b3Vector3 tmp = spherePos-out; - b3Scalar l2 = tmp.length2(); - if (l2<radius*radius) - { - dist = b3Sqrt(l2); - if (dist>minDist) - { - minDist = dist; - closestPnt = out; - localHitNormal = tmp/dist; - region=2; - } - - } else - { - bCollide = false; - break; - } - } - } - else - { - if ( dist > minDist ) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region=3; - } - } - } - static int numChecks = 0; - numChecks++; - - if (bCollide && minDist > -10000) - { - - float4 normalOnSurfaceB1 = tr.getBasis()*localHitNormal;//-hitNormalWorld; - float4 pOnB1 = tr(closestPnt); - //printf("dist ,%f,",minDist); - float actualDepth = minDist-radius; - if (actualDepth<0) - { - //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); - //printf("region=,%d,\n", region); - pOnB1[3] = actualDepth; - - int dstIdx; -// dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx=nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB1; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - int numPoints = 1; - c->m_worldNormalOnB.w = (b3Scalar)numPoints; - }//if (dstIdx < numPairs) - } - }//if (hasCollision) - -} - - - - -int computeContactConvexConvex2( - int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, - const b3AlignedObjectArray<b3Collidable>& collidables, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& uniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - b3AlignedObjectArray<b3Contact4>& globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - const b3AlignedObjectArray<b3Contact4>& oldContacts - ) -{ - int contactIndex = -1; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - - - b3ConvexPolyhedronData hullA, hullB; - - b3Vector3 sepNormalWorldSpace; - - - - b3Collidable colA = collidables[collidableIndexA]; - hullA = convexShapes[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - - b3Collidable colB = collidables[collidableIndexB]; - hullB = convexShapes[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - -// int contactCapacity = MAX_VERTS; - //int numContactsOut=0; - - -#ifdef _WIN32 - b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); - b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); -#endif - - bool foundSepAxis = findSeparatingAxis(hullA,hullB, - posA, - ornA, - posB, - ornB, - - convexVertices,uniqueEdges,faces,convexIndices, - convexVertices,uniqueEdges,faces,convexIndices, - - sepNormalWorldSpace - ); - - - if (foundSepAxis) - { - - - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA,ornA, - posB,ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - - } - - return contactIndex; -} - - - - - - - -void GpuSatCollision::computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, - const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, - const b3OpenCLArray<b3Contact4>* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, - const b3OpenCLArray<b3Vector3>& gpuVertices, - const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, - const b3OpenCLArray<b3GpuFace>& gpuFaces, - const b3OpenCLArray<int>& gpuIndices, - const b3OpenCLArray<b3Collidable>& gpuCollidables, - const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - - const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, - const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - - b3OpenCLArray<b3Vector3>& worldVertsB1GPU, - b3OpenCLArray<b3Int4>& clippingFacesOutGPU, - b3OpenCLArray<b3Vector3>& worldNormalsAGPU, - b3OpenCLArray<b3Vector3>& worldVertsA1GPU, - b3OpenCLArray<b3Vector3>& worldVertsB2GPU, - b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, - b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, - b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, - b3OpenCLArray<b3BvhInfo>* bvhInfo, - - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray<b3Int4>& triangleConvexPairsOut, - int& numTriConvexPairsOut - ) -{ - myframecount++; - - if (!nPairs) - return; - -#ifdef CHECK_ON_HOST - - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - - b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs; - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - - b3AlignedObjectArray<b3Contact4> oldHostContacts; - - if (oldContacts->size()) - { - oldContacts->copyToHost(oldHostContacts); - } - - - hostContacts.resize(maxContactCapacity); - - for (int i=0;i<nPairs;i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactSphereConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - computeContactSphereConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); - //printf("convex-sphere\n"); - - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneConvex(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("convex-plane\n"); - - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactPlaneConvex(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("plane-convex\n"); - - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactCompoundCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], hostAabbsWorldSpace,hostAabbsLocalSpace,hostVertices,hostUniqueEdges,hostIndices,hostFaces,&hostContacts[0], - nContacts,maxContactCapacity,treeNodesCPU,subTreesCPU,bvhInfoCPU); -// printf("convex-plane\n"); - - } - - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneCompound(i,bodyIndexB,bodyIndexA,collidableIndexB,collidableIndexA,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0], &hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("convex-plane\n"); - - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactPlaneCompound(i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,&hostBodyBuf[0], - &hostCollidables[0],&hostConvexData[0],&cpuChildShapes[0],&hostVertices[0],&hostIndices[0],&hostFaces[0],&hostContacts[0],nContacts,maxContactCapacity); -// printf("plane-convex\n"); - - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - //int contactIndex = computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - - - if (contactIndex>=0) - { -// printf("convex convex contactIndex = %d\n",contactIndex); - hostPairs[i].z = contactIndex; - } -// printf("plane-convex\n"); - - } - - - } - - if (hostPairs.size()) - { - pairs->copyFromHost(hostPairs); - } - - hostContacts.resize(nContacts); - if (nContacts) - { - - contactOut->copyFromHost(hostContacts); - } else - { - contactOut->resize(0); - } - - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - //printf("(HOST) nContacts = %d\n",nContacts); - -#else - - { - if (nPairs) - { - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - - B3_PROFILE("primitiveContactsKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_primitiveContactsKernel,"m_primitiveContactsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - launcher.setConst(maxContactCapacity); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } - } - - -#endif//CHECK_ON_HOST - - B3_PROFILE("computeConvexConvexContactsGPUSAT"); - // printf("nContacts = %d\n",nContacts); - - - m_sepNormals.resize(nPairs); - m_hasSeparatingNormals.resize(nPairs); - - int concaveCapacity=maxTriConvexPairCapacity; - m_concaveSepNormals.resize(concaveCapacity); - m_concaveHasSeparatingNormals.resize(concaveCapacity); - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(0); - - - m_gpuCompoundPairs.resize(compoundPairCapacity); - - m_gpuCompoundSepNormals.resize(compoundPairCapacity); - - - m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); - - m_numCompoundPairsOut.resize(0); - m_numCompoundPairsOut.push_back(0); - - int numCompoundPairs = 0; - - int numConcavePairs =0; - - { - clFinish(m_queue); - if (findSeparatingAxisOnGpu) - { - m_dmins.resize(nPairs); - if (splitSearchSepAxisConvex) - { - - - if (useMprGpu) - { - nContacts = m_totalContactsOut.at(0); - { - B3_PROFILE("mprPenetrationKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_mprPenetrationKernel,"mprPenetrationKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - - launcher.setConst(maxContactCapacity); - launcher.setConst( nPairs ); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - /* - b3AlignedObjectArray<int>hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Vector3>hostSepAxis; - m_sepNormals.copyToHost(hostSepAxis); - */ - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); - if (nContacts>maxContactCapacity) - { - - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - - } - } - - if (1) - { - - if (1) - { - { - B3_PROFILE("findSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel,"findSeparatingAxisVertexFaceKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); - - { - B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()), - b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true) - - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel,"findSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numDirections); - launcher.setConst( nPairs ); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - } - } - if (useMprGpu) - { - B3_PROFILE("findSeparatingAxisUnitSphereKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( m_unitSphereDirections.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel,"findSeparatingAxisUnitSphereKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - int numDirections = sizeof(unitSphere162)/sizeof(b3Vector3); - launcher.setConst( numDirections); - - launcher.setConst( nPairs ); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } - } - - - } else - { - B3_PROFILE("findSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel,"m_findSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - } - else - { - - B3_PROFILE("findSeparatingAxisKernel CPU"); - - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; - convexData.copyToHost(hostConvexShapeData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<int> hostHasSepAxis; - hostHasSepAxis.resize(nPairs); - b3AlignedObjectArray<b3Vector3> hostSepAxis; - hostSepAxis.resize(nPairs); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(maxContactCapacity); - int nGlobalContactsOut = nContacts; - - - for (int i=0;i<nPairs;i++) - { - - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - hostHasSepAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((hostBodyBuf[bodyIndexA].m_invMass==0) &&(hostBodyBuf[bodyIndexB].m_invMass==0)) - { - continue; - } - - - if ((hostCollidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(hostCollidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - continue; - } - - float dmin = FLT_MAX; - - b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; - b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; - b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; - b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; - b3Quaternion ornA =hostBodyBuf[bodyIndexA].m_quat; - b3Quaternion ornB =hostBodyBuf[bodyIndexB].m_quat; - - - if (useGjk) - { - - //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR - { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); - b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin,false); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - hostSepAxis[i].w = dmin; - } - } - } - } - - if (hostHasSepAxis[i]) - { - int pairIndex = i; - - bool useMpr = true; - if (useMpr) - { - int res=0; - float depth = 0.f; - b3Vector3 sepAxis2 = b3MakeVector3(1,0,0); - b3Vector3 resultPointOnBWorld = b3MakeVector3(0,0,0); - - float depthOut; - b3Vector3 dirOut; - b3Vector3 posOut; - - - //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); - res = b3MprPenetration(pairIndex,bodyIndexA,bodyIndexB,&hostBodyBuf[0],&hostConvexShapeData[0],&hostCollidables[0],&hostVertices[0],&hostSepAxis[0],&hostHasSepAxis[0],&depthOut,&dirOut,&posOut); - depth = depthOut; - sepAxis2 = b3MakeVector3(-dirOut.x,-dirOut.y,-dirOut.z); - resultPointOnBWorld = posOut; - //hostHasSepAxis[i] = 0; - - - if (res==0) - { - //add point? - //printf("depth = %f\n",depth); - //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); - //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); - - - - float dist=0.f; - - const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; - const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; - - if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) - { - if (depth > dist) - { - float diff = depth - dist; - - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n",maxdiff); - } - } - } - if (depth > dmin) - { - b3Vector3 oldAxis = hostSepAxis[i]; - depth = dmin; - sepAxis2 = oldAxis; - } - - - - if(b3TestSepAxis( &hullA, &hullB, posA,ornA,posB,ornB,&sepAxis2, &hostVertices[0], &hostVertices[0],&dist)) - { - if (depth > dist) - { - float diff = depth - dist; - //printf("?diff = %f\n",diff ); - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n",maxdiff); - } - } - //this is used for SAT - //hostHasSepAxis[i] = 1; - //hostSepAxis[i] = sepAxis2; - - //add contact point - - //int contactIndex = nGlobalContactsOut; - b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); - nGlobalContactsOut++; - newContact.m_batchIdx = 0;//i; - newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass==0)? -bodyIndexA:bodyIndexA; - newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass==0)? -bodyIndexB:bodyIndexB; - - newContact.m_frictionCoeffCmp = 45874; - newContact.m_restituitionCoeffCmp = 0; - - - static float maxDepth = 0.f; - - if (depth > maxDepth) - { - maxDepth = depth; - printf("MPR maxdepth = %f\n",maxDepth ); - - } - - - resultPointOnBWorld.w = -depth; - newContact.m_worldPosB[0] = resultPointOnBWorld; - //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; - newContact.m_worldNormalOnB = sepAxis2; - newContact.m_worldNormalOnB.w = (b3Scalar)1; - } else - { - printf("rejected\n"); - } - - - } - } else - { - - - - //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - b3AlignedObjectArray<b3Contact4> oldHostContacts; - int result; - result = computeContactConvexConvex2( //hostPairs, - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - hostBodyBuf, - hostCollidables, - hostConvexShapeData, - hostVertices, - hostUniqueEdges, - hostIndices, - hostFaces, - hostContacts, - nGlobalContactsOut, - maxContactCapacity, - oldHostContacts - //hostHasSepAxis, - //hostSepAxis - - ); - }//mpr - }//hostHasSepAxis[i] = 1; - - } else - { - - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local,posB,ornB); - b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge =b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin,true); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - } - } - } - } - } - - if (useGjkContacts)//nGlobalContactsOut>0) - { - //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); - nContacts = nGlobalContactsOut; - contactOut->copyFromHost(hostContacts); - - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - } - - m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - m_sepNormals.copyFromHost(hostSepAxis); - - /* - //double-check results from GPU (comment-out the 'else' so both paths are executed - b3AlignedObjectArray<int> checkHasSepAxis; - m_hasSeparatingNormals.copyToHost(checkHasSepAxis); - static int frameCount = 0; - frameCount++; - for (int i=0;i<nPairs;i++) - { - if (hostHasSepAxis[i] != checkHasSepAxis[i]) - { - printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n", - frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]); - } - } - //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - // m_sepNormals.copyFromHost(hostSepAxis); - */ - } - - - numCompoundPairs = m_numCompoundPairsOut.at(0); - bool useGpuFindCompoundPairs=true; - if (useGpuFindCompoundPairs) - { - B3_PROFILE("findCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsLocalSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL()), - b3BufferInfoCL( m_numCompoundPairsOut.getBufferCL()), - b3BufferInfoCL(subTreesGPU->getBufferCL()), - b3BufferInfoCL(treeNodesGPU->getBufferCL()), - b3BufferInfoCL(bvhInfo->getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel,"m_findCompoundPairsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - launcher.setConst( compoundPairCapacity); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - numCompoundPairs = m_numCompoundPairsOut.at(0); - //printf("numCompoundPairs =%d\n",numCompoundPairs ); - if (numCompoundPairs) - { - //printf("numCompoundPairs=%d\n",numCompoundPairs); - } - - - } else - { - - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - - b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - cpuCompoundPairsOut.resize(compoundPairCapacity); - - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - - - - for (int pairIndex=0;pairIndex<nPairs;pairIndex++) - { - int bodyIndexA = hostPairs[pairIndex].x; - int bodyIndexB = hostPairs[pairIndex].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - if (cpuChildShapes.size()) - { - findCompoundPairsKernel( - pairIndex, - bodyIndexA, - bodyIndexB, - collidableIndexA, - collidableIndexB, - &hostBodyBuf[0], - &hostCollidables[0], - &hostConvexData[0], - hostVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - &cpuChildShapes[0], - &cpuCompoundPairsOut[0], - &numCompoundPairs, - compoundPairCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU - ); - } - } - - - m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs,1,0,true); - if (numCompoundPairs) - { - b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; - m_gpuCompoundPairs.copyFromHostPointer(ptr,numCompoundPairs,0,true); - } - //cpuCompoundPairsOut - - } - if (numCompoundPairs) - { - printf("numCompoundPairs=%d\n",numCompoundPairs); - } - - if (numCompoundPairs > compoundPairCapacity) - { - b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); - numCompoundPairs = compoundPairCapacity; - } - - - - m_gpuCompoundPairs.resize(numCompoundPairs); - m_gpuHasCompoundSepNormals.resize(numCompoundPairs); - m_gpuCompoundSepNormals.resize(numCompoundPairs); - - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsPrimitivesKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel,"m_processCompoundPairsPrimitivesKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numCompoundPairs ); - launcher.setConst(maxContactCapacity); - - int num = numCompoundPairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); - if (nContacts>maxContactCapacity) - { - - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL()), - b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel,"m_processCompoundPairsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( numCompoundPairs ); - - int num = numCompoundPairs; - launcher.launch1D( num); - clFinish(m_queue); - - } - - - //printf("numConcave = %d\n",numConcave); - - - -// printf("hostNormals.size()=%d\n",hostNormals.size()); - //int numPairs = pairCount.at(0); - - - - } - int vertexFaceCapacity = 64; - - - - { - //now perform the tree query on GPU - - - - - if (treeNodesGPU->size() && treeNodesGPU->size()) - { - if (bvhTraversalKernelGPU) - { - - B3_PROFILE("m_bvhTraversalKernel"); - - - numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel,"m_bvhTraversalKernel"); - launcher.setBuffer( pairs->getBufferCL()); - launcher.setBuffer( bodyBuf->getBufferCL()); - launcher.setBuffer( gpuCollidables.getBufferCL()); - launcher.setBuffer( clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer( triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer( m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer( subTreesGPU->getBufferCL()); - launcher.setBuffer( treeNodesGPU->getBufferCL()); - launcher.setBuffer( bvhInfo->getBufferCL()); - - launcher.setConst( nPairs ); - launcher.setConst( maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - } else - { - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - //int maxTriConvexPairCapacity, - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - - //int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... - - volatile int hostNumConcavePairsOut=0; - - // - for (int i=0;i<nPairs;i++) - { - b3BvhTraversal( &hostPairs.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostAabbsWorldSpace.at(0), - &triangleConvexPairsOutHost.at(0), - &hostNumConcavePairsOut, - &subTreesCPU.at(0), - &treeNodesCPU.at(0), - &bvhInfoCPU.at(0), - nPairs, - maxTriConvexPairCapacity, - i); - } - numConcavePairs = hostNumConcavePairsOut; - - if (hostNumConcavePairsOut) - { - triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); - triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); - } - // - - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(numConcavePairs); - } - - //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); - - if (numConcavePairs > maxTriConvexPairCapacity) - { - static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs,maxTriConvexPairCapacity,exceeded_maxTriConvexPairCapacity_count++); - numConcavePairs = maxTriConvexPairCapacity; - } - triangleConvexPairsOut.resize(numConcavePairs); - - if (numConcavePairs) - { - - - - - clippingFacesOutGPU.resize(numConcavePairs); - worldNormalsAGPU.resize(numConcavePairs); - worldVertsA1GPU.resize(vertexFaceCapacity*(numConcavePairs)); - worldVertsB1GPU.resize(vertexFaceCapacity*(numConcavePairs)); - - - if (findConcaveSeparatingAxisKernelGPU) - { - - /* - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - */ - - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - if (splitSearchSepAxisConcave) - { - //printf("numConcavePairs = %d\n",numConcavePairs); - m_dmins.resize(numConcavePairs); - { - B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel,"m_findConcaveSeparatingAxisVertexFaceKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - - - } -// numConcavePairs = 0; - if (1) - { - B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel,"m_findConcaveSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - // numConcavePairs = 0; - - - - - - - } else - { - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel,"m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst( numConcavePairs ); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - - } else - { - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; - - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); - - b3GpuChildShape* childShapePointerCPU = 0; - if (cpuChildShapes.size()) - childShapePointerCPU = &cpuChildShapes.at(0); - - clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - worldNormalsACPU.resize(worldNormalsAGPU.size()); - worldVertsB1CPU.resize(worldVertsB1GPU.size()); - - for (int i=0;i<numConcavePairs;i++) - { - b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostConvexData.at(0), &hostVertices.at(0),&hostUniqueEdges.at(0), - &hostFaces.at(0),&hostIndices.at(0),childShapePointerCPU, - &hostAabbsWorldSpace.at(0), - &concaveSepNormalsHost.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - vertexFaceCapacity, - numConcavePairs,i); - }; - - m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - - - - } -// b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; -// m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); -// b3AlignedObjectArray<b3Int4> cpuConcavePairs; -// triangleConvexPairsOut.copyToHost(cpuConcavePairs); - - - } - } - - - } - - if (numConcavePairs) - { - if (numConcavePairs) - { - B3_PROFILE("findConcaveSphereContactsKernel"); - nContacts = m_totalContactsOut.at(0); -// printf("nContacts1 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL() ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( clAabbsWorldSpace.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel,"m_findConcaveSphereContactsKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - - launcher.setConst( numConcavePairs ); - launcher.setConst(maxContactCapacity); - - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); - - //printf("nContacts2 = %d\n",nContacts); - - if (nContacts >= maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - - } - - - -#ifdef __APPLE__ - bool contactClippingOnGpu = true; -#else - bool contactClippingOnGpu = true; -#endif - - if (contactClippingOnGpu) - { - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -// printf("nContacts3 = %d\n",nContacts); - - - //B3_PROFILE("clipHullHullKernel"); - - bool breakupConcaveConvexKernel = true; - -#ifdef __APPLE__ - //actually, some Apple OpenCL platform/device combinations work fine... - breakupConcaveConvexKernel = true; -#endif - //concave-convex contact clipping - if (numConcavePairs) - { - // printf("numConcavePairs = %d\n", numConcavePairs); - // nContacts = m_totalContactsOut.at(0); - // printf("nContacts before = %d\n", nContacts); - - if (breakupConcaveConvexKernel) - { - - worldVertsB2GPU.resize(vertexFaceCapacity*numConcavePairs); - - - //clipFacesAndFindContacts - - if (clipConcaveFacesAndFindContactsCPU) - { - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - worldNormalsAGPU.copyToHost(worldNormalsACPU); - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - - - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2CPU.resize(worldVertsB2GPU.size()); - - - for (int i=0;i<numConcavePairs;i++) - { - - clipFacesAndFindContactsKernel( &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - - - } else - { - - if (1) - { - - - - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst( numConcavePairs ); - int debugMode = 0; - launcher.setConst( debugMode); - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - //int bla = m_totalContactsOut.at(0); - } - } - //contactReduction - { - int newContactCapacity=nContacts+numConcavePairs; - contactOut->reserve(newContactCapacity); - if (reduceConcaveContactsOnGPU) - { -// printf("newReservation = %d\n",newReservation); - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst( numConcavePairs ); - int num = numConcavePairs; - - launcher.launch1D( num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - }else - { - - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<int>concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(newContactCapacity); - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - - - for (int i=0;i<numConcavePairs;i++) - { - b3NewContactReductionKernel( &triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &hostContacts.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - numConcavePairs, - i - ); - - } - - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); -// nContacts = m_totalContactsOut.at(0); - //contactOut->resize(nContacts); - hostContacts.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContacts); - } - - } - //re-use? - - - } else - { - B3_PROFILE("clipHullHullConcaveConvexKernel"); - nContacts = m_totalContactsOut.at(0); - int newContactCapacity = contactOut->capacity(); - - //printf("contactOut5 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( triangleConvexPairsOut.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel,"m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(newContactCapacity); - launcher.setConst( numConcavePairs ); - int num = numConcavePairs; - launcher.launch1D( num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - //printf("contactOut6 = %d\n",nContacts); - b3AlignedObjectArray<b3Contact4> cpuContacts; - contactOut->copyToHost(cpuContacts); - } - // printf("nContacts after = %d\n", nContacts); - }//numConcavePairs - - - - //convex-convex contact clipping - - bool breakupKernel = false; - -#ifdef __APPLE__ - breakupKernel = true; -#endif - -#ifdef CHECK_ON_HOST - bool computeConvexConvex = false; -#else - bool computeConvexConvex = true; -#endif//CHECK_ON_HOST - if (computeConvexConvex) - { - B3_PROFILE("clipHullHullKernel"); - if (breakupKernel) - { - - - - - worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - clippingFacesOutGPU.resize(nPairs); - worldNormalsAGPU.resize(nPairs); - worldVertsA1GPU.resize(vertexFaceCapacity*nPairs); - worldVertsB2GPU.resize(vertexFaceCapacity*nPairs); - - if (findConvexClippingFacesGPU) - { - B3_PROFILE("findClippingFacesKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_findClippingFacesKernel,"m_findClippingFacesKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( vertexFaceCapacity); - launcher.setConst( nPairs ); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - } else - { - - float minDist = -1e30f; - float maxDist = 0.02f; - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<int> hostHasSepNormals; - m_hasSeparatingNormals.copyToHost(hostHasSepNormals); - b3AlignedObjectArray<b3Vector3> cpuSepNormals; - m_sepNormals.copyToHost(cpuSepNormals); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - - //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsACPU.resize(nPairs); - - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - - - for (int i=0;i<nPairs;i++) - { - - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - - if (hostHasSepNormals[i]) - { - b3FindClippingFaces(cpuSepNormals[i], - &hostConvexData[shapeIndexA], - &hostConvexData[shapeIndexB], - hostBodyBuf[bodyIndexA].m_pos,hostBodyBuf[bodyIndexA].m_quat, - hostBodyBuf[bodyIndexB].m_pos,hostBodyBuf[bodyIndexB].m_quat, - &worldVertsA1CPU.at(0),&worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - vertexFaceCapacity,minDist,maxDist, - &hostVertices.at(0),&hostFaces.at(0), - &hostIndices.at(0), - &hostVertices.at(0),&hostFaces.at(0), - &hostIndices.at(0),&clippingFacesOutCPU.at(0),i); - } - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - - } - - - - - - ///clip face B against face A, reduce contacts and append them to a global contact array - if (1) - { - if (clipConvexFacesAndFindContactsCPU) - { - - //b3AlignedObjectArray<b3Int4> hostPairs; - //pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - - b3AlignedObjectArray<b3Int4> hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2CPU.resize(vertexFaceCapacity*nPairs); - - b3AlignedObjectArray<b3Vector3>worldVertsA1CPU; - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsAGPU.copyToHost(worldNormalsACPU); - - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - /* - __global const b3Float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global b3Int4* clippingFacesOut, - __global b3Float4* worldVertsA1, - __global b3Float4* worldNormalsA1, - __global b3Float4* worldVertsB1, - __global b3Float4* worldVertsB2, - int vertexFaceCapacity, - int pairIndex - */ - for (int i=0;i<nPairs;i++) - { - clipFacesAndFindContactsKernel( - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostClippingFaces.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(hostClippingFaces); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - - } else - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL( worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts,"m_clipFacesAndFindContacts"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst( nPairs ); - int debugMode = 0; - launcher.setConst( debugMode); - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - } - - { - nContacts = m_totalContactsOut.at(0); - //printf("nContacts = %d\n",nContacts); - - int newContactCapacity = nContacts+nPairs; - contactOut->reserve(newContactCapacity); - - if (reduceConvexContactsOnGPU) - { - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL( worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel,"m_newContactReductionKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst( nPairs ); - int num = nPairs; - - launcher.launch1D( num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } else - { - - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Contact4> hostContactsOut; - contactOut->copyToHost(hostContactsOut); - hostContactsOut.resize(newContactCapacity); - - b3AlignedObjectArray<b3Int4> hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i=0;i<nPairs;i++) - { - b3NewContactReductionKernel(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostContactsOut.at(0), - &hostClippingFaces.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - nPairs, - i); - } - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts,1,0,true); - hostContactsOut.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContactsOut); - } - // b3Contact4 pt = contactOut->at(0); - // printf("nContacts = %d\n",nContacts); - } - } - } - else//breakupKernel - { - - if (nPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( pairs->getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( m_sepNormals.getBufferCL()), - b3BufferInfoCL( m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipHullHullKernel,"m_clipHullHullKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nPairs ); - launcher.setConst(maxContactCapacity); - - int num = nPairs; - launcher.launch1D( num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts >= maxContactCapacity) - { - b3Error("Exceeded contact capacity (%d/%d)\n",nContacts,maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - } - } - - - int nCompoundsPairs = m_gpuCompoundPairs.size(); - - if (nCompoundsPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_gpuCompoundPairs.getBufferCL(), true ), - b3BufferInfoCL( bodyBuf->getBufferCL(),true), - b3BufferInfoCL( gpuCollidables.getBufferCL(),true), - b3BufferInfoCL( convexData.getBufferCL(),true), - b3BufferInfoCL( gpuVertices.getBufferCL(),true), - b3BufferInfoCL( gpuUniqueEdges.getBufferCL(),true), - b3BufferInfoCL( gpuFaces.getBufferCL(),true), - b3BufferInfoCL( gpuIndices.getBufferCL(),true), - b3BufferInfoCL( gpuChildShapes.getBufferCL(),true), - b3BufferInfoCL( m_gpuCompoundSepNormals.getBufferCL(),true), - b3BufferInfoCL( m_gpuHasCompoundSepNormals.getBufferCL(),true), - b3BufferInfoCL( contactOut->getBufferCL()), - b3BufferInfoCL( m_totalContactsOut.getBufferCL()) - }; - b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel,"m_clipCompoundsHullHullKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nCompoundsPairs ); - launcher.setConst(maxContactCapacity); - - int num = nCompoundsPairs; - launcher.launch1D( num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts>maxContactCapacity) - { - - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - }//if nCompoundsPairs - } - }//contactClippingOnGpu - - //printf("nContacts end = %d\n",nContacts); - - //printf("frameCount = %d\n",frameCount++); -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h deleted file mode 100644 index e24c1579c6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ /dev/null @@ -1,118 +0,0 @@ - -#ifndef _CONVEX_HULL_CONTACT_H -#define _CONVEX_HULL_CONTACT_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "b3OptimizedBvh.h" -#include "b3BvhInfo.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" - - - - -struct GpuSatCollision -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_findSeparatingAxisKernel; - cl_kernel m_mprPenetrationKernel; - cl_kernel m_findSeparatingAxisUnitSphereKernel; - - - cl_kernel m_findSeparatingAxisVertexFaceKernel; - cl_kernel m_findSeparatingAxisEdgeEdgeKernel; - - cl_kernel m_findConcaveSeparatingAxisKernel; - cl_kernel m_findConcaveSeparatingAxisVertexFaceKernel; - cl_kernel m_findConcaveSeparatingAxisEdgeEdgeKernel; - - - - - cl_kernel m_findCompoundPairsKernel; - cl_kernel m_processCompoundPairsKernel; - - cl_kernel m_clipHullHullKernel; - cl_kernel m_clipCompoundsHullHullKernel; - - cl_kernel m_clipFacesAndFindContacts; - cl_kernel m_findClippingFacesKernel; - - cl_kernel m_clipHullHullConcaveConvexKernel; -// cl_kernel m_extractManifoldAndAddContactKernel; - cl_kernel m_newContactReductionKernel; - - cl_kernel m_bvhTraversalKernel; - cl_kernel m_primitiveContactsKernel; - cl_kernel m_findConcaveSphereContactsKernel; - - cl_kernel m_processCompoundPairsPrimitivesKernel; - - b3OpenCLArray<b3Vector3> m_unitSphereDirections; - - b3OpenCLArray<int> m_totalContactsOut; - - b3OpenCLArray<b3Vector3> m_sepNormals; - b3OpenCLArray<float> m_dmins; - - b3OpenCLArray<int> m_hasSeparatingNormals; - b3OpenCLArray<b3Vector3> m_concaveSepNormals; - b3OpenCLArray<int> m_concaveHasSeparatingNormals; - b3OpenCLArray<int> m_numConcavePairsOut; - b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs; - b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals; - b3OpenCLArray<int> m_gpuHasCompoundSepNormals; - b3OpenCLArray<int> m_numCompoundPairsOut; - - - GpuSatCollision(cl_context ctx,cl_device_id device, cl_command_queue q ); - virtual ~GpuSatCollision(); - - - void computeConvexConvexContactsGPUSAT( b3OpenCLArray<b3Int4>* pairs, int nPairs, - const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, - const b3OpenCLArray<b3Contact4>* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, - const b3OpenCLArray<b3Vector3>& vertices, - const b3OpenCLArray<b3Vector3>& uniqueEdges, - const b3OpenCLArray<b3GpuFace>& faces, - const b3OpenCLArray<int>& indices, - const b3OpenCLArray<b3Collidable>& gpuCollidables, - const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - - const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, - const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - - b3OpenCLArray<b3Vector3>& worldVertsB1GPU, - b3OpenCLArray<b3Int4>& clippingFacesOutGPU, - b3OpenCLArray<b3Vector3>& worldNormalsAGPU, - b3OpenCLArray<b3Vector3>& worldVertsA1GPU, - b3OpenCLArray<b3Vector3>& worldVertsB2GPU, - b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData, - b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, - b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, - b3OpenCLArray<b3BvhInfo>* bvhInfo, - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray<b3Int4>& triangleConvexPairs, - int& numTriConvexPairsOut - ); - - -}; - -#endif //_CONVEX_HULL_CONTACT_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h deleted file mode 100644 index 337100fb1a..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef CONVEX_POLYHEDRON_CL -#define CONVEX_POLYHEDRON_CL - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - - - -#endif //CONVEX_POLYHEDRON_CL diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp deleted file mode 100644 index d636f983c6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp +++ /dev/null @@ -1,1014 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ - -#include "b3GjkEpa.h" - -#include "b3SupportMappings.h" - -namespace gjkepa2_impl2 -{ - - // Config - - /* GJK */ -#define GJK_MAX_ITERATIONS 128 -#define GJK_ACCURACY ((b3Scalar)0.0001) -#define GJK_MIN_DISTANCE ((b3Scalar)0.0001) -#define GJK_DUPLICATED_EPS ((b3Scalar)0.0001) -#define GJK_SIMPLEX2_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX3_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX4_EPS ((b3Scalar)0.0) - - /* EPA */ -#define EPA_MAX_VERTICES 64 -#define EPA_MAX_FACES (EPA_MAX_VERTICES*2) -#define EPA_MAX_ITERATIONS 255 -#define EPA_ACCURACY ((b3Scalar)0.0001) -#define EPA_FALLBACK (10*EPA_ACCURACY) -#define EPA_PLANE_EPS ((b3Scalar)0.00001) -#define EPA_INSIDE_EPS ((b3Scalar)0.01) - - - // Shorthands - - - // MinkowskiDiff - struct b3MinkowskiDiff - { - - - const b3ConvexPolyhedronData* m_shapes[2]; - - - b3Matrix3x3 m_toshape1; - b3Transform m_toshape0; - - bool m_enableMargin; - - - void EnableMargin(bool enable) - { - m_enableMargin = enable; - } - inline b3Vector3 Support0(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA) const - { - if (m_enableMargin) - { - return localGetSupportVertexWithMargin(d,m_shapes[0],verticesA,0.f); - } else - { - return localGetSupportVertexWithoutMargin(d,m_shapes[0],verticesA); - } - } - inline b3Vector3 Support1(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - if (m_enableMargin) - { - return m_toshape0*(localGetSupportVertexWithMargin(m_toshape1*d,m_shapes[1],verticesB,0.f)); - } else - { - return m_toshape0*(localGetSupportVertexWithoutMargin(m_toshape1*d,m_shapes[1],verticesB)); - } - } - - inline b3Vector3 Support(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - return(Support0(d,verticesA)-Support1(-d,verticesB)); - } - b3Vector3 Support(const b3Vector3& d,unsigned int index,const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - if(index) - return(Support1(d,verticesA)); - else - return(Support0(d,verticesB)); - } - }; - - typedef b3MinkowskiDiff tShape; - - - // GJK - struct b3GJK - { - /* Types */ - struct sSV - { - b3Vector3 d,w; - }; - struct sSimplex - { - sSV* c[4]; - b3Scalar p[4]; - unsigned int rank; - }; - struct eStatus { enum _ { - Valid, - Inside, - Failed };}; - /* Fields */ - tShape m_shape; - const b3AlignedObjectArray<b3Vector3>& m_verticesA; - const b3AlignedObjectArray<b3Vector3>& m_verticesB; - b3Vector3 m_ray; - b3Scalar m_distance; - sSimplex m_simplices[2]; - sSV m_store[4]; - sSV* m_free[4]; - unsigned int m_nfree; - unsigned int m_current; - sSimplex* m_simplex; - eStatus::_ m_status; - /* Methods */ - b3GJK(const b3AlignedObjectArray<b3Vector3>& verticesA,const b3AlignedObjectArray<b3Vector3>& verticesB) - :m_verticesA(verticesA),m_verticesB(verticesB) - { - Initialize(); - } - void Initialize() - { - m_ray = b3MakeVector3(0,0,0); - m_nfree = 0; - m_status = eStatus::Failed; - m_current = 0; - m_distance = 0; - } - eStatus::_ Evaluate(const tShape& shapearg,const b3Vector3& guess) - { - unsigned int iterations=0; - b3Scalar sqdist=0; - b3Scalar alpha=0; - b3Vector3 lastw[4]; - unsigned int clastw=0; - /* Initialize solver */ - m_free[0] = &m_store[0]; - m_free[1] = &m_store[1]; - m_free[2] = &m_store[2]; - m_free[3] = &m_store[3]; - m_nfree = 4; - m_current = 0; - m_status = eStatus::Valid; - m_shape = shapearg; - m_distance = 0; - /* Initialize simplex */ - m_simplices[0].rank = 0; - m_ray = guess; - const b3Scalar sqrl= m_ray.length2(); - appendvertice(m_simplices[0],sqrl>0?-m_ray:b3MakeVector3(1,0,0)); - m_simplices[0].p[0] = 1; - m_ray = m_simplices[0].c[0]->w; - sqdist = sqrl; - lastw[0] = - lastw[1] = - lastw[2] = - lastw[3] = m_ray; - /* Loop */ - do { - const unsigned int next=1-m_current; - sSimplex& cs=m_simplices[m_current]; - sSimplex& ns=m_simplices[next]; - /* Check zero */ - const b3Scalar rl=m_ray.length(); - if(rl<GJK_MIN_DISTANCE) - {/* Touching or inside */ - m_status=eStatus::Inside; - break; - } - /* Append new vertice in -'v' direction */ - appendvertice(cs,-m_ray); - const b3Vector3& w=cs.c[cs.rank-1]->w; - bool found=false; - for(unsigned int i=0;i<4;++i) - { - if((w-lastw[i]).length2()<GJK_DUPLICATED_EPS) - { found=true;break; } - } - if(found) - {/* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - else - {/* Update lastw */ - lastw[clastw=(clastw+1)&3]=w; - } - /* Check for termination */ - const b3Scalar omega=b3Dot(m_ray,w)/rl; - alpha=b3Max(omega,alpha); - if(((rl-alpha)-(GJK_ACCURACY*rl))<=0) - {/* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - /* Reduce simplex */ - b3Scalar weights[4]; - unsigned int mask=0; - switch(cs.rank) - { - case 2: sqdist=projectorigin( cs.c[0]->w, - cs.c[1]->w, - weights,mask);break; - case 3: sqdist=projectorigin( cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - weights,mask);break; - case 4: sqdist=projectorigin( cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - cs.c[3]->w, - weights,mask);break; - } - if(sqdist>=0) - {/* Valid */ - ns.rank = 0; - m_ray = b3MakeVector3(0,0,0); - m_current = next; - for(unsigned int i=0,ni=cs.rank;i<ni;++i) - { - if(mask&(1<<i)) - { - ns.c[ns.rank] = cs.c[i]; - ns.p[ns.rank++] = weights[i]; - m_ray += cs.c[i]->w*weights[i]; - } - else - { - m_free[m_nfree++] = cs.c[i]; - } - } - if(mask==15) m_status=eStatus::Inside; - } - else - {/* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - m_status=((++iterations)<GJK_MAX_ITERATIONS)?m_status:eStatus::Failed; - } while(m_status==eStatus::Valid); - m_simplex=&m_simplices[m_current]; - switch(m_status) - { - case eStatus::Valid: m_distance=m_ray.length();break; - case eStatus::Inside: m_distance=0;break; - default: - { - } - } - return(m_status); - } - bool EncloseOrigin() - { - switch(m_simplex->rank) - { - case 1: - { - for(unsigned int i=0;i<3;++i) - { - b3Vector3 axis=b3MakeVector3(0,0,0); - axis[i]=1; - appendvertice(*m_simplex, axis); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - appendvertice(*m_simplex,-axis); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - } - } - break; - case 2: - { - const b3Vector3 d=m_simplex->c[1]->w-m_simplex->c[0]->w; - for(unsigned int i=0;i<3;++i) - { - b3Vector3 axis=b3MakeVector3(0,0,0); - axis[i]=1; - const b3Vector3 p=b3Cross(d,axis); - if(p.length2()>0) - { - appendvertice(*m_simplex, p); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - appendvertice(*m_simplex,-p); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - } - } - } - break; - case 3: - { - const b3Vector3 n=b3Cross(m_simplex->c[1]->w-m_simplex->c[0]->w, - m_simplex->c[2]->w-m_simplex->c[0]->w); - if(n.length2()>0) - { - appendvertice(*m_simplex,n); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - appendvertice(*m_simplex,-n); - if(EncloseOrigin()) return(true); - removevertice(*m_simplex); - } - } - break; - case 4: - { - if(b3Fabs(det( m_simplex->c[0]->w-m_simplex->c[3]->w, - m_simplex->c[1]->w-m_simplex->c[3]->w, - m_simplex->c[2]->w-m_simplex->c[3]->w))>0) - return(true); - } - break; - } - return(false); - } - /* Internals */ - void getsupport(const b3Vector3& d,sSV& sv) const - { - sv.d = d/d.length(); - sv.w = m_shape.Support(sv.d,m_verticesA,m_verticesB); - } - void removevertice(sSimplex& simplex) - { - m_free[m_nfree++]=simplex.c[--simplex.rank]; - } - void appendvertice(sSimplex& simplex,const b3Vector3& v) - { - simplex.p[simplex.rank]=0; - simplex.c[simplex.rank]=m_free[--m_nfree]; - getsupport(v,*simplex.c[simplex.rank++]); - } - static b3Scalar det(const b3Vector3& a,const b3Vector3& b,const b3Vector3& c) - { - return( a.y*b.z*c.x+a.z*b.x*c.y- - a.x*b.z*c.y-a.y*b.x*c.z+ - a.x*b.y*c.z-a.z*b.y*c.x); - } - static b3Scalar projectorigin( const b3Vector3& a, - const b3Vector3& b, - b3Scalar* w,unsigned int& m) - { - const b3Vector3 d=b-a; - const b3Scalar l=d.length2(); - if(l>GJK_SIMPLEX2_EPS) - { - const b3Scalar t(l>0?-b3Dot(a,d)/l:0); - if(t>=1) { w[0]=0;w[1]=1;m=2;return(b.length2()); } - else if(t<=0) { w[0]=1;w[1]=0;m=1;return(a.length2()); } - else { w[0]=1-(w[1]=t);m=3;return((a+d*t).length2()); } - } - return(-1); - } - static b3Scalar projectorigin( const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - b3Scalar* w,unsigned int& m) - { - static const unsigned int imd3[]={1,2,0}; - const b3Vector3* vt[]={&a,&b,&c}; - const b3Vector3 dl[]={a-b,b-c,c-a}; - const b3Vector3 n=b3Cross(dl[0],dl[1]); - const b3Scalar l=n.length2(); - if(l>GJK_SIMPLEX3_EPS) - { - b3Scalar mindist=-1; - b3Scalar subw[2]={0.f,0.f}; - unsigned int subm(0); - for(unsigned int i=0;i<3;++i) - { - if(b3Dot(*vt[i],b3Cross(dl[i],n))>0) - { - const unsigned int j=imd3[i]; - const b3Scalar subd(projectorigin(*vt[i],*vt[j],subw,subm)); - if((mindist<0)||(subd<mindist)) - { - mindist = subd; - m = static_cast<unsigned int>(((subm&1)?1<<i:0)+((subm&2)?1<<j:0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - } - } - } - if(mindist<0) - { - const b3Scalar d=b3Dot(a,n); - const b3Scalar s=b3Sqrt(l); - const b3Vector3 p=n*(d/l); - mindist = p.length2(); - m = 7; - w[0] = (b3Cross(dl[1],b-p)).length()/s; - w[1] = (b3Cross(dl[2],c-p)).length()/s; - w[2] = 1-(w[0]+w[1]); - } - return(mindist); - } - return(-1); - } - static b3Scalar projectorigin( const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - const b3Vector3& d, - b3Scalar* w,unsigned int& m) - { - static const unsigned int imd3[]={1,2,0}; - const b3Vector3* vt[]={&a,&b,&c,&d}; - const b3Vector3 dl[]={a-d,b-d,c-d}; - const b3Scalar vl=det(dl[0],dl[1],dl[2]); - const bool ng=(vl*b3Dot(a,b3Cross(b-c,a-b)))<=0; - if(ng&&(b3Fabs(vl)>GJK_SIMPLEX4_EPS)) - { - b3Scalar mindist=-1; - b3Scalar subw[3]={0.f,0.f,0.f}; - unsigned int subm(0); - for(unsigned int i=0;i<3;++i) - { - const unsigned int j=imd3[i]; - const b3Scalar s=vl*b3Dot(d,b3Cross(dl[i],dl[j])); - if(s>0) - { - const b3Scalar subd=projectorigin(*vt[i],*vt[j],d,subw,subm); - if((mindist<0)||(subd<mindist)) - { - mindist = subd; - m = static_cast<unsigned int>((subm&1?1<<i:0)+ - (subm&2?1<<j:0)+ - (subm&4?8:0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - w[3] = subw[2]; - } - } - } - if(mindist<0) - { - mindist = 0; - m = 15; - w[0] = det(c,b,d)/vl; - w[1] = det(a,c,d)/vl; - w[2] = det(b,a,d)/vl; - w[3] = 1-(w[0]+w[1]+w[2]); - } - return(mindist); - } - return(-1); - } - }; - - // EPA - struct b3EPA - { - /* Types */ - typedef b3GJK::sSV sSV; - struct sFace - { - b3Vector3 n; - b3Scalar d; - sSV* c[3]; - sFace* f[3]; - sFace* l[2]; - unsigned char e[3]; - unsigned char pass; - }; - struct sList - { - sFace* root; - unsigned int count; - sList() : root(0),count(0) {} - }; - struct sHorizon - { - sFace* cf; - sFace* ff; - unsigned int nf; - sHorizon() : cf(0),ff(0),nf(0) {} - }; - struct eStatus { enum _ { - Valid, - Touching, - Degenerated, - NonConvex, - InvalidHull, - OutOfFaces, - OutOfVertices, - AccuraryReached, - FallBack, - Failed };}; - /* Fields */ - eStatus::_ m_status; - b3GJK::sSimplex m_result; - b3Vector3 m_normal; - b3Scalar m_depth; - sSV m_sv_store[EPA_MAX_VERTICES]; - sFace m_fc_store[EPA_MAX_FACES]; - unsigned int m_nextsv; - sList m_hull; - sList m_stock; - /* Methods */ - b3EPA() - { - Initialize(); - } - - - static inline void bind(sFace* fa,unsigned int ea,sFace* fb,unsigned int eb) - { - fa->e[ea]=(unsigned char)eb;fa->f[ea]=fb; - fb->e[eb]=(unsigned char)ea;fb->f[eb]=fa; - } - static inline void append(sList& list,sFace* face) - { - face->l[0] = 0; - face->l[1] = list.root; - if(list.root) list.root->l[0]=face; - list.root = face; - ++list.count; - } - static inline void remove(sList& list,sFace* face) - { - if(face->l[1]) face->l[1]->l[0]=face->l[0]; - if(face->l[0]) face->l[0]->l[1]=face->l[1]; - if(face==list.root) list.root=face->l[1]; - --list.count; - } - - - void Initialize() - { - m_status = eStatus::Failed; - m_normal = b3MakeVector3(0,0,0); - m_depth = 0; - m_nextsv = 0; - for(unsigned int i=0;i<EPA_MAX_FACES;++i) - { - append(m_stock,&m_fc_store[EPA_MAX_FACES-i-1]); - } - } - eStatus::_ Evaluate(b3GJK& gjk,const b3Vector3& guess) - { - b3GJK::sSimplex& simplex=*gjk.m_simplex; - if((simplex.rank>1)&&gjk.EncloseOrigin()) - { - - /* Clean up */ - while(m_hull.root) - { - sFace* f = m_hull.root; - remove(m_hull,f); - append(m_stock,f); - } - m_status = eStatus::Valid; - m_nextsv = 0; - /* Orient simplex */ - if(gjk.det( simplex.c[0]->w-simplex.c[3]->w, - simplex.c[1]->w-simplex.c[3]->w, - simplex.c[2]->w-simplex.c[3]->w)<0) - { - b3Swap(simplex.c[0],simplex.c[1]); - b3Swap(simplex.p[0],simplex.p[1]); - } - /* Build initial hull */ - sFace* tetra[]={newface(simplex.c[0],simplex.c[1],simplex.c[2],true), - newface(simplex.c[1],simplex.c[0],simplex.c[3],true), - newface(simplex.c[2],simplex.c[1],simplex.c[3],true), - newface(simplex.c[0],simplex.c[2],simplex.c[3],true)}; - if(m_hull.count==4) - { - sFace* best=findbest(); - sFace outer=*best; - unsigned int pass=0; - unsigned int iterations=0; - bind(tetra[0],0,tetra[1],0); - bind(tetra[0],1,tetra[2],0); - bind(tetra[0],2,tetra[3],0); - bind(tetra[1],1,tetra[3],2); - bind(tetra[1],2,tetra[2],1); - bind(tetra[2],2,tetra[3],1); - m_status=eStatus::Valid; - for(;iterations<EPA_MAX_ITERATIONS;++iterations) - { - if(m_nextsv<EPA_MAX_VERTICES) - { - sHorizon horizon; - sSV* w=&m_sv_store[m_nextsv++]; - bool valid=true; - best->pass = (unsigned char)(++pass); - gjk.getsupport(best->n,*w); - const b3Scalar wdist=b3Dot(best->n,w->w)-best->d; - if(wdist>EPA_ACCURACY) - { - for(unsigned int j=0;(j<3)&&valid;++j) - { - valid&=expand( pass,w, - best->f[j],best->e[j], - horizon); - } - if(valid&&(horizon.nf>=3)) - { - bind(horizon.cf,1,horizon.ff,2); - remove(m_hull,best); - append(m_stock,best); - best=findbest(); - outer=*best; - } else { - m_status=eStatus::Failed; - //m_status=eStatus::InvalidHull; - break; } - } else { m_status=eStatus::AccuraryReached;break; } - } else { m_status=eStatus::OutOfVertices;break; } - } - const b3Vector3 projection=outer.n*outer.d; - m_normal = outer.n; - m_depth = outer.d; - m_result.rank = 3; - m_result.c[0] = outer.c[0]; - m_result.c[1] = outer.c[1]; - m_result.c[2] = outer.c[2]; - m_result.p[0] = b3Cross( outer.c[1]->w-projection, - outer.c[2]->w-projection).length(); - m_result.p[1] = b3Cross( outer.c[2]->w-projection, - outer.c[0]->w-projection).length(); - m_result.p[2] = b3Cross( outer.c[0]->w-projection, - outer.c[1]->w-projection).length(); - const b3Scalar sum=m_result.p[0]+m_result.p[1]+m_result.p[2]; - m_result.p[0] /= sum; - m_result.p[1] /= sum; - m_result.p[2] /= sum; - return(m_status); - } - } - /* Fallback */ - m_status = eStatus::FallBack; - m_normal = -guess; - const b3Scalar nl=m_normal.length(); - if(nl>0) - m_normal = m_normal/nl; - else - m_normal = b3MakeVector3(1,0,0); - m_depth = 0; - m_result.rank=1; - m_result.c[0]=simplex.c[0]; - m_result.p[0]=1; - return(m_status); - } - bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) - { - const b3Vector3 ba = b->w - a->w; - const b3Vector3 n_ab = b3Cross(ba, face->n); // Outward facing edge normal direction, on triangle plane - const b3Scalar a_dot_nab = b3Dot(a->w, n_ab); // Only care about the sign to determine inside/outside, so not normalization required - - if(a_dot_nab < 0) - { - // Outside of edge a->b - - const b3Scalar ba_l2 = ba.length2(); - const b3Scalar a_dot_ba = b3Dot(a->w, ba); - const b3Scalar b_dot_ba = b3Dot(b->w, ba); - - if(a_dot_ba > 0) - { - // Pick distance vertex a - dist = a->w.length(); - } - else if(b_dot_ba < 0) - { - // Pick distance vertex b - dist = b->w.length(); - } - else - { - // Pick distance to edge a->b - const b3Scalar a_dot_b = b3Dot(a->w, b->w); - dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); - } - - return true; - } - - return false; - } - sFace* newface(sSV* a,sSV* b,sSV* c,bool forced) - { - if(m_stock.root) - { - sFace* face=m_stock.root; - remove(m_stock,face); - append(m_hull,face); - face->pass = 0; - face->c[0] = a; - face->c[1] = b; - face->c[2] = c; - face->n = b3Cross(b->w-a->w,c->w-a->w); - const b3Scalar l=face->n.length(); - const bool v=l>EPA_ACCURACY; - - if(v) - { - if(!(getedgedist(face, a, b, face->d) || - getedgedist(face, b, c, face->d) || - getedgedist(face, c, a, face->d))) - { - // Origin projects to the interior of the triangle - // Use distance to triangle plane - face->d = b3Dot(a->w, face->n) / l; - } - - face->n /= l; - if(forced || (face->d >= -EPA_PLANE_EPS)) - { - return face; - } - else - m_status=eStatus::NonConvex; - } - else - m_status=eStatus::Degenerated; - - remove(m_hull, face); - append(m_stock, face); - return 0; - - } - m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; - return 0; - } - sFace* findbest() - { - sFace* minf=m_hull.root; - b3Scalar mind=minf->d*minf->d; - for(sFace* f=minf->l[1];f;f=f->l[1]) - { - const b3Scalar sqd=f->d*f->d; - if(sqd<mind) - { - minf=f; - mind=sqd; - } - } - return(minf); - } - bool expand(unsigned int pass,sSV* w,sFace* f,unsigned int e,sHorizon& horizon) - { - static const unsigned int i1m3[]={1,2,0}; - static const unsigned int i2m3[]={2,0,1}; - if(f->pass!=pass) - { - const unsigned int e1=i1m3[e]; - if((b3Dot(f->n,w->w)-f->d)<-EPA_PLANE_EPS) - { - sFace* nf=newface(f->c[e1],f->c[e],w,false); - if(nf) - { - bind(nf,0,f,e); - if(horizon.cf) bind(horizon.cf,1,nf,2); else horizon.ff=nf; - horizon.cf=nf; - ++horizon.nf; - return(true); - } - } - else - { - const unsigned int e2=i2m3[e]; - f->pass = (unsigned char)pass; - if( expand(pass,w,f->f[e1],f->e[e1],horizon)&& - expand(pass,w,f->f[e2],f->e[e2],horizon)) - { - remove(m_hull,f); - append(m_stock,f); - return(true); - } - } - } - return(false); - } - - }; - - // - static void Initialize(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - b3GjkEpaSolver2::sResults& results, - tShape& shape, - bool withmargins) - { - /* Results */ - results.witnesses[0] = - results.witnesses[1] = b3MakeVector3(0,0,0); - results.status = b3GjkEpaSolver2::sResults::Separated; - /* Shape */ - shape.m_shapes[0] = hullA; - shape.m_shapes[1] = hullB; - shape.m_toshape1 = transB.getBasis().transposeTimes(transA.getBasis()); - shape.m_toshape0 = transA.inverseTimes(transB); - shape.EnableMargin(withmargins); - } - -} - -// -// Api -// - -using namespace gjkepa2_impl2; - -// -int b3GjkEpaSolver2::StackSizeRequirement() -{ - return(sizeof(b3GJK)+sizeof(b3EPA)); -} - -// -bool b3GjkEpaSolver2::Distance( const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results) -{ - tShape shape; - Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,false); - b3GJK gjk(verticesA,verticesB); - b3GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,guess); - if(gjk_status==b3GJK::eStatus::Valid) - { - b3Vector3 w0=b3MakeVector3(0,0,0); - b3Vector3 w1=b3MakeVector3(0,0,0); - for(unsigned int i=0;i<gjk.m_simplex->rank;++i) - { - const b3Scalar p=gjk.m_simplex->p[i]; - w0+=shape.Support( gjk.m_simplex->c[i]->d,0,verticesA,verticesB)*p; - w1+=shape.Support(-gjk.m_simplex->c[i]->d,1,verticesA,verticesB)*p; - } - results.witnesses[0] = transA*w0; - results.witnesses[1] = transA*w1; - results.normal = w0-w1; - results.distance = results.normal.length(); - results.normal /= results.distance>GJK_MIN_DISTANCE?results.distance:1; - return(true); - } - else - { - results.status = gjk_status==b3GJK::eStatus::Inside? - sResults::Penetrating : - sResults::GJK_Failed ; - return(false); - } -} - -// -bool b3GjkEpaSolver2::Penetration( const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins) -{ - - tShape shape; - Initialize(transA,transB,hullA,hullB,verticesA,verticesB,results,shape,usemargins); - b3GJK gjk(verticesA,verticesB); - b3GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,guess); - switch(gjk_status) - { - case b3GJK::eStatus::Inside: - { - b3EPA epa; - b3EPA::eStatus::_ epa_status=epa.Evaluate(gjk,-guess); - if(epa_status!=b3EPA::eStatus::Failed) - { - b3Vector3 w0=b3MakeVector3(0,0,0); - for(unsigned int i=0;i<epa.m_result.rank;++i) - { - w0+=shape.Support(epa.m_result.c[i]->d,0,verticesA,verticesB)*epa.m_result.p[i]; - } - results.status = sResults::Penetrating; - results.witnesses[0] = transA*w0; - results.witnesses[1] = transA*(w0-epa.m_normal*epa.m_depth); - results.normal = -epa.m_normal; - results.distance = -epa.m_depth; - return(true); - } else results.status=sResults::EPA_Failed; - } - break; - case b3GJK::eStatus::Failed: - results.status=sResults::GJK_Failed; - break; - default: - { - } - } - return(false); -} - - -#if 0 -// -b3Scalar b3GjkEpaSolver2::SignedDistance(const b3Vector3& position, - b3Scalar margin, - const b3Transform& transA, - const b3ConvexPolyhedronData& hullA, - const b3AlignedObjectArray<b3Vector3>& verticesA, - sResults& results) -{ - tShape shape; - btSphereShape shape1(margin); - b3Transform wtrs1(b3Quaternion(0,0,0,1),position); - Initialize(shape0,wtrs0,&shape1,wtrs1,results,shape,false); - GJK gjk; - GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,b3Vector3(1,1,1)); - if(gjk_status==GJK::eStatus::Valid) - { - b3Vector3 w0=b3Vector3(0,0,0); - b3Vector3 w1=b3Vector3(0,0,0); - for(unsigned int i=0;i<gjk.m_simplex->rank;++i) - { - const b3Scalar p=gjk.m_simplex->p[i]; - w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p; - w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p; - } - results.witnesses[0] = wtrs0*w0; - results.witnesses[1] = wtrs0*w1; - const b3Vector3 delta= results.witnesses[1]- - results.witnesses[0]; - const b3Scalar margin= shape0->getMarginNonVirtual()+ - shape1.getMarginNonVirtual(); - const b3Scalar length= delta.length(); - results.normal = delta/length; - results.witnesses[0] += results.normal*margin; - return(length-margin); - } - else - { - if(gjk_status==GJK::eStatus::Inside) - { - if(Penetration(shape0,wtrs0,&shape1,wtrs1,gjk.m_ray,results)) - { - const b3Vector3 delta= results.witnesses[0]- - results.witnesses[1]; - const b3Scalar length= delta.length(); - if (length >= B3_EPSILON) - results.normal = delta/length; - return(-length); - } - } - } - return(B3_INFINITY); -} - -// -bool b3GjkEpaSolver2::SignedDistance(const btConvexShape* shape0, - const b3Transform& wtrs0, - const btConvexShape* shape1, - const b3Transform& wtrs1, - const b3Vector3& guess, - sResults& results) -{ - if(!Distance(shape0,wtrs0,shape1,wtrs1,guess,results)) - return(Penetration(shape0,wtrs0,shape1,wtrs1,guess,results,false)); - else - return(true); -} -#endif - - -/* Symbols cleanup */ - -#undef GJK_MAX_ITERATIONS -#undef GJK_ACCURACY -#undef GJK_MIN_DISTANCE -#undef GJK_DUPLICATED_EPS -#undef GJK_SIMPLEX2_EPS -#undef GJK_SIMPLEX3_EPS -#undef GJK_SIMPLEX4_EPS - -#undef EPA_MAX_VERTICES -#undef EPA_MAX_FACES -#undef EPA_MAX_ITERATIONS -#undef EPA_ACCURACY -#undef EPA_FALLBACK -#undef EPA_PLANE_EPS -#undef EPA_INSIDE_EPS diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h deleted file mode 100644 index 976238a04c..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ -#ifndef B3_GJK_EPA2_H -#define B3_GJK_EPA2_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - - -///btGjkEpaSolver contributed under zlib by Nathanael Presson -struct b3GjkEpaSolver2 -{ -struct sResults - { - enum eStatus - { - Separated, /* Shapes doesnt penetrate */ - Penetrating, /* Shapes are penetrating */ - GJK_Failed, /* GJK phase fail, no big issue, shapes are probably just 'touching' */ - EPA_Failed /* EPA phase fail, bigger problem, need to save parameters, and debug */ - } status; - b3Vector3 witnesses[2]; - b3Vector3 normal; - b3Scalar distance; - }; - -static int StackSizeRequirement(); - -static bool Distance( const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results); - -static bool Penetration( const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins=true); -#if 0 -static b3Scalar SignedDistance( const b3Vector3& position, - b3Scalar margin, - const btConvexShape* shape, - const btTransform& wtrs, - sResults& results); - -static bool SignedDistance( const btConvexShape* shape0,const btTransform& wtrs0, - const btConvexShape* shape1,const btTransform& wtrs1, - const b3Vector3& guess, - sResults& results); -#endif - -}; - -#endif //B3_GJK_EPA2_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp deleted file mode 100644 index e9e51d5a36..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ /dev/null @@ -1,390 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#include "b3OptimizedBvh.h" -#include "b3StridingMeshInterface.h" -#include "Bullet3Geometry/b3AabbUtil.h" - - -b3OptimizedBvh::b3OptimizedBvh() -{ -} - -b3OptimizedBvh::~b3OptimizedBvh() -{ -} - - -void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) -{ - m_useQuantization = useQuantizedAabbCompression; - - - // NodeArray triangleNodes; - - struct NodeTriangleCallback : public b3InternalTriangleIndexCallback - { - - NodeArray& m_triangleNodes; - - NodeTriangleCallback& operator=(NodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - return *this; - } - - NodeTriangleCallback(NodeArray& triangleNodes) - :m_triangleNodes(triangleNodes) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) - { - b3OptimizedBvhNode node; - b3Vector3 aabbMin,aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //with quantization? - node.m_aabbMinOrg = aabbMin; - node.m_aabbMaxOrg = aabbMax; - - node.m_escapeIndex = -1; - - //for child nodes - node.m_subPart = partId; - node.m_triangleIndex = triangleIndex; - m_triangleNodes.push_back(node); - } - }; - struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback - { - QuantizedNodeArray& m_triangleNodes; - const b3QuantizedBvh* m_optimizedTree; // for quantization - - QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - m_optimizedTree = other.m_optimizedTree; - return *this; - } - - QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes,const b3QuantizedBvh* tree) - :m_triangleNodes(triangleNodes),m_optimizedTree(tree) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) - { - // The partId and triangle index must fit in the same (positive) integer - b3Assert(partId < (1<<MAX_NUM_PARTS_IN_BITS)); - b3Assert(triangleIndex < (1<<(31-MAX_NUM_PARTS_IN_BITS))); - //negative indices are reserved for escapeIndex - b3Assert(triangleIndex>=0); - - b3QuantizedBvhNode node; - b3Vector3 aabbMin,aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //PCK: add these checks for zero dimensions of aabb - const b3Scalar MIN_AABB_DIMENSION = b3Scalar(0.002); - const b3Scalar MIN_AABB_HALF_DIMENSION = b3Scalar(0.001); - if (aabbMax.getX() - aabbMin.getX() < MIN_AABB_DIMENSION) - { - aabbMax.setX(aabbMax.getX() + MIN_AABB_HALF_DIMENSION); - aabbMin.setX(aabbMin.getX() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getY() - aabbMin.getY() < MIN_AABB_DIMENSION) - { - aabbMax.setY(aabbMax.getY() + MIN_AABB_HALF_DIMENSION); - aabbMin.setY(aabbMin.getY() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getZ() - aabbMin.getZ() < MIN_AABB_DIMENSION) - { - aabbMax.setZ(aabbMax.getZ() + MIN_AABB_HALF_DIMENSION); - aabbMin.setZ(aabbMin.getZ() - MIN_AABB_HALF_DIMENSION); - } - - m_optimizedTree->quantize(&node.m_quantizedAabbMin[0],aabbMin,0); - m_optimizedTree->quantize(&node.m_quantizedAabbMax[0],aabbMax,1); - - node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | triangleIndex; - - m_triangleNodes.push_back(node); - } - }; - - - - int numLeafNodes = 0; - - - if (m_useQuantization) - { - - //initialize quantization values - setQuantizationValues(bvhAabbMin,bvhAabbMax); - - QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes,this); - - - triangles->InternalProcessAllTriangles(&callback,m_bvhAabbMin,m_bvhAabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - - m_quantizedContiguousNodes.resize(2*numLeafNodes); - - - } else - { - NodeTriangleCallback callback(m_leafNodes); - - b3Vector3 aabbMin=b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - b3Vector3 aabbMax=b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - - triangles->InternalProcessAllTriangles(&callback,aabbMin,aabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_leafNodes.size(); - - m_contiguousNodes.resize(2*numLeafNodes); - } - - m_curNodeIndex = 0; - - buildTree(0,numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if(m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - - - - -void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) -{ - if (m_useQuantization) - { - - setQuantizationValues(aabbMin,aabbMax); - - updateBvhNodes(meshInterface,0,m_curNodeIndex,0); - - ///now update all subtree headers - - int i; - for (i=0;i<m_SubtreeHeaders.size();i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - - } else - { - - } -} - - - - -void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface,const b3Vector3& aabbMin,const b3Vector3& aabbMax) -{ - //incrementally initialize quantization values - b3Assert(m_useQuantization); - - b3Assert(aabbMin.getX() > m_bvhAabbMin.getX()); - b3Assert(aabbMin.getY() > m_bvhAabbMin.getY()); - b3Assert(aabbMin.getZ() > m_bvhAabbMin.getZ()); - - b3Assert(aabbMax.getX() < m_bvhAabbMax.getX()); - b3Assert(aabbMax.getY() < m_bvhAabbMax.getY()); - b3Assert(aabbMax.getZ() < m_bvhAabbMax.getZ()); - - ///we should update all quantization values, using updateBvhNodes(meshInterface); - ///but we only update chunks that overlap the given aabb - - unsigned short quantizedQueryAabbMin[3]; - unsigned short quantizedQueryAabbMax[3]; - - quantize(&quantizedQueryAabbMin[0],aabbMin,0); - quantize(&quantizedQueryAabbMax[0],aabbMax,1); - - int i; - for (i=0;i<this->m_SubtreeHeaders.size();i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); - if (overlap != 0) - { - updateBvhNodes(meshInterface,subtree.m_rootNodeIndex,subtree.m_rootNodeIndex+subtree.m_subtreeSize,i); - - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - } - -} - -void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index) -{ - (void)index; - - b3Assert(m_useQuantization); - - int curNodeSubPart=-1; - - //get access info to trianglemesh data - const unsigned char *vertexbase = 0; - int numverts = 0; - PHY_ScalarType type = PHY_INTEGER; - int stride = 0; - const unsigned char *indexbase = 0; - int indexstride = 0; - int numfaces = 0; - PHY_ScalarType indicestype = PHY_INTEGER; - - b3Vector3 triangleVerts[3]; - b3Vector3 aabbMin,aabbMax; - const b3Vector3& meshScaling = meshInterface->getScaling(); - - int i; - for (i=endNode-1;i>=firstNode;i--) - { - - - b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; - if (curNode.isLeafNode()) - { - //recalc aabb from triangle data - int nodeSubPart = curNode.getPartId(); - int nodeTriangleIndex = curNode.getTriangleIndex(); - if (nodeSubPart != curNodeSubPart) - { - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); - meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase,numverts, type,stride,&indexbase,indexstride,numfaces,indicestype,nodeSubPart); - - curNodeSubPart = nodeSubPart; - b3Assert(indicestype==PHY_INTEGER||indicestype==PHY_SHORT); - } - //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, - - unsigned int* gfxbase = (unsigned int*)(indexbase+nodeTriangleIndex*indexstride); - - - for (int j=2;j>=0;j--) - { - - int graphicsindex = indicestype==PHY_SHORT?((unsigned short*)gfxbase)[j]:gfxbase[j]; - if (type == PHY_FLOAT) - { - float* graphicsbase = (float*)(vertexbase+graphicsindex*stride); - triangleVerts[j] = b3MakeVector3( - graphicsbase[0]*meshScaling.getX(), - graphicsbase[1]*meshScaling.getY(), - graphicsbase[2]*meshScaling.getZ()); - } - else - { - double* graphicsbase = (double*)(vertexbase+graphicsindex*stride); - triangleVerts[j] = b3MakeVector3( b3Scalar(graphicsbase[0]*meshScaling.getX()), b3Scalar(graphicsbase[1]*meshScaling.getY()), b3Scalar(graphicsbase[2]*meshScaling.getZ())); - } - } - - - - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangleVerts[0]); - aabbMax.setMax(triangleVerts[0]); - aabbMin.setMin(triangleVerts[1]); - aabbMax.setMax(triangleVerts[1]); - aabbMin.setMin(triangleVerts[2]); - aabbMax.setMax(triangleVerts[2]); - - quantize(&curNode.m_quantizedAabbMin[0],aabbMin,0); - quantize(&curNode.m_quantizedAabbMax[0],aabbMax,1); - - } else - { - //combine aabb from both children - - b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i+1]; - - b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? &m_quantizedContiguousNodes[i+2] : - &m_quantizedContiguousNodes[i+1+leftChildNode->getEscapeIndex()]; - - - { - for (int i=0;i<3;i++) - { - curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; - if (curNode.m_quantizedAabbMin[i]>rightChildNode->m_quantizedAabbMin[i]) - curNode.m_quantizedAabbMin[i]=rightChildNode->m_quantizedAabbMin[i]; - - curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; - if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) - curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; - } - } - } - - } - - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); - - -} - -///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer,i_dataBufferSize,i_swapEndian); - - //we don't add additional data so just do a static upcast - return static_cast<b3OptimizedBvh*>(bvh); -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h deleted file mode 100644 index 0272ef83bf..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h +++ /dev/null @@ -1,65 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///Contains contributions from Disney Studio's - -#ifndef B3_OPTIMIZED_BVH_H -#define B3_OPTIMIZED_BVH_H - -#include "b3QuantizedBvh.h" - -class b3StridingMeshInterface; - - -///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface. -B3_ATTRIBUTE_ALIGNED16(class) b3OptimizedBvh : public b3QuantizedBvh -{ - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - -protected: - -public: - - b3OptimizedBvh(); - - virtual ~b3OptimizedBvh(); - - void build(b3StridingMeshInterface* triangles,bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); - - void refit(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin,const b3Vector3& aabbMax); - - void refitPartial(b3StridingMeshInterface* triangles,const b3Vector3& aabbMin, const b3Vector3& aabbMax); - - void updateBvhNodes(b3StridingMeshInterface* meshInterface,int firstNode,int endNode,int index); - - /// Data buffer MUST be 16 byte aligned - virtual bool serializeInPlace(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const - { - return b3QuantizedBvh::serialize(o_alignedDataBuffer,i_dataBufferSize,i_swapEndian); - - } - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3OptimizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); - - -}; - - -#endif //B3_OPTIMIZED_BVH_H - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp deleted file mode 100644 index 52027e1118..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp +++ /dev/null @@ -1,1301 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3QuantizedBvh.h" - -#include "Bullet3Geometry/b3AabbUtil.h" - - -#define RAYAABB2 - -b3QuantizedBvh::b3QuantizedBvh() : - m_bulletVersion(B3_BULLET_VERSION), - m_useQuantization(false), - m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) - //m_traversalMode(TRAVERSAL_STACKLESS) - //m_traversalMode(TRAVERSAL_RECURSIVE) - ,m_subtreeHeaderCount(0) //PCK: add this line -{ - m_bvhAabbMin.setValue(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY); - m_bvhAabbMax.setValue(B3_INFINITY,B3_INFINITY,B3_INFINITY); -} - - - - - -void b3QuantizedBvh::buildInternal() -{ - ///assumes that caller filled in the m_quantizedLeafNodes - m_useQuantization = true; - int numLeafNodes = 0; - - if (m_useQuantization) - { - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - m_quantizedContiguousNodes.resize(2*numLeafNodes); - - } - - m_curNodeIndex = 0; - - buildTree(0,numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if(m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - - - -///just for debugging, to visualize the individual patches/subtrees -#ifdef DEBUG_PATCH_COLORS -b3Vector3 color[4]= -{ - b3Vector3(1,0,0), - b3Vector3(0,1,0), - b3Vector3(0,0,1), - b3Vector3(0,1,1) -}; -#endif //DEBUG_PATCH_COLORS - - - -void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin) -{ - //enlarge the AABB to avoid division by zero when initializing the quantization values - b3Vector3 clampValue =b3MakeVector3(quantizationMargin,quantizationMargin,quantizationMargin); - m_bvhAabbMin = bvhAabbMin - clampValue; - m_bvhAabbMax = bvhAabbMax + clampValue; - b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; - m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0),b3Scalar(65533.0),b3Scalar(65533.0)) / aabbSize; - m_useQuantization = true; -} - - - - -b3QuantizedBvh::~b3QuantizedBvh() -{ -} - -#ifdef DEBUG_TREE_BUILDING -int gStackDepth = 0; -int gMaxStackDepth = 0; -#endif //DEBUG_TREE_BUILDING - -void b3QuantizedBvh::buildTree (int startIndex,int endIndex) -{ -#ifdef DEBUG_TREE_BUILDING - gStackDepth++; - if (gStackDepth > gMaxStackDepth) - gMaxStackDepth = gStackDepth; -#endif //DEBUG_TREE_BUILDING - - - int splitAxis, splitIndex, i; - int numIndices =endIndex-startIndex; - int curIndex = m_curNodeIndex; - - b3Assert(numIndices>0); - - if (numIndices==1) - { -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - assignInternalNodeFromLeafNode(m_curNodeIndex,startIndex); - - m_curNodeIndex++; - return; - } - //calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. - - splitAxis = calcSplittingAxis(startIndex,endIndex); - - splitIndex = sortAndCalcSplittingIndex(startIndex,endIndex,splitAxis); - - int internalNodeIndex = m_curNodeIndex; - - //set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value. - //the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values - setInternalNodeAabbMin(m_curNodeIndex,m_bvhAabbMax);//can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization - setInternalNodeAabbMax(m_curNodeIndex,m_bvhAabbMin);//can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization - - - for (i=startIndex;i<endIndex;i++) - { - mergeInternalNodeAabb(m_curNodeIndex,getAabbMin(i),getAabbMax(i)); - } - - m_curNodeIndex++; - - - //internalNode->m_escapeIndex; - - int leftChildNodexIndex = m_curNodeIndex; - - //build left child tree - buildTree(startIndex,splitIndex); - - int rightChildNodexIndex = m_curNodeIndex; - //build right child tree - buildTree(splitIndex,endIndex); - -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - int escapeIndex = m_curNodeIndex - curIndex; - - if (m_useQuantization) - { - //escapeIndex is the number of nodes of this subtree - const int sizeQuantizedNode =sizeof(b3QuantizedBvhNode); - const int treeSizeInBytes = escapeIndex * sizeQuantizedNode; - if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES) - { - updateSubtreeHeaders(leftChildNodexIndex,rightChildNodexIndex); - } - } else - { - - } - - setInternalNodeEscapeIndex(internalNodeIndex,escapeIndex); - -} - -void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex) -{ - b3Assert(m_useQuantization); - - b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex]; - int leftSubTreeSize = leftChildNode.isLeafNode() ? 1 : leftChildNode.getEscapeIndex(); - int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); - - b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex]; - int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); - int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); - - if(leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(leftChildNode); - subtree.m_rootNodeIndex = leftChildNodexIndex; - subtree.m_subtreeSize = leftSubTreeSize; - } - - if(rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(rightChildNode); - subtree.m_rootNodeIndex = rightChildNodexIndex; - subtree.m_subtreeSize = rightSubTreeSize; - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); -} - - -int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis) -{ - int i; - int splitIndex =startIndex; - int numIndices = endIndex - startIndex; - b3Scalar splitValue; - - b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - for (i=startIndex;i<endIndex;i++) - { - b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); - means+=center; - } - means *= (b3Scalar(1.)/(b3Scalar)numIndices); - - splitValue = means[splitAxis]; - - //sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'. - for (i=startIndex;i<endIndex;i++) - { - b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); - if (center[splitAxis] > splitValue) - { - //swap - swapLeafNodes(i,splitIndex); - splitIndex++; - } - } - - //if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex - //otherwise the tree-building might fail due to stack-overflows in certain cases. - //unbalanced1 is unsafe: it can cause stack overflows - //bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); - - //unbalanced2 should work too: always use center (perfect balanced trees) - //bool unbalanced2 = true; - - //this should be safe too: - int rangeBalancedIndices = numIndices/3; - bool unbalanced = ((splitIndex<=(startIndex+rangeBalancedIndices)) || (splitIndex >=(endIndex-1-rangeBalancedIndices))); - - if (unbalanced) - { - splitIndex = startIndex+ (numIndices>>1); - } - - bool unbal = (splitIndex==startIndex) || (splitIndex == (endIndex)); - (void)unbal; - b3Assert(!unbal); - - return splitIndex; -} - - -int b3QuantizedBvh::calcSplittingAxis(int startIndex,int endIndex) -{ - int i; - - b3Vector3 means=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - b3Vector3 variance=b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - int numIndices = endIndex-startIndex; - - for (i=startIndex;i<endIndex;i++) - { - b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); - means+=center; - } - means *= (b3Scalar(1.)/(b3Scalar)numIndices); - - for (i=startIndex;i<endIndex;i++) - { - b3Vector3 center = b3Scalar(0.5)*(getAabbMax(i)+getAabbMin(i)); - b3Vector3 diff2 = center-means; - diff2 = diff2 * diff2; - variance += diff2; - } - variance *= (b3Scalar(1.)/ ((b3Scalar)numIndices-1) ); - - return variance.maxAxis(); -} - - - -void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - //either choose recursive traversal (walkTree) or stackless (walkStacklessTree) - - if (m_useQuantization) - { - ///quantize query AABB - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin,aabbMin,0); - quantizeWithClamp(quantizedQueryAabbMax,aabbMax,1); - - switch (m_traversalMode) - { - case TRAVERSAL_STACKLESS: - walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax,0,m_curNodeIndex); - break; - case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: - walkStacklessQuantizedTreeCacheFriendly(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); - break; - case TRAVERSAL_RECURSIVE: - { - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0]; - walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); - } - break; - default: - //unsupported - b3Assert(0); - } - } else - { - walkStacklessTree(nodeCallback,aabbMin,aabbMax); - } -} - - -static int b3s_maxIterations = 0; - - -void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < m_curNodeIndex) - { - //catch bugs in tree data - b3Assert (walkIterations < m_curNodeIndex); - - walkIterations++; - aabbOverlap = b3TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && (aabbOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; - -} - -/* -///this was the original recursive traversal, before we optimized towards stackless traversal -void b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax); - if (aabbOverlap) - { - isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild); - if (isLeafNode) - { - nodeCallback->processNode(rootNode); - } else - { - walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax); - walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax); - } - } - -} -*/ - -void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - //PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,currentNode->m_quantizedAabbMin,currentNode->m_quantizedAabbMax); - isLeafNode = currentNode->isLeafNode(); - - //PCK: unsigned instead of bool - if (aabbOverlap != 0) - { - if (isLeafNode) - { - nodeCallback->processNode(currentNode->getPartId(),currentNode->getTriangleIndex()); - } else - { - //process left and right children - const b3QuantizedBvhNode* leftChildNode = currentNode+1; - walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); - - const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode+1:leftChildNode+leftChildNode->getEscapeIndex(); - walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode,nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax); - } - } -} - - - -void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap=0; - unsigned rayBoxOverlap=0; - b3Scalar lambda_max = 1.0; - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - -#ifdef RAYAABB2 - b3Vector3 rayDir = (rayTarget-raySource); - rayDir.normalize (); - lambda_max = rayDir.dot(rayTarget-raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - b3Vector3 rayDirectionInverse; - rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; - rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; - rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; - unsigned int sign[3] = { rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; -#endif - - b3Vector3 bounds[2]; - - while (curIndex < m_curNodeIndex) - { - b3Scalar param = 1.0; - //catch bugs in tree data - b3Assert (walkIterations < m_curNodeIndex); - - walkIterations++; - - bounds[0] = rootNode->m_aabbMinOrg; - bounds[1] = rootNode->m_aabbMaxOrg; - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; - - aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin,rayAabbMax,rootNode->m_aabbMinOrg,rootNode->m_aabbMaxOrg); - //perhaps profile if it is worth doing the aabbOverlap test first - -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! - ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - rayBoxOverlap = aabbOverlap ? b3RayAabb2 (raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; - -#else - b3Vector3 normal; - rayBoxOverlap = b3RayAabb(raySource, rayTarget,bounds[0],bounds[1],param, normal); -#endif - - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && (rayBoxOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart,rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; - -} - - - -void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const -{ - b3Assert(m_useQuantization); - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned boxBoxOverlap = 0; - unsigned rayBoxOverlap = 0; - - b3Scalar lambda_max = 1.0; - -#ifdef RAYAABB2 - b3Vector3 rayDirection = (rayTarget-raySource); - rayDirection.normalize (); - lambda_max = rayDirection.dot(rayTarget-raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0]; - rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1]; - rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2]; - unsigned int sign[3] = { rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; -#endif - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin,rayAabbMin,0); - quantizeWithClamp(quantizedQueryAabbMax,rayAabbMax,1); - - while (curIndex < endNodeIndex) - { - -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex==drawPatch) - { - b3Vector3 aabbMin,aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1,0,0); - debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); - } -#endif//VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert (walkIterations < subTreeSize); - - walkIterations++; - //PCK: unsigned instead of bool - // only interested if this is closer than any previous hit - b3Scalar param = 1.0; - rayBoxOverlap = 0; - boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - if (boxBoxOverlap) - { - b3Vector3 bounds[2]; - bounds[0] = unQuantize(rootNode->m_quantizedAabbMin); - bounds[1] = unQuantize(rootNode->m_quantizedAabbMax); - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; -#if 0 - b3Vector3 normal; - bool ra2 = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); - bool ra = b3RayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal); - if (ra2 != ra) - { - printf("functions don't match\n"); - } -#endif -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! - ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - - //B3_PROFILE("b3RayAabb2"); - rayBoxOverlap = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); - -#else - rayBoxOverlap = true;//b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); -#endif - } - - if (isLeafNode && rayBoxOverlap) - { - nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; - -} - -void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const -{ - b3Assert(m_useQuantization); - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < endNodeIndex) - { - -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex==drawPatch) - { - b3Vector3 aabbMin,aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1,0,0); - debugDrawerPtr->drawAabb(aabbMin,aabbMax,color); - } -#endif//VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert (walkIterations < subTreeSize); - - walkIterations++; - //PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode->m_quantizedAabbMin,rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - - if (isLeafNode && aabbOverlap) - { - nodeCallback->processNode(rootNode->getPartId(),rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; - -} - -//This traversal can be called from Playstation 3 SPU -void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - int i; - - - for (i=0;i<this->m_SubtreeHeaders.size();i++) - { - const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); - if (overlap != 0) - { - walkStacklessQuantizedTree(nodeCallback,quantizedQueryAabbMin,quantizedQueryAabbMax, - subtree.m_rootNodeIndex, - subtree.m_rootNodeIndex+subtree.m_subtreeSize); - } - } -} - - -void b3QuantizedBvh::reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const -{ - reportBoxCastOverlappingNodex(nodeCallback,raySource,rayTarget,b3MakeVector3(0,0,0),b3MakeVector3(0,0,0)); -} - - -void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - //always use stackless - - if (m_useQuantization) - { - walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - else - { - walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - /* - { - //recursive traversal - b3Vector3 qaabbMin = raySource; - b3Vector3 qaabbMax = raySource; - qaabbMin.setMin(rayTarget); - qaabbMax.setMax(rayTarget); - qaabbMin += aabbMin; - qaabbMax += aabbMax; - reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax); - } - */ - -} - - -void b3QuantizedBvh::swapLeafNodes(int i,int splitIndex) -{ - if (m_useQuantization) - { - b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; - m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; - m_quantizedLeafNodes[splitIndex] = tmp; - } else - { - b3OptimizedBvhNode tmp = m_leafNodes[i]; - m_leafNodes[i] = m_leafNodes[splitIndex]; - m_leafNodes[splitIndex] = tmp; - } -} - -void b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex) -{ - if (m_useQuantization) - { - m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; - } else - { - m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex]; - } -} - -//PCK: include -#include <new> - -#if 0 -//PCK: consts -static const unsigned BVH_ALIGNMENT = 16; -static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1; - -static const unsigned BVH_ALIGNMENT_BLOCKS = 2; -#endif - - -unsigned int b3QuantizedBvh::getAlignmentSerializationPadding() -{ - // I changed this to 0 since the extra padding is not needed or used. - return 0;//BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; -} - -unsigned b3QuantizedBvh::calculateSerializeBufferSize() const -{ - unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding(); - baseSize += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - if (m_useQuantization) - { - return baseSize + m_curNodeIndex * sizeof(b3QuantizedBvhNode); - } - return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode); -} - -bool b3QuantizedBvh::serialize(void *o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const -{ - b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size()); - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - -/* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - ///check alignedment for buffer? - b3Assert(0); - return false; - } -*/ - - b3QuantizedBvh *targetBvh = (b3QuantizedBvh *)o_alignedDataBuffer; - - // construct the class so the virtual function table, etc will be set up - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (targetBvh) b3QuantizedBvh; - - if (i_swapEndian) - { - targetBvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(m_curNodeIndex)); - - - b3SwapVector3Endian(m_bvhAabbMin,targetBvh->m_bvhAabbMin); - b3SwapVector3Endian(m_bvhAabbMax,targetBvh->m_bvhAabbMax); - b3SwapVector3Endian(m_bvhQuantization,targetBvh->m_bvhQuantization); - - targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode); - targetBvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(m_subtreeHeaderCount)); - } - else - { - targetBvh->m_curNodeIndex = m_curNodeIndex; - targetBvh->m_bvhAabbMin = m_bvhAabbMin; - targetBvh->m_bvhAabbMax = m_bvhAabbMax; - targetBvh->m_bvhQuantization = m_bvhQuantization; - targetBvh->m_traversalMode = m_traversalMode; - targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount; - } - - targetBvh->m_useQuantization = m_useQuantization; - - unsigned char *nodeData = (unsigned char *)targetBvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = m_curNodeIndex; - - if (m_useQuantization) - { - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; - - - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - else - { - targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex)); - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_subPart)); - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg; - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg; - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex; - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart; - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex; - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - - sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex)); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_subtreeSize)); - } - } - else - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize); - - // need to clear padding in destination buffer - targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0; - } - } - nodeData += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0); - - // this wipes the virtual function table pointer at the start of the buffer for the class - *((void**)o_alignedDataBuffer) = NULL; - - return true; -} - -b3QuantizedBvh *b3QuantizedBvh::deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - - if (i_alignedDataBuffer == NULL)// || (((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - return NULL; - } - b3QuantizedBvh *bvh = (b3QuantizedBvh *)i_alignedDataBuffer; - - if (i_swapEndian) - { - bvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_curNodeIndex)); - - b3UnSwapVector3Endian(bvh->m_bvhAabbMin); - b3UnSwapVector3Endian(bvh->m_bvhAabbMax); - b3UnSwapVector3Endian(bvh->m_bvhQuantization); - - bvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(bvh->m_traversalMode); - bvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(bvh->m_subtreeHeaderCount)); - } - - unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize(); - b3Assert(calculatedBufSize <= i_dataBufferSize); - - if (calculatedBufSize > i_dataBufferSize) - { - return NULL; - } - - unsigned char *nodeData = (unsigned char *)bvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = bvh->m_curNodeIndex; - - // Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (bvh) b3QuantizedBvh(*bvh, false); - - if (bvh->m_useQuantization) - { - bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - } - else - { - bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex)); - bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart)); - bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - } - - sizeToAdd = 0;//(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < bvh->m_subtreeHeaderCount; i++) - { - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex)); - bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize)); - } - } - - return bvh; -} - -// Constructor that prevents b3Vector3's default constructor from being called -b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh &self, bool /* ownsMemory */) : -m_bvhAabbMin(self.m_bvhAabbMin), -m_bvhAabbMax(self.m_bvhAabbMax), -m_bvhQuantization(self.m_bvhQuantization), -m_bulletVersion(B3_BULLET_VERSION) -{ - -} - -void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData) -{ - m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin); - m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; - m_useQuantization = quantizedBvhFloatData.m_useQuantization!=0; - - { - int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; - - for (int i=0;i<numElem;i++,memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg); - m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; - for (int i=0;i<numElem;i++,memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode); - - { - int numElem = quantizedBvhFloatData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; - for (int i=0;i<numElem;i++,memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } -} - -void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData) -{ - m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin); - m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; - m_useQuantization = quantizedBvhDoubleData.m_useQuantization!=0; - - { - int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; - - for (int i=0;i<numElem;i++,memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg); - m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; - for (int i=0;i<numElem;i++,memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode); - - { - int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; - for (int i=0;i<numElem;i++,memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0] ; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } - -} - - - -///fills the dataBuffer and returns the struct name (and 0 on failure) -const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const -{ - b3Assert(0); - return 0; -} - - - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h deleted file mode 100644 index 63c523c758..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h +++ /dev/null @@ -1,556 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_QUANTIZED_BVH_H -#define B3_QUANTIZED_BVH_H - -class b3Serializer; - -//#define DEBUG_CHECK_DEQUANTIZATION 1 -#ifdef DEBUG_CHECK_DEQUANTIZATION -#ifdef __SPU__ -#define printf spu_printf -#endif //__SPU__ - -#include <stdio.h> -#include <stdlib.h> -#endif //DEBUG_CHECK_DEQUANTIZATION - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedAllocator.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3QuantizedBvhData b3QuantizedBvhDoubleData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeDoubleData -#define b3QuantizedBvhDataName "b3QuantizedBvhDoubleData" -#else -#define b3QuantizedBvhData b3QuantizedBvhFloatData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeFloatData -#define b3QuantizedBvhDataName "b3QuantizedBvhFloatData" -#endif - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" - - - -//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp - - -//Note: currently we have 16 bytes per quantized node -#define MAX_SUBTREE_SIZE_IN_BYTES 2048 - -// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one -// actually) triangles each (since the sign bit is reserved -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -B3_ATTRIBUTE_ALIGNED16 (struct) b3QuantizedBvhNode : public b3QuantizedBvhNodeData -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - bool isLeafNode() const - { - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (m_escapeIndexOrTriangleIndex >= 0); - } - int getEscapeIndex() const - { - b3Assert(!isLeafNode()); - return -m_escapeIndexOrTriangleIndex; - } - int getTriangleIndex() const - { - b3Assert(isLeafNode()); - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (m_escapeIndexOrTriangleIndex&~(y)); - } - int getPartId() const - { - b3Assert(isLeafNode()); - // Get only the highest bits where the part index is stored - return (m_escapeIndexOrTriangleIndex>>(31-MAX_NUM_PARTS_IN_BITS)); - } -} -; - -/// b3OptimizedBvhNode contains both internal and leaf node information. -/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. -B3_ATTRIBUTE_ALIGNED16 (struct) b3OptimizedBvhNode -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - //32 bytes - b3Vector3 m_aabbMinOrg; - b3Vector3 m_aabbMaxOrg; - - //4 - int m_escapeIndex; - - //8 - //for child nodes - int m_subPart; - int m_triangleIndex; - -//pad the size to 64 bytes - char m_padding[20]; -}; - - -///b3BvhSubtreeInfo provides info to gather a subtree of limited size -B3_ATTRIBUTE_ALIGNED16(class) b3BvhSubtreeInfo : public b3BvhSubtreeInfoData -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3BvhSubtreeInfo() - { - //memset(&m_padding[0], 0, sizeof(m_padding)); - } - - - void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode) - { - m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0]; - m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; - m_quantizedAabbMin[2] = quantizedNode.m_quantizedAabbMin[2]; - m_quantizedAabbMax[0] = quantizedNode.m_quantizedAabbMax[0]; - m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1]; - m_quantizedAabbMax[2] = quantizedNode.m_quantizedAabbMax[2]; - } -} -; - - -class b3NodeOverlapCallback -{ -public: - virtual ~b3NodeOverlapCallback() {}; - - virtual void processNode(int subPart, int triangleIndex) = 0; -}; - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - - - -///for code readability: -typedef b3AlignedObjectArray<b3OptimizedBvhNode> NodeArray; -typedef b3AlignedObjectArray<b3QuantizedBvhNode> QuantizedNodeArray; -typedef b3AlignedObjectArray<b3BvhSubtreeInfo> BvhSubtreeInfoArray; - - -///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU. -///It is used by the b3BvhTriangleMeshShape as midphase -///It is recommended to use quantization for better performance and lower memory requirements. -B3_ATTRIBUTE_ALIGNED16(class) b3QuantizedBvh -{ -public: - enum b3TraversalMode - { - TRAVERSAL_STACKLESS = 0, - TRAVERSAL_STACKLESS_CACHE_FRIENDLY, - TRAVERSAL_RECURSIVE - }; - - - - - b3Vector3 m_bvhAabbMin; - b3Vector3 m_bvhAabbMax; - b3Vector3 m_bvhQuantization; - -protected: - int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. - - int m_curNodeIndex; - //quantization data - bool m_useQuantization; - - - - NodeArray m_leafNodes; - NodeArray m_contiguousNodes; - QuantizedNodeArray m_quantizedLeafNodes; - QuantizedNodeArray m_quantizedContiguousNodes; - - b3TraversalMode m_traversalMode; - BvhSubtreeInfoArray m_SubtreeHeaders; - - //This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray - mutable int m_subtreeHeaderCount; - - - - - - ///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!) - ///this might be refactored into a virtual, it is usually not calculated at run-time - void setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] ,aabbMin,0); - } else - { - m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; - - } - } - void setInternalNodeAabbMax(int nodeIndex,const b3Vector3& aabbMax) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0],aabbMax,1); - } else - { - m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax; - } - } - - b3Vector3 getAabbMin(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMin[0]); - } - //non-quantized - return m_leafNodes[nodeIndex].m_aabbMinOrg; - - } - b3Vector3 getAabbMax(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); - } - //non-quantized - return m_leafNodes[nodeIndex].m_aabbMaxOrg; - - } - - - void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) - { - if (m_useQuantization) - { - m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; - } - else - { - m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex; - } - - } - - void mergeInternalNodeAabb(int nodeIndex,const b3Vector3& newAabbMin,const b3Vector3& newAabbMax) - { - if (m_useQuantization) - { - unsigned short int quantizedAabbMin[3]; - unsigned short int quantizedAabbMax[3]; - quantize(quantizedAabbMin,newAabbMin,0); - quantize(quantizedAabbMax,newAabbMax,1); - for (int i=0;i<3;i++) - { - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i]; - - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; - - } - } else - { - //non-quantized - m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); - m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax); - } - } - - void swapLeafNodes(int firstIndex,int secondIndex); - - void assignInternalNodeFromLeafNode(int internalNode,int leafNodeIndex); - -protected: - - - - void buildTree (int startIndex,int endIndex); - - int calcSplittingAxis(int startIndex,int endIndex); - - int sortAndCalcSplittingIndex(int startIndex,int endIndex,int splitAxis); - - void walkStacklessTree(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; - - void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; - void walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax,int startNodeIndex,int endNodeIndex) const; - void walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex,int endNodeIndex) const; - - ///tree traversal designed for small-memory processors like PS3 SPU - void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode,b3NodeOverlapCallback* nodeCallback,unsigned short int* quantizedQueryAabbMin,unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA,const b3QuantizedBvhNode* treeNodeB,b3NodeOverlapCallback* nodeCallback) const; - - - - - void updateSubtreeHeaders(int leftChildNodexIndex,int rightChildNodexIndex); - -public: - - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3QuantizedBvh(); - - virtual ~b3QuantizedBvh(); - - - ///***************************************** expert/internal use only ************************* - void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); - QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - void buildInternal(); - ///***************************************** expert/internal use only ************************* - - void reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; - void reportRayOverlappingNodex (b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; - void reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; - - B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point,int isMax) const - { - - b3Assert(m_useQuantization); - - b3Assert(point.getX() <= m_bvhAabbMax.getX()); - b3Assert(point.getY() <= m_bvhAabbMax.getY()); - b3Assert(point.getZ() <= m_bvhAabbMax.getZ()); - - b3Assert(point.getX() >= m_bvhAabbMin.getX()); - b3Assert(point.getY() >= m_bvhAabbMin.getY()); - b3Assert(point.getZ() >= m_bvhAabbMin.getZ()); - - b3Vector3 v = (point - m_bvhAabbMin) * m_bvhQuantization; - ///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative - ///end-points always set the first bit, so that they are sorted properly (so that neighbouring AABBs overlap properly) - ///@todo: double-check this - if (isMax) - { - out[0] = (unsigned short) (((unsigned short)(v.getX()+b3Scalar(1.)) | 1)); - out[1] = (unsigned short) (((unsigned short)(v.getY()+b3Scalar(1.)) | 1)); - out[2] = (unsigned short) (((unsigned short)(v.getZ()+b3Scalar(1.)) | 1)); - } else - { - out[0] = (unsigned short) (((unsigned short)(v.getX()) & 0xfffe)); - out[1] = (unsigned short) (((unsigned short)(v.getY()) & 0xfffe)); - out[2] = (unsigned short) (((unsigned short)(v.getZ()) & 0xfffe)); - } - - -#ifdef DEBUG_CHECK_DEQUANTIZATION - b3Vector3 newPoint = unQuantize(out); - if (isMax) - { - if (newPoint.getX() < point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); - } - if (newPoint.getY() < point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); - } - if (newPoint.getZ() < point.getZ()) - { - - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); - } - } else - { - if (newPoint.getX() > point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n",newPoint.getX()-point.getX(), newPoint.getX(),point.getX()); - } - if (newPoint.getY() > point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n",newPoint.getY()-point.getY(), newPoint.getY(),point.getY()); - } - if (newPoint.getZ() > point.getZ()) - { - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n",newPoint.getZ()-point.getZ(), newPoint.getZ(),point.getZ()); - } - } -#endif //DEBUG_CHECK_DEQUANTIZATION - - } - - - B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2,int isMax) const - { - - b3Assert(m_useQuantization); - - b3Vector3 clampedPoint(point2); - clampedPoint.setMax(m_bvhAabbMin); - clampedPoint.setMin(m_bvhAabbMax); - - quantize(out,clampedPoint,isMax); - - } - - B3_FORCE_INLINE b3Vector3 unQuantize(const unsigned short* vecIn) const - { - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (m_bvhQuantization.getX()), - (b3Scalar)(vecIn[1]) / (m_bvhQuantization.getY()), - (b3Scalar)(vecIn[2]) / (m_bvhQuantization.getZ())); - vecOut += m_bvhAabbMin; - return vecOut; - } - - ///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. - void setTraversalMode(b3TraversalMode traversalMode) - { - m_traversalMode = traversalMode; - } - - - B3_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() - { - return m_quantizedContiguousNodes; - } - - - B3_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray() - { - return m_SubtreeHeaders; - } - -//////////////////////////////////////////////////////////////////// - - /////Calculate space needed to store BVH for serialization - unsigned calculateSerializeBufferSize() const; - - /// Data buffer MUST be 16 byte aligned - virtual bool serialize(void *o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3QuantizedBvh *deSerializeInPlace(void *i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); - - static unsigned int getAlignmentSerializationPadding(); -////////////////////////////////////////////////////////////////////// - - - virtual int calculateSerializeBufferSizeNew() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; - - virtual void deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData); - - virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData); - - -//////////////////////////////////////////////////////////////////// - - B3_FORCE_INLINE bool isQuantized() - { - return m_useQuantization; - } - -private: - // Special "copy" constructor that allows for in-place deserialization - // Prevents b3Vector3's default constructor from being called, but doesn't inialize much else - // ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) - b3QuantizedBvh(b3QuantizedBvh &other, bool ownsMemory); - -} -; - - -struct b3OptimizedBvhNodeFloatData -{ - b3Vector3FloatData m_aabbMinOrg; - b3Vector3FloatData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -struct b3OptimizedBvhNodeDoubleData -{ - b3Vector3DoubleData m_aabbMinOrg; - b3Vector3DoubleData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - - - -struct b3QuantizedBvhFloatData -{ - b3Vector3FloatData m_bvhAabbMin; - b3Vector3FloatData m_bvhAabbMax; - b3Vector3FloatData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeFloatData *m_contiguousNodesPtr; - b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; - b3BvhSubtreeInfoData *m_subTreeInfoPtr; - int m_traversalMode; - int m_numSubtreeHeaders; - -}; - -struct b3QuantizedBvhDoubleData -{ - b3Vector3DoubleData m_bvhAabbMin; - b3Vector3DoubleData m_bvhAabbMax; - b3Vector3DoubleData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeDoubleData *m_contiguousNodesPtr; - b3QuantizedBvhNodeData *m_quantizedContiguousNodesPtr; - - int m_traversalMode; - int m_numSubtreeHeaders; - b3BvhSubtreeInfoData *m_subTreeInfoPtr; -}; - - -B3_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const -{ - return sizeof(b3QuantizedBvhData); -} - - - -#endif //B3_QUANTIZED_BVH_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp deleted file mode 100644 index 4d97f7f62b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp +++ /dev/null @@ -1,214 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3StridingMeshInterface.h" - - -b3StridingMeshInterface::~b3StridingMeshInterface() -{ - -} - - -void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - (void)aabbMin; - (void)aabbMax; - int numtotalphysicsverts = 0; - int part,graphicssubparts = getNumSubParts(); - const unsigned char * vertexbase; - const unsigned char * indexbase; - int indexstride; - PHY_ScalarType type; - PHY_ScalarType gfxindextype; - int stride,numverts,numtriangles; - int gfxindex; - b3Vector3 triangle[3]; - - b3Vector3 meshScaling = getScaling(); - - ///if the number of parts is big, the performance might drop due to the innerloop switch on indextype - for (part=0;part<graphicssubparts ;part++) - { - getLockedReadOnlyVertexIndexBase(&vertexbase,numverts,type,stride,&indexbase,indexstride,numtriangles,gfxindextype,part); - numtotalphysicsverts+=numtriangles*3; //upper bound - - ///unlike that developers want to pass in double-precision meshes in single-precision Bullet build - ///so disable this feature by default - ///see patch http://code.google.com/p/bullet/issues/detail?id=213 - - switch (type) - { - case PHY_FLOAT: - { - - float* graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); - graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); - graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); - graphicsbase = (float*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(),graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (float*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue(graphicsbase[0]*meshScaling.getX(),graphicsbase[1]*meshScaling.getY(), graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - - case PHY_DOUBLE: - { - double* graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned int* tri_indices= (unsigned int*)(indexbase+gfxindex*indexstride); - graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned short int* tri_indices= (unsigned short int*)(indexbase+gfxindex*indexstride); - graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex=0;gfxindex<numtriangles;gfxindex++) - { - unsigned char* tri_indices= (unsigned char*)(indexbase+gfxindex*indexstride); - graphicsbase = (double*)(vertexbase+tri_indices[0]*stride); - triangle[0].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(),(b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[1]*stride); - triangle[1].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - graphicsbase = (double*)(vertexbase+tri_indices[2]*stride); - triangle[2].setValue((b3Scalar)graphicsbase[0]*meshScaling.getX(),(b3Scalar)graphicsbase[1]*meshScaling.getY(), (b3Scalar)graphicsbase[2]*meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle,part,gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - default: - b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); - } - - unLockReadOnlyVertexBase(part); - } -} - -void b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax) -{ - - struct AabbCalculationCallback : public b3InternalTriangleIndexCallback - { - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - - AabbCalculationCallback() - { - m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) - { - (void)partId; - (void)triangleIndex; - - m_aabbMin.setMin(triangle[0]); - m_aabbMax.setMax(triangle[0]); - m_aabbMin.setMin(triangle[1]); - m_aabbMax.setMax(triangle[1]); - m_aabbMin.setMin(triangle[2]); - m_aabbMax.setMax(triangle[2]); - } - }; - - //first calculate the total aabb for all triangles - AabbCalculationCallback aabbCallback; - aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT),b3Scalar(-B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - InternalProcessAllTriangles(&aabbCallback,aabbMin,aabbMax); - - aabbMin = aabbCallback.m_aabbMin; - aabbMax = aabbCallback.m_aabbMax; -} - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h deleted file mode 100644 index 9513f68f77..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h +++ /dev/null @@ -1,167 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_STRIDING_MESHINTERFACE_H -#define B3_STRIDING_MESHINTERFACE_H - -#include "Bullet3Common/b3Vector3.h" -#include "b3TriangleCallback.h" -//#include "b3ConcaveShape.h" - - -enum PHY_ScalarType { - PHY_FLOAT, PHY_DOUBLE, PHY_INTEGER, PHY_SHORT, - PHY_FIXEDPOINT88, PHY_UCHAR -}; - - -/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes. -/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips. -/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory. -B3_ATTRIBUTE_ALIGNED16(class ) b3StridingMeshInterface -{ - protected: - - b3Vector3 m_scaling; - - public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3StridingMeshInterface() :m_scaling(b3MakeVector3(b3Scalar(1.),b3Scalar(1.),b3Scalar(1.))) - { - - } - - virtual ~b3StridingMeshInterface(); - - - - virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const; - - ///brute force method to calculate aabb - void calculateAabbBruteForce(b3Vector3& aabbMin,b3Vector3& aabbMax); - - /// get read and write access to a subpart of a triangle mesh - /// this subpart has a continuous array of vertices and indices - /// in this way the mesh can be handled as chunks of memory with striding - /// very similar to OpenGL vertexarray support - /// make a call to unLockVertexBase when the read and write access is finished - virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0)=0; - - virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& stride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const=0; - - /// unLockVertexBase finishes the access to a subpart of the triangle mesh - /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished - virtual void unLockVertexBase(int subpart)=0; - - virtual void unLockReadOnlyVertexBase(int subpart) const=0; - - - /// getNumSubParts returns the number of seperate subparts - /// each subpart has a continuous array of vertices and indices - virtual int getNumSubParts() const=0; - - virtual void preallocateVertices(int numverts)=0; - virtual void preallocateIndices(int numindices)=0; - - virtual bool hasPremadeAabb() const { return false; } - virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const - { - (void) aabbMin; - (void) aabbMax; - } - virtual void getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const - { - (void) aabbMin; - (void) aabbMax; - } - - const b3Vector3& getScaling() const { - return m_scaling; - } - void setScaling(const b3Vector3& scaling) - { - m_scaling = scaling; - } - - virtual int calculateSerializeBufferSize() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - //virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; - - -}; - -struct b3IntIndexData -{ - int m_value; -}; - -struct b3ShortIntIndexData -{ - short m_value; - char m_pad[2]; -}; - -struct b3ShortIntIndexTripletData -{ - short m_values[3]; - char m_pad[2]; -}; - -struct b3CharIndexTripletData -{ - unsigned char m_values[3]; - char m_pad; -}; - - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3MeshPartData -{ - b3Vector3FloatData *m_vertices3f; - b3Vector3DoubleData *m_vertices3d; - - b3IntIndexData *m_indices32; - b3ShortIntIndexTripletData *m_3indices16; - b3CharIndexTripletData *m_3indices8; - - b3ShortIntIndexData *m_indices16;//backwards compatibility - - int m_numTriangles;//length of m_indices = m_numTriangles - int m_numVertices; -}; - - -///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64 -struct b3StridingMeshInterfaceData -{ - b3MeshPartData *m_meshPartsPtr; - b3Vector3FloatData m_scaling; - int m_numMeshParts; - char m_padding[4]; -}; - - - - -B3_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const -{ - return sizeof(b3StridingMeshInterfaceData); -} - - - -#endif //B3_STRIDING_MESHINTERFACE_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h deleted file mode 100644 index d073ee57c3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h +++ /dev/null @@ -1,38 +0,0 @@ - -#ifndef B3_SUPPORT_MAPPINGS_H -#define B3_SUPPORT_MAPPINGS_H - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "b3VectorFloat4.h" - - -struct b3GjkPairDetector; - - - -inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray<b3Vector3>& verticesA, b3Scalar margin) -{ - b3Vector3 supVec = b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); - - // Here we take advantage of dot(a, b*c) = dot(a*b, c). Note: This is true mathematically, but not numerically. - if( 0 < hull->m_numVertices ) - { - const b3Vector3 scaled = supportVec; - int index = (int) scaled.maxDot( &verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot); - return verticesA[hull->m_vertexOffset+index]; - } - - return supVec; - -} - -inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec,const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray<b3Vector3>& verticesA) -{ - return localGetSupportVertexWithMargin(supportVec,hull,verticesA,0.f); -} - -#endif //B3_SUPPORT_MAPPINGS_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp deleted file mode 100644 index 9066451884..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp +++ /dev/null @@ -1,28 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TriangleCallback.h" - -b3TriangleCallback::~b3TriangleCallback() -{ - -} - - -b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback() -{ - -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h deleted file mode 100644 index 3059fa4f21..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h +++ /dev/null @@ -1,42 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRIANGLE_CALLBACK_H -#define B3_TRIANGLE_CALLBACK_H - -#include "Bullet3Common/b3Vector3.h" - - -///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles. -///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape. -class b3TriangleCallback -{ -public: - - virtual ~b3TriangleCallback(); - virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0; -}; - -class b3InternalTriangleIndexCallback -{ -public: - - virtual ~b3InternalTriangleIndexCallback(); - virtual void internalProcessTriangleIndex(b3Vector3* triangle,int partId,int triangleIndex) = 0; -}; - - - -#endif //B3_TRIANGLE_CALLBACK_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp deleted file mode 100644 index a0f59babbe..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TriangleIndexVertexArray.h" - -b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride) -: m_hasAabb(0) -{ - b3IndexedMesh mesh; - - mesh.m_numTriangles = numTriangles; - mesh.m_triangleIndexBase = (const unsigned char *)triangleIndexBase; - mesh.m_triangleIndexStride = triangleIndexStride; - mesh.m_numVertices = numVertices; - mesh.m_vertexBase = (const unsigned char *)vertexBase; - mesh.m_vertexStride = vertexStride; - - addIndexedMesh(mesh); - -} - -b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray() -{ - -} - -void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) -{ - b3Assert(subpart< getNumSubParts() ); - - b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - numverts = mesh.m_numVertices; - (*vertexbase) = (unsigned char *) mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - - (*indexbase) = (unsigned char *)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart) const -{ - const b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - numverts = mesh.m_numVertices; - (*vertexbase) = (const unsigned char *)mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - (*indexbase) = (const unsigned char *)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -bool b3TriangleIndexVertexArray::hasPremadeAabb() const -{ - return (m_hasAabb == 1); -} - - -void b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const -{ - m_aabbMin = aabbMin; - m_aabbMax = aabbMax; - m_hasAabb = 1; // this is intentionally an int see notes in header -} - -void b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const -{ - *aabbMin = m_aabbMin; - *aabbMax = m_aabbMax; -} - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h deleted file mode 100644 index d26b2893bc..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h +++ /dev/null @@ -1,133 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRIANGLE_INDEX_VERTEX_ARRAY_H -#define B3_TRIANGLE_INDEX_VERTEX_ARRAY_H - -#include "b3StridingMeshInterface.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Scalar.h" - - -///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh. -///Instead of the number of indices, we pass the number of triangles. -B3_ATTRIBUTE_ALIGNED16( struct) b3IndexedMesh -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numTriangles; - const unsigned char * m_triangleIndexBase; - // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) - int m_triangleIndexStride; - int m_numVertices; - const unsigned char * m_vertexBase; - // Size of a vertex, in bytes - int m_vertexStride; - - // The index type is set when adding an indexed mesh to the - // b3TriangleIndexVertexArray, do not set it manually - PHY_ScalarType m_indexType; - - // The vertex type has a default type similar to Bullet's precision mode (float or double) - // but can be set manually if you for example run Bullet with double precision but have - // mesh data in single precision.. - PHY_ScalarType m_vertexType; - - - b3IndexedMesh() - :m_indexType(PHY_INTEGER), -#ifdef B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_DOUBLE) -#else // B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_FLOAT) -#endif // B3_USE_DOUBLE_PRECISION - { - } -} -; - - -typedef b3AlignedObjectArray<b3IndexedMesh> IndexedMeshArray; - -///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays. -///Additional meshes can be added using addIndexedMesh -///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays. -///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray. -B3_ATTRIBUTE_ALIGNED16( class) b3TriangleIndexVertexArray : public b3StridingMeshInterface -{ -protected: - IndexedMeshArray m_indexedMeshes; - int m_pad[2]; - mutable int m_hasAabb; // using int instead of bool to maintain alignment - mutable b3Vector3 m_aabbMin; - mutable b3Vector3 m_aabbMax; - -public: - - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3TriangleIndexVertexArray() : m_hasAabb(0) - { - } - - virtual ~b3TriangleIndexVertexArray(); - - //just to be backwards compatible - b3TriangleIndexVertexArray(int numTriangles,int* triangleIndexBase,int triangleIndexStride,int numVertices,b3Scalar* vertexBase,int vertexStride); - - void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) - { - m_indexedMeshes.push_back(mesh); - m_indexedMeshes[m_indexedMeshes.size()-1].m_indexType = indexType; - } - - - virtual void getLockedVertexIndexBase(unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0); - - virtual void getLockedReadOnlyVertexIndexBase(const unsigned char **vertexbase, int& numverts,PHY_ScalarType& type, int& vertexStride,const unsigned char **indexbase,int & indexstride,int& numfaces,PHY_ScalarType& indicestype,int subpart=0) const; - - /// unLockVertexBase finishes the access to a subpart of the triangle mesh - /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished - virtual void unLockVertexBase(int subpart) {(void)subpart;} - - virtual void unLockReadOnlyVertexBase(int subpart) const {(void)subpart;} - - /// getNumSubParts returns the number of seperate subparts - /// each subpart has a continuous array of vertices and indices - virtual int getNumSubParts() const { - return (int)m_indexedMeshes.size(); - } - - IndexedMeshArray& getIndexedMeshArray() - { - return m_indexedMeshes; - } - - const IndexedMeshArray& getIndexedMeshArray() const - { - return m_indexedMeshes; - } - - virtual void preallocateVertices(int numverts){(void) numverts;} - virtual void preallocateIndices(int numindices){(void) numindices;} - - virtual bool hasPremadeAabb() const; - virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax ) const; - virtual void getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax ) const; - -} -; - -#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h deleted file mode 100644 index f6f65f7719..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h +++ /dev/null @@ -1,11 +0,0 @@ -#ifndef B3_VECTOR_FLOAT4_H -#define B3_VECTOR_FLOAT4_H - -#include "Bullet3Common/b3Transform.h" - -//#define cross3(a,b) (a.cross(b)) -#define float4 b3Vector3 -//#define make_float4(x,y,z,w) b3Vector4(x,y,z,w) - - -#endif //B3_VECTOR_FLOAT4_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp deleted file mode 100644 index cf3d5ef49d..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp +++ /dev/null @@ -1,609 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. - - Elsevier CDROM license agreements grants nonexclusive license to use the software - for any purpose, commercial or non-commercial as long as the following credit is included - identifying the original source of the software: - - Parts of the source are "from the book Real-Time Collision Detection by - Christer Ericson, published by Morgan Kaufmann Publishers, - (c) 2005 Elsevier Inc." - -*/ - - -#include "b3VoronoiSimplexSolver.h" - -#define VERTA 0 -#define VERTB 1 -#define VERTC 2 -#define VERTD 3 - -#define B3_CATCH_DEGENERATE_TETRAHEDRON 1 -void b3VoronoiSimplexSolver::removeVertex(int index) -{ - - b3Assert(m_numVertices>0); - m_numVertices--; - m_simplexVectorW[index] = m_simplexVectorW[m_numVertices]; - m_simplexPointsP[index] = m_simplexPointsP[m_numVertices]; - m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices]; -} - -void b3VoronoiSimplexSolver::reduceVertices (const b3UsageBitfield& usedVerts) -{ - if ((numVertices() >= 4) && (!usedVerts.usedVertexD)) - removeVertex(3); - - if ((numVertices() >= 3) && (!usedVerts.usedVertexC)) - removeVertex(2); - - if ((numVertices() >= 2) && (!usedVerts.usedVertexB)) - removeVertex(1); - - if ((numVertices() >= 1) && (!usedVerts.usedVertexA)) - removeVertex(0); - -} - - - - - -//clear the simplex, remove all the vertices -void b3VoronoiSimplexSolver::reset() -{ - m_cachedValidClosest = false; - m_numVertices = 0; - m_needsUpdate = true; - m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT),b3Scalar(B3_LARGE_FLOAT)); - m_cachedBC.reset(); -} - - - - //add a vertex -void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q) -{ - m_lastW = w; - m_needsUpdate = true; - - m_simplexVectorW[m_numVertices] = w; - m_simplexPointsP[m_numVertices] = p; - m_simplexPointsQ[m_numVertices] = q; - - m_numVertices++; -} - -bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() -{ - - if (m_needsUpdate) - { - m_cachedBC.reset(); - - m_needsUpdate = false; - - switch (numVertices()) - { - case 0: - m_cachedValidClosest = false; - break; - case 1: - { - m_cachedP1 = m_simplexPointsP[0]; - m_cachedP2 = m_simplexPointsQ[0]; - m_cachedV = m_cachedP1-m_cachedP2; //== m_simplexVectorW[0] - m_cachedBC.reset(); - m_cachedBC.setBarycentricCoordinates(b3Scalar(1.),b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - m_cachedValidClosest = m_cachedBC.isValid(); - break; - }; - case 2: - { - //closest point origin from line segment - const b3Vector3& from = m_simplexVectorW[0]; - const b3Vector3& to = m_simplexVectorW[1]; - b3Vector3 nearest; - - b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - b3Vector3 diff = p - from; - b3Vector3 v = to - from; - b3Scalar t = v.dot(diff); - - if (t > 0) { - b3Scalar dotVV = v.dot(v); - if (t < dotVV) { - t /= dotVV; - diff -= t*v; - m_cachedBC.m_usedVertices.usedVertexA = true; - m_cachedBC.m_usedVertices.usedVertexB = true; - } else { - t = 1; - diff -= v; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexB = true; - } - } else - { - t = 0; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexA = true; - } - m_cachedBC.setBarycentricCoordinates(1-t,t); - nearest = from + t*v; - - m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); - m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); - m_cachedV = m_cachedP1 - m_cachedP2; - - reduceVertices(m_cachedBC.m_usedVertices); - - m_cachedValidClosest = m_cachedBC.isValid(); - break; - } - case 3: - { - //closest point origin from triangle - b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - - closestPtPointTriangle(p,a,b,c,m_cachedBC); - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedV = m_cachedP1-m_cachedP2; - - reduceVertices (m_cachedBC.m_usedVertices); - m_cachedValidClosest = m_cachedBC.isValid(); - - break; - } - case 4: - { - - - b3Vector3 p =b3MakeVector3(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - const b3Vector3& d = m_simplexVectorW[3]; - - bool hasSeperation = closestPtPointTetrahedron(p,a,b,c,d,m_cachedBC); - - if (hasSeperation) - { - - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedV = m_cachedP1-m_cachedP2; - reduceVertices (m_cachedBC.m_usedVertices); - } else - { -// printf("sub distance got penetration\n"); - - if (m_cachedBC.m_degenerate) - { - m_cachedValidClosest = false; - } else - { - m_cachedValidClosest = true; - //degenerate case == false, penetration = true + zero - m_cachedV.setValue(b3Scalar(0.),b3Scalar(0.),b3Scalar(0.)); - } - break; - } - - m_cachedValidClosest = m_cachedBC.isValid(); - - //closest point origin from tetrahedron - break; - } - default: - { - m_cachedValidClosest = false; - } - }; - } - - return m_cachedValidClosest; - -} - -//return/calculate the closest vertex -bool b3VoronoiSimplexSolver::closest(b3Vector3& v) -{ - bool succes = updateClosestVectorAndPoints(); - v = m_cachedV; - return succes; -} - - - -b3Scalar b3VoronoiSimplexSolver::maxVertex() -{ - int i, numverts = numVertices(); - b3Scalar maxV = b3Scalar(0.); - for (i=0;i<numverts;i++) - { - b3Scalar curLen2 = m_simplexVectorW[i].length2(); - if (maxV < curLen2) - maxV = curLen2; - } - return maxV; -} - - - - //return the current simplex -int b3VoronoiSimplexSolver::getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const -{ - int i; - for (i=0;i<numVertices();i++) - { - yBuf[i] = m_simplexVectorW[i]; - pBuf[i] = m_simplexPointsP[i]; - qBuf[i] = m_simplexPointsQ[i]; - } - return numVertices(); -} - - - - -bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w) -{ - bool found = false; - int i, numverts = numVertices(); - //b3Scalar maxV = b3Scalar(0.); - - //w is in the current (reduced) simplex - for (i=0;i<numverts;i++) - { -#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD - if ( m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold) -#else - if (m_simplexVectorW[i] == w) -#endif - found = true; - } - - //check in case lastW is already removed - if (w == m_lastW) - return true; - - return found; -} - -void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v) -{ - v = m_cachedV; -} - - -bool b3VoronoiSimplexSolver::emptySimplex() const -{ - return (numVertices() == 0); - -} - -void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2) -{ - updateClosestVectorAndPoints(); - p1 = m_cachedP1; - p2 = m_cachedP2; - -} - - - - -bool b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result) -{ - result.m_usedVertices.reset(); - - // Check if P in vertex region outside A - b3Vector3 ab = b - a; - b3Vector3 ac = c - a; - b3Vector3 ap = p - a; - b3Scalar d1 = ab.dot(ap); - b3Scalar d2 = ac.dot(ap); - if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0)) - { - result.m_closestPointOnSimplex = a; - result.m_usedVertices.usedVertexA = true; - result.setBarycentricCoordinates(1,0,0); - return true;// a; // barycentric coordinates (1,0,0) - } - - // Check if P in vertex region outside B - b3Vector3 bp = p - b; - b3Scalar d3 = ab.dot(bp); - b3Scalar d4 = ac.dot(bp); - if (d3 >= b3Scalar(0.0) && d4 <= d3) - { - result.m_closestPointOnSimplex = b; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(0,1,0); - - return true; // b; // barycentric coordinates (0,1,0) - } - // Check if P in edge region of AB, if so return projection of P onto AB - b3Scalar vc = d1*d4 - d3*d2; - if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) { - b3Scalar v = d1 / (d1 - d3); - result.m_closestPointOnSimplex = a + v * ab; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(1-v,v,0); - return true; - //return a + v * ab; // barycentric coordinates (1-v,v,0) - } - - // Check if P in vertex region outside C - b3Vector3 cp = p - c; - b3Scalar d5 = ab.dot(cp); - b3Scalar d6 = ac.dot(cp); - if (d6 >= b3Scalar(0.0) && d5 <= d6) - { - result.m_closestPointOnSimplex = c; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0,0,1); - return true;//c; // barycentric coordinates (0,0,1) - } - - // Check if P in edge region of AC, if so return projection of P onto AC - b3Scalar vb = d5*d2 - d1*d6; - if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) { - b3Scalar w = d2 / (d2 - d6); - result.m_closestPointOnSimplex = a + w * ac; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1-w,0,w); - return true; - //return a + w * ac; // barycentric coordinates (1-w,0,w) - } - - // Check if P in edge region of BC, if so return projection of P onto BC - b3Scalar va = d3*d6 - d5*d4; - if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) { - b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); - - result.m_closestPointOnSimplex = b + w * (c - b); - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0,1-w,w); - return true; - // return b + w * (c - b); // barycentric coordinates (0,1-w,w) - } - - // P inside face region. Compute Q through its barycentric coordinates (u,v,w) - b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); - b3Scalar v = vb * denom; - b3Scalar w = vc * denom; - - result.m_closestPointOnSimplex = a + ab * v + ac * w; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1-v-w,v,w); - - return true; -// return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w - -} - - - - - -/// Test if point p and d lie on opposite sides of plane through abc -int b3VoronoiSimplexSolver::pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d) -{ - b3Vector3 normal = (b-a).cross(c-a); - - b3Scalar signp = (p - a).dot(normal); // [AP AB AC] - b3Scalar signd = (d - a).dot( normal); // [AD AB AC] - -#ifdef B3_CATCH_DEGENERATE_TETRAHEDRON -#ifdef BT_USE_DOUBLE_PRECISION -if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8))) - { - return -1; - } -#else - if (signd * signd < (b3Scalar(1e-4) * b3Scalar(1e-4))) - { -// printf("affine dependent/degenerate\n");// - return -1; - } -#endif - -#endif - // Points on opposite sides if expression signs are opposite - return signp * signd < b3Scalar(0.); -} - - -bool b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult) -{ - b3SubSimplexClosestResult tempResult; - - // Start out assuming point inside all halfspaces, so closest to itself - finalResult.m_closestPointOnSimplex = p; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = true; - finalResult.m_usedVertices.usedVertexB = true; - finalResult.m_usedVertices.usedVertexC = true; - finalResult.m_usedVertices.usedVertexD = true; - - int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d); - int pointOutsideACD = pointOutsideOfPlane(p, a, c, d, b); - int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); - int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); - - if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) - { - finalResult.m_degenerate = true; - return false; - } - - if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) - { - return false; - } - - - b3Scalar bestSqDist = FLT_MAX; - // If point outside face abc then compute closest point on abc - if (pointOutsideABC) - { - closestPtPointTriangle(p, a, b, c,tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - - b3Scalar sqDist = (q - p).dot( q - p); - // Update best closest point if (squared) distance is less than current best - if (sqDist < bestSqDist) { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - //convert result bitmask! - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC], - 0 - ); - - } - } - - - // Repeat test for face acd - if (pointOutsideACD) - { - closestPtPointTriangle(p, a, c, d,tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - - b3Scalar sqDist = (q - p).dot( q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - 0, - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC] - ); - - } - } - // Repeat test for face adb - - - if (pointOutsideADB) - { - closestPtPointTriangle(p, a, d, b,tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - - b3Scalar sqDist = (q - p).dot( q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC; - - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - 0, - tempResult.m_barycentricCoords[VERTB] - ); - - } - } - // Repeat test for face bdc - - - if (pointOutsideBDC) - { - closestPtPointTriangle(p, b, d, c,tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - b3Scalar sqDist = (q - p).dot( q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - // - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - - finalResult.setBarycentricCoordinates( - 0, - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - tempResult.m_barycentricCoords[VERTB] - ); - - } - } - - //help! we ended up full ! - - if (finalResult.m_usedVertices.usedVertexA && - finalResult.m_usedVertices.usedVertexB && - finalResult.m_usedVertices.usedVertexC && - finalResult.m_usedVertices.usedVertexD) - { - return true; - } - - return true; -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h deleted file mode 100644 index a6e27667d8..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h +++ /dev/null @@ -1,177 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - - -#ifndef B3_VORONOI_SIMPLEX_SOLVER_H -#define B3_VORONOI_SIMPLEX_SOLVER_H - -#include "Bullet3Common/b3Vector3.h" - - -#define VORONOI_SIMPLEX_MAX_VERTS 5 - -///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure -//#define BT_USE_EQUAL_VERTEX_THRESHOLD -#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f - - -struct b3UsageBitfield{ - b3UsageBitfield() - { - reset(); - } - - void reset() - { - usedVertexA = false; - usedVertexB = false; - usedVertexC = false; - usedVertexD = false; - } - unsigned short usedVertexA : 1; - unsigned short usedVertexB : 1; - unsigned short usedVertexC : 1; - unsigned short usedVertexD : 1; - unsigned short unused1 : 1; - unsigned short unused2 : 1; - unsigned short unused3 : 1; - unsigned short unused4 : 1; -}; - - -struct b3SubSimplexClosestResult -{ - b3Vector3 m_closestPointOnSimplex; - //MASK for m_usedVertices - //stores the simplex vertex-usage, using the MASK, - // if m_usedVertices & MASK then the related vertex is used - b3UsageBitfield m_usedVertices; - b3Scalar m_barycentricCoords[4]; - bool m_degenerate; - - void reset() - { - m_degenerate = false; - setBarycentricCoordinates(); - m_usedVertices.reset(); - } - bool isValid() - { - bool valid = (m_barycentricCoords[0] >= b3Scalar(0.)) && - (m_barycentricCoords[1] >= b3Scalar(0.)) && - (m_barycentricCoords[2] >= b3Scalar(0.)) && - (m_barycentricCoords[3] >= b3Scalar(0.)); - - - return valid; - } - void setBarycentricCoordinates(b3Scalar a=b3Scalar(0.),b3Scalar b=b3Scalar(0.),b3Scalar c=b3Scalar(0.),b3Scalar d=b3Scalar(0.)) - { - m_barycentricCoords[0] = a; - m_barycentricCoords[1] = b; - m_barycentricCoords[2] = c; - m_barycentricCoords[3] = d; - } - -}; - -/// b3VoronoiSimplexSolver is an implementation of the closest point distance algorithm from a 1-4 points simplex to the origin. -/// Can be used with GJK, as an alternative to Johnson distance algorithm. - -B3_ATTRIBUTE_ALIGNED16(class) b3VoronoiSimplexSolver -{ -public: - - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numVertices; - - b3Vector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; - - - - b3Vector3 m_cachedP1; - b3Vector3 m_cachedP2; - b3Vector3 m_cachedV; - b3Vector3 m_lastW; - - b3Scalar m_equalVertexThreshold; - bool m_cachedValidClosest; - - - b3SubSimplexClosestResult m_cachedBC; - - bool m_needsUpdate; - - void removeVertex(int index); - void reduceVertices (const b3UsageBitfield& usedVerts); - bool updateClosestVectorAndPoints(); - - bool closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); - int pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); - bool closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c,b3SubSimplexClosestResult& result); - -public: - - b3VoronoiSimplexSolver() - : m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) - { - } - void reset(); - - void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); - - void setEqualVertexThreshold(b3Scalar threshold) - { - m_equalVertexThreshold = threshold; - } - - b3Scalar getEqualVertexThreshold() const - { - return m_equalVertexThreshold; - } - - bool closest(b3Vector3& v); - - b3Scalar maxVertex(); - - bool fullSimplex() const - { - return (m_numVertices == 4); - } - - int getSimplex(b3Vector3 *pBuf, b3Vector3 *qBuf, b3Vector3 *yBuf) const; - - bool inSimplex(const b3Vector3& w); - - void backup_closest(b3Vector3& v) ; - - bool emptySimplex() const ; - - void compute_points(b3Vector3& p1, b3Vector3& p2) ; - - int numVertices() const - { - return m_numVertices; - } - - -}; - -#endif //B3_VORONOI_SIMPLEX_SOLVER_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl deleted file mode 100644 index faa413441c..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl +++ /dev/null @@ -1,283 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - -typedef unsigned int u32; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///btQuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} btQuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - -int getTriangleIndex(const btQuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeaf(const btQuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const btQuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} btBvhSubtreeInfo; - -///keep this in sync with btCollidable.h -typedef struct -{ - int m_numChildShapes; - int blaat2; - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -int testQuantizedAabbAgainstQuantizedAabb( - const unsigned short int* aabbMin1, - const unsigned short int* aabbMax1, - const unsigned short int* aabbMin2, - const unsigned short int* aabbMax2) -{ - //int overlap = 1; - if (aabbMin1[0] > aabbMax2[0]) - return 0; - if (aabbMax1[0] < aabbMin2[0]) - return 0; - if (aabbMin1[1] > aabbMax2[1]) - return 0; - if (aabbMax1[1] < aabbMin2[1]) - return 0; - if (aabbMin1[2] > aabbMax2[2]) - return 0; - if (aabbMax1[2] < aabbMin2[2]) - return 0; - return 1; - //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; - //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; - //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap; - //return overlap; -} - - -void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization) -{ - float4 clampedPoint = max(point2,bvhAabbMin); - clampedPoint = min (clampedPoint, bvhAabbMax); - - float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; - if (isMax) - { - out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1)); - out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1)); - out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1)); - } else - { - out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe)); - out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe)); - out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe)); - } - -} - - -// work-in-progress -__kernel void bvhTraversalKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global btAabbCL* aabbs, - __global int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - __global const btBvhSubtreeInfo* subtreeHeadersRoot, - __global const btQuantizedBvhNode* quantizedNodesRoot, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumConcavePairsCapacity) -{ - int id = get_global_id(0); - if (id>=numPairs) - return; - - int bodyIndexA = pairs[id].x; - int bodyIndexB = pairs[id].y; - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - return; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if (shapeTypeB!=SHAPE_CONVEX_HULL && - shapeTypeB!=SHAPE_SPHERE && - shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS - ) - return; - - b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes]; - - float4 bvhAabbMin = bvhInfo.m_aabbMin; - float4 bvhAabbMax = bvhInfo.m_aabbMax; - float4 bvhQuantization = bvhInfo.m_quantization; - int numSubtreeHeaders = bvhInfo.m_numSubTrees; - __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; - __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; - - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization); - quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization); - - for (int i=0;i<numSubtreeHeaders;i++) - { - btBvhSubtreeInfo subtree = subtreeHeaders[i]; - - int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); - if (overlap != 0) - { - int startNodeIndex = subtree.m_rootNodeIndex; - int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize; - int curIndex = startNodeIndex; - int escapeIndex; - int isLeafNode; - int aabbOverlap; - while (curIndex < endNodeIndex) - { - btQuantizedBvhNode rootNode = quantizedNodes[curIndex]; - aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax); - isLeafNode = isLeaf(&rootNode); - if (aabbOverlap) - { - if (isLeafNode) - { - int triangleIndex = getTriangleIndex(&rootNode); - if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - int pairIdx = atomic_add(numConcavePairsOut,numChildrenB); - for (int b=0;b<numChildrenB;b++) - { - if ((pairIdx+b)<maxNumConcavePairsCapacity) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB); - concavePairsOut[pairIdx+b] = newPair; - } - } - } else - { - int pairIdx = atomic_inc(numConcavePairsOut); - if (pairIdx<maxNumConcavePairsCapacity) - { - int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0); - concavePairsOut[pairIdx] = newPair; - } - } - } - curIndex++; - } else - { - if (isLeafNode) - { - curIndex++; - } else - { - escapeIndex = getEscapeIndex(&rootNode); - curIndex += escapeIndex; - } - } - } - } - } - -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h deleted file mode 100644 index 4b3b49eae8..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ /dev/null @@ -1,258 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* bvhTraversalKernelCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"typedef unsigned int u32;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes\n" -" int m_escapeIndexOrTriangleIndex;\n" -"} btQuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -" float4 m_aabbMin;\n" -" float4 m_aabbMax;\n" -" float4 m_quantization;\n" -" int m_numNodes;\n" -" int m_numSubTrees;\n" -" int m_nodeOffset;\n" -" int m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeaf(const btQuantizedBvhNode* rootNode)\n" -"{\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -" \n" -"int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" -"{\n" -" return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes, points to the root of the subtree\n" -" int m_rootNodeIndex;\n" -" //4 bytes\n" -" int m_subtreeSize;\n" -" int m_padding[3];\n" -"} btBvhSubtreeInfo;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -" int m_numChildShapes;\n" -" int blaat2;\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -" \n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -" float4 m_childPosition;\n" -" float4 m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"int testQuantizedAabbAgainstQuantizedAabb(\n" -" const unsigned short int* aabbMin1,\n" -" const unsigned short int* aabbMax1,\n" -" const unsigned short int* aabbMin2,\n" -" const unsigned short int* aabbMax2)\n" -"{\n" -" //int overlap = 1;\n" -" if (aabbMin1[0] > aabbMax2[0])\n" -" return 0;\n" -" if (aabbMax1[0] < aabbMin2[0])\n" -" return 0;\n" -" if (aabbMin1[1] > aabbMax2[1])\n" -" return 0;\n" -" if (aabbMax1[1] < aabbMin2[1])\n" -" return 0;\n" -" if (aabbMin1[2] > aabbMax2[2])\n" -" return 0;\n" -" if (aabbMax1[2] < aabbMin2[2])\n" -" return 0;\n" -" return 1;\n" -" //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" -" //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" -" //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" -" //return overlap;\n" -"}\n" -"void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" -"{\n" -" float4 clampedPoint = max(point2,bvhAabbMin);\n" -" clampedPoint = min (clampedPoint, bvhAabbMax);\n" -" float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" -" if (isMax)\n" -" {\n" -" out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" -" out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" -" out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" -" } else\n" -" {\n" -" out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" -" out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" -" out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" -" }\n" -"}\n" -"// work-in-progress\n" -"__kernel void bvhTraversalKernel( __global const int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global btAabbCL* aabbs,\n" -" __global int4* concavePairsOut,\n" -" __global volatile int* numConcavePairsOut,\n" -" __global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" -" __global const btQuantizedBvhNode* quantizedNodesRoot,\n" -" __global const b3BvhInfo* bvhInfos,\n" -" int numPairs,\n" -" int maxNumConcavePairsCapacity)\n" -"{\n" -" int id = get_global_id(0);\n" -" if (id>=numPairs)\n" -" return;\n" -" \n" -" int bodyIndexA = pairs[id].x;\n" -" int bodyIndexB = pairs[id].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" //once the broadphase avoids static-static pairs, we can remove this test\n" -" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -" {\n" -" return;\n" -" }\n" -" \n" -" if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" -" return;\n" -" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -" \n" -" if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" -" shapeTypeB!=SHAPE_SPHERE &&\n" -" shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" -" )\n" -" return;\n" -" b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" -" float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" -" float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" -" float4 bvhQuantization = bvhInfo.m_quantization;\n" -" int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" -" __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" -" __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" -" \n" -" unsigned short int quantizedQueryAabbMin[3];\n" -" unsigned short int quantizedQueryAabbMax[3];\n" -" quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -" quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" -" \n" -" for (int i=0;i<numSubtreeHeaders;i++)\n" -" {\n" -" btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" -" \n" -" int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" -" if (overlap != 0)\n" -" {\n" -" int startNodeIndex = subtree.m_rootNodeIndex;\n" -" int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" -" int curIndex = startNodeIndex;\n" -" int escapeIndex;\n" -" int isLeafNode;\n" -" int aabbOverlap;\n" -" while (curIndex < endNodeIndex)\n" -" {\n" -" btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" -" aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" -" isLeafNode = isLeaf(&rootNode);\n" -" if (aabbOverlap)\n" -" {\n" -" if (isLeafNode)\n" -" {\n" -" int triangleIndex = getTriangleIndex(&rootNode);\n" -" if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -" int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" -" for (int b=0;b<numChildrenB;b++)\n" -" {\n" -" if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" -" {\n" -" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -" int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" -" concavePairsOut[pairIdx+b] = newPair;\n" -" }\n" -" }\n" -" } else\n" -" {\n" -" int pairIdx = atomic_inc(numConcavePairsOut);\n" -" if (pairIdx<maxNumConcavePairsCapacity)\n" -" {\n" -" int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" -" concavePairsOut[pairIdx] = newPair;\n" -" }\n" -" }\n" -" } \n" -" curIndex++;\n" -" } else\n" -" {\n" -" if (isLeafNode)\n" -" {\n" -" curIndex++;\n" -" } else\n" -" {\n" -" escapeIndex = getEscapeIndex(&rootNode);\n" -" curIndex += escapeIndex;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl deleted file mode 100644 index e754f4e1da..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl +++ /dev/null @@ -1,311 +0,0 @@ - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#define AppendInc(x, out) out = atomic_inc(x) -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w -#ifdef cl_ext_atomic_counters_32 - #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else - #define counter32_t volatile __global int* -#endif - - -__kernel void mprPenetrationKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numPairs) -{ - int i = get_global_id(0); - int pairIndex = i; - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - return; - } - - float depthOut; - b3Float4 dirOut; - b3Float4 posOut; - - - int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut); - - - - - - if (res==0) - { - //add a contact - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - pairs[pairIndex].z = dstIdx; - __global struct b3Contact4Data* c = globalContactsOut + dstIdx; - c->m_worldNormalOnB = -dirOut;//normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - //for (int i=0;i<nContacts;i++) - posOut.w = -depthOut; - c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]]; - GET_NPOINTS(*c) = 1;//nContacts; - } - } - - } -} - -typedef float4 Quaternion; -#define make_float4 (float4) - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - -inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - - -bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i<numUnitSphereDirections;i++) - { - - float4 crossje; - crossje = unitSphereDirections[i]; - - if (dot3F4(DeltaC2,crossje)>0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* unitSphereDirections, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global float* dmins, - int numUnitSphereDirections, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = dmins[i]; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = separatingNormals[i]; - - int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; - if (numEdgeEdgeDirections>numUnitSphereDirections) - { - bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - } //if (hasSeparatingAxis[i]) - }//(i<numPairs) -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h deleted file mode 100644 index 7ed4b382c3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h +++ /dev/null @@ -1,1446 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* mprKernelsCL= \ -"/***\n" -" * ---------------------------------\n" -" * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" -" *\n" -" * This file was ported from mpr.c file, part of libccd.\n" -" * The Minkoski Portal Refinement implementation was ported \n" -" * to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" -" * at http://github.com/erwincoumans/bullet3\n" -" *\n" -" * Distributed under the OSI-approved BSD License (the \"License\");\n" -" * see <http://www.opensource.org/licenses/bsd-license.php>.\n" -" * This software is distributed WITHOUT ANY WARRANTY; without even the\n" -" * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" -" * See the License for more information.\n" -" */\n" -"#ifndef B3_MPR_PENETRATION_H\n" -"#define B3_MPR_PENETRATION_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -" b3Float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -" int m_unusedPadding1;\n" -" int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -" b3Float4 m_localCenter;\n" -" b3Float4 m_extents;\n" -" b3Float4 mC;\n" -" b3Float4 mE;\n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -" SHAPE_HEIGHT_FIELD=1,\n" -" SHAPE_CONVEX_HULL=3,\n" -" SHAPE_PLANE=4,\n" -" SHAPE_CONCAVE_TRIMESH=5,\n" -" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -" SHAPE_SPHERE=7,\n" -" MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -" b3Float4 m_childPosition;\n" -" b3Quat m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -" int m_bodyIndexA;\n" -" int m_bodyIndexB;\n" -"// int m_pairType;\n" -" int m_childShapeIndexA;\n" -" int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define B3_MPR_SQRT sqrt\n" -"#endif\n" -"#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" -"#define B3_MPR_FABS fabs\n" -"#define B3_MPR_TOLERANCE 1E-6f\n" -"#define B3_MPR_MAX_ITERATIONS 1000\n" -"struct _b3MprSupport_t \n" -"{\n" -" b3Float4 v; //!< Support point in minkowski sum\n" -" b3Float4 v1; //!< Support point in obj1\n" -" b3Float4 v2; //!< Support point in obj2\n" -"};\n" -"typedef struct _b3MprSupport_t b3MprSupport_t;\n" -"struct _b3MprSimplex_t \n" -"{\n" -" b3MprSupport_t ps[4];\n" -" int last; //!< index of last added point\n" -"};\n" -"typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" -"inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" -"{\n" -" return &s->ps[idx];\n" -"}\n" -"inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" -"{\n" -" s->last = size - 1;\n" -"}\n" -"inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" -"{\n" -" return s->last + 1;\n" -"}\n" -"inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" -"{\n" -" // here is no check on boundaries\n" -" return &s->ps[idx];\n" -"}\n" -"inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" -"{\n" -" *d = *s;\n" -"}\n" -"inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" -"{\n" -" b3MprSupportCopy(s->ps + pos, a);\n" -"}\n" -"inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" -"{\n" -" b3MprSupport_t supp;\n" -" b3MprSupportCopy(&supp, &s->ps[pos1]);\n" -" b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" -" b3MprSupportCopy(&s->ps[pos2], &supp);\n" -"}\n" -"inline int b3MprIsZero(float val)\n" -"{\n" -" return B3_MPR_FABS(val) < FLT_EPSILON;\n" -"}\n" -"inline int b3MprEq(float _a, float _b)\n" -"{\n" -" float ab;\n" -" float a, b;\n" -" ab = B3_MPR_FABS(_a - _b);\n" -" if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" -" return 1;\n" -" a = B3_MPR_FABS(_a);\n" -" b = B3_MPR_FABS(_b);\n" -" if (b > a){\n" -" return ab < FLT_EPSILON * b;\n" -" }else{\n" -" return ab < FLT_EPSILON * a;\n" -" }\n" -"}\n" -"inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" -"{\n" -" return b3MprEq((*a).x, (*b).x)\n" -" && b3MprEq((*a).y, (*b).y)\n" -" && b3MprEq((*a).z, (*b).z);\n" -"}\n" -"inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, b3ConstArray(b3Float4) verticesA)\n" -"{\n" -" b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -" float maxDot = -B3_LARGE_FLOAT;\n" -" if( 0 < hull->m_numVertices )\n" -" {\n" -" const b3Float4 scaled = supportVec;\n" -" int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -" return verticesA[hull->m_vertexOffset+index];\n" -" }\n" -" return supVec;\n" -"}\n" -"B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" const b3Float4* _dir, b3Float4* outp, int logme)\n" -"{\n" -" //dir is in worldspace, move to local space\n" -" \n" -" b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" -" b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" -" \n" -" b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" -" \n" -" const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" -" \n" -" //find local support vertex\n" -" int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" -" \n" -" b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" -" __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" -" \n" -" b3Float4 pInA;\n" -" if (logme)\n" -" {\n" -" b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" -" float maxDot = -B3_LARGE_FLOAT;\n" -" if( 0 < hull->m_numVertices )\n" -" {\n" -" const b3Float4 scaled = localDir;\n" -" int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" -" pInA = cpuVertices[hull->m_vertexOffset+index];\n" -" \n" -" }\n" -" } else\n" -" {\n" -" pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" -" }\n" -" //move vertex to world space\n" -" *outp = b3TransformPoint(pInA,pos,orn);\n" -" \n" -"}\n" -"inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" const b3Float4* _dir, b3MprSupport_t *supp)\n" -"{\n" -" b3Float4 dir;\n" -" dir = *_dir;\n" -" b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" -" dir = *_dir*-1.f;\n" -" b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" -" supp->v = supp->v1 - supp->v2;\n" -"}\n" -"inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" -"{\n" -" center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" -" center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" -" center->v = center->v1 - center->v2;\n" -"}\n" -"inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" -"{\n" -" (*v).x = x;\n" -" (*v).y = y;\n" -" (*v).z = z;\n" -" (*v).w = 0.f;\n" -"}\n" -"inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -" (*v).x += (*w).x;\n" -" (*v).y += (*w).y;\n" -" (*v).z += (*w).z;\n" -"}\n" -"inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" -"{\n" -" *v = *w;\n" -"}\n" -"inline void b3MprVec3Scale(b3Float4 *d, float k)\n" -"{\n" -" *d *= k;\n" -"}\n" -"inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -" float dot;\n" -" dot = b3Dot3F4(*a,*b);\n" -" return dot;\n" -"}\n" -"inline float b3MprVec3Len2(const b3Float4 *v)\n" -"{\n" -" return b3MprVec3Dot(v, v);\n" -"}\n" -"inline void b3MprVec3Normalize(b3Float4 *d)\n" -"{\n" -" float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" -" b3MprVec3Scale(d, k);\n" -"}\n" -"inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -" *d = b3Cross3(*a,*b);\n" -" \n" -"}\n" -"inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" -"{\n" -" *d = *v - *w;\n" -"}\n" -"inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" -"{\n" -" b3Float4 v2v1, v3v1;\n" -" b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" b3MprVec3Cross(dir, &v2v1, &v3v1);\n" -" b3MprVec3Normalize(dir);\n" -"}\n" -"inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" -" const b3Float4 *dir)\n" -"{\n" -" float dot;\n" -" dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -" return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" -" const b3MprSupport_t *v4,\n" -" const b3Float4 *dir)\n" -"{\n" -" float dv1, dv2, dv3, dv4;\n" -" float dot1, dot2, dot3;\n" -" // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" -" dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" -" dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" -" dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" -" dv4 = b3MprVec3Dot(&v4->v, dir);\n" -" dot1 = dv4 - dv1;\n" -" dot2 = dv4 - dv2;\n" -" dot3 = dv4 - dv3;\n" -" dot1 = B3_MPR_FMIN(dot1, dot2);\n" -" dot1 = B3_MPR_FMIN(dot1, dot3);\n" -" return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" -"}\n" -"inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, \n" -" const b3MprSupport_t *v4,\n" -" const b3Float4 *dir)\n" -"{\n" -" float dot;\n" -" dot = b3MprVec3Dot(&v4->v, dir);\n" -" return b3MprIsZero(dot) || dot > 0.f;\n" -"}\n" -"inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" -" const b3MprSupport_t *v4)\n" -"{\n" -" float dot;\n" -" b3Float4 v4v0;\n" -" b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" -" if (dot > 0.f){\n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" -" if (dot > 0.f){\n" -" b3MprSimplexSet(portal, 1, v4);\n" -" }else{\n" -" b3MprSimplexSet(portal, 3, v4);\n" -" }\n" -" }else{\n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" -" if (dot > 0.f){\n" -" b3MprSimplexSet(portal, 2, v4);\n" -" }else{\n" -" b3MprSimplexSet(portal, 1, v4);\n" -" }\n" -" }\n" -"}\n" -"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" __global int* hasSepAxis,\n" -" b3MprSimplex_t *portal)\n" -"{\n" -" b3Float4 dir, va, vb;\n" -" float dot;\n" -" int cont;\n" -" \n" -" \n" -" // vertex 0 is center of portal\n" -" b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" -" // vertex 0 is center of portal\n" -" b3MprSimplexSetSize(portal, 1);\n" -" \n" -" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -" b3Float4* b3mpr_vec3_origin = &zero;\n" -" if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" -" // Portal's center lies on origin (0,0,0) => we know that objects\n" -" // intersect but we would need to know penetration info.\n" -" // So move center little bit...\n" -" b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" -" b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" -" }\n" -" // vertex 1 = support in direction of origin\n" -" b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Scale(&dir, -1.f);\n" -" b3MprVec3Normalize(&dir);\n" -" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" -" b3MprSimplexSetSize(portal, 2);\n" -" // test if origin isn't outside of v1\n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" -" \n" -" if (b3MprIsZero(dot) || dot < 0.f)\n" -" return -1;\n" -" // vertex 2\n" -" b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" -" if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" -" // origin lies on v1\n" -" return 1;\n" -" }else{\n" -" // origin lies on v0-v1 segment\n" -" return 2;\n" -" }\n" -" }\n" -" b3MprVec3Normalize(&dir);\n" -" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" -" \n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" -" if (b3MprIsZero(dot) || dot < 0.f)\n" -" return -1;\n" -" b3MprSimplexSetSize(portal, 3);\n" -" // vertex 3 direction\n" -" b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -" &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -" &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Cross(&dir, &va, &vb);\n" -" b3MprVec3Normalize(&dir);\n" -" // it is better to form portal faces to be oriented \"outside\" origin\n" -" dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" -" if (dot > 0.f){\n" -" b3MprSimplexSwap(portal, 1, 2);\n" -" b3MprVec3Scale(&dir, -1.f);\n" -" }\n" -" while (b3MprSimplexSize(portal) < 4){\n" -" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" -" \n" -" dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" -" if (b3MprIsZero(dot) || dot < 0.f)\n" -" return -1;\n" -" cont = 0;\n" -" // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" -" // continue\n" -" b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -" &b3MprSimplexPoint(portal, 3)->v);\n" -" dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -" if (dot < 0.f && !b3MprIsZero(dot)){\n" -" b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" -" cont = 1;\n" -" }\n" -" if (!cont){\n" -" // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" -" // continue\n" -" b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" -" &b3MprSimplexPoint(portal, 2)->v);\n" -" dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" -" if (dot < 0.f && !b3MprIsZero(dot)){\n" -" b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" -" cont = 1;\n" -" }\n" -" }\n" -" if (cont){\n" -" b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" -" &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" -" &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Cross(&dir, &va, &vb);\n" -" b3MprVec3Normalize(&dir);\n" -" }else{\n" -" b3MprSimplexSetSize(portal, 4);\n" -" }\n" -" }\n" -" return 0;\n" -"}\n" -"B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" b3MprSimplex_t *portal)\n" -"{\n" -" b3Float4 dir;\n" -" b3MprSupport_t v4;\n" -" for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -" //while (1)\n" -" {\n" -" // compute direction outside the portal (from v0 throught v1,v2,v3\n" -" // face)\n" -" b3PortalDir(portal, &dir);\n" -" // test if origin is inside the portal\n" -" if (portalEncapsulesOrigin(portal, &dir))\n" -" return 0;\n" -" // get next support point\n" -" \n" -" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" -" // test if v4 can expand portal to contain origin and if portal\n" -" // expanding doesn't reach given tolerance\n" -" if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" -" || portalReachTolerance(portal, &v4, &dir))\n" -" {\n" -" return -1;\n" -" }\n" -" // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" -" // difference (direction from v0).\n" -" b3ExpandPortal(portal, &v4);\n" -" }\n" -" return -1;\n" -"}\n" -"B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" -"{\n" -" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -" b3Float4* b3mpr_vec3_origin = &zero;\n" -" b3Float4 dir;\n" -" size_t i;\n" -" float b[4], sum, inv;\n" -" b3Float4 vec, p1, p2;\n" -" b3PortalDir(portal, &dir);\n" -" // use barycentric coordinates of tetrahedron to find origin\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -" &b3MprSimplexPoint(portal, 2)->v);\n" -" b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -" &b3MprSimplexPoint(portal, 2)->v);\n" -" b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" -" sum = b[0] + b[1] + b[2] + b[3];\n" -" if (b3MprIsZero(sum) || sum < 0.f){\n" -" b[0] = 0.f;\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" -" &b3MprSimplexPoint(portal, 3)->v);\n" -" b[1] = b3MprVec3Dot(&vec, &dir);\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" -" &b3MprSimplexPoint(portal, 1)->v);\n" -" b[2] = b3MprVec3Dot(&vec, &dir);\n" -" b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" -" &b3MprSimplexPoint(portal, 2)->v);\n" -" b[3] = b3MprVec3Dot(&vec, &dir);\n" -" sum = b[1] + b[2] + b[3];\n" -" }\n" -" inv = 1.f / sum;\n" -" b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" -" b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" -" for (i = 0; i < 4; i++){\n" -" b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" -" b3MprVec3Scale(&vec, b[i]);\n" -" b3MprVec3Add(&p1, &vec);\n" -" b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" -" b3MprVec3Scale(&vec, b[i]);\n" -" b3MprVec3Add(&p2, &vec);\n" -" }\n" -" b3MprVec3Scale(&p1, inv);\n" -" b3MprVec3Scale(&p2, inv);\n" -" b3MprVec3Copy(pos, &p1);\n" -" b3MprVec3Add(pos, &p2);\n" -" b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" -"{\n" -" b3Float4 ab;\n" -" b3MprVec3Sub2(&ab, a, b);\n" -" return b3MprVec3Len2(&ab);\n" -"}\n" -"inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" -" const b3Float4 *x0,\n" -" const b3Float4 *b,\n" -" b3Float4 *witness)\n" -"{\n" -" // The computation comes from solving equation of segment:\n" -" // S(t) = x0 + t.d\n" -" // where - x0 is initial point of segment\n" -" // - d is direction of segment from x0 (|d| > 0)\n" -" // - t belongs to <0, 1> interval\n" -" // \n" -" // Than, distance from a segment to some point P can be expressed:\n" -" // D(t) = |x0 + t.d - P|^2\n" -" // which is distance from any point on segment. Minimization\n" -" // of this function brings distance from P to segment.\n" -" // Minimization of D(t) leads to simple quadratic equation that's\n" -" // solving is straightforward.\n" -" //\n" -" // Bonus of this method is witness point for free.\n" -" float dist, t;\n" -" b3Float4 d, a;\n" -" // direction of segment\n" -" b3MprVec3Sub2(&d, b, x0);\n" -" // precompute vector from P to x0\n" -" b3MprVec3Sub2(&a, x0, P);\n" -" t = -1.f * b3MprVec3Dot(&a, &d);\n" -" t /= b3MprVec3Len2(&d);\n" -" if (t < 0.f || b3MprIsZero(t)){\n" -" dist = b3MprVec3Dist2(x0, P);\n" -" if (witness)\n" -" b3MprVec3Copy(witness, x0);\n" -" }else if (t > 1.f || b3MprEq(t, 1.f)){\n" -" dist = b3MprVec3Dist2(b, P);\n" -" if (witness)\n" -" b3MprVec3Copy(witness, b);\n" -" }else{\n" -" if (witness){\n" -" b3MprVec3Copy(witness, &d);\n" -" b3MprVec3Scale(witness, t);\n" -" b3MprVec3Add(witness, x0);\n" -" dist = b3MprVec3Dist2(witness, P);\n" -" }else{\n" -" // recycling variables\n" -" b3MprVec3Scale(&d, t);\n" -" b3MprVec3Add(&d, &a);\n" -" dist = b3MprVec3Len2(&d);\n" -" }\n" -" }\n" -" return dist;\n" -"}\n" -"inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" -" const b3Float4 *x0, const b3Float4 *B,\n" -" const b3Float4 *C,\n" -" b3Float4 *witness)\n" -"{\n" -" // Computation comes from analytic expression for triangle (x0, B, C)\n" -" // T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" -" // Then equation for distance is:\n" -" // D(s, t) = | T(s, t) - P |^2\n" -" // This leads to minimization of quadratic function of two variables.\n" -" // The solution from is taken only if s is between 0 and 1, t is\n" -" // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" -" // computed.\n" -" b3Float4 d1, d2, a;\n" -" float u, v, w, p, q, r;\n" -" float s, t, dist, dist2;\n" -" b3Float4 witness2;\n" -" b3MprVec3Sub2(&d1, B, x0);\n" -" b3MprVec3Sub2(&d2, C, x0);\n" -" b3MprVec3Sub2(&a, x0, P);\n" -" u = b3MprVec3Dot(&a, &a);\n" -" v = b3MprVec3Dot(&d1, &d1);\n" -" w = b3MprVec3Dot(&d2, &d2);\n" -" p = b3MprVec3Dot(&a, &d1);\n" -" q = b3MprVec3Dot(&a, &d2);\n" -" r = b3MprVec3Dot(&d1, &d2);\n" -" s = (q * r - w * p) / (w * v - r * r);\n" -" t = (-s * r - q) / w;\n" -" if ((b3MprIsZero(s) || s > 0.f)\n" -" && (b3MprEq(s, 1.f) || s < 1.f)\n" -" && (b3MprIsZero(t) || t > 0.f)\n" -" && (b3MprEq(t, 1.f) || t < 1.f)\n" -" && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" -" if (witness){\n" -" b3MprVec3Scale(&d1, s);\n" -" b3MprVec3Scale(&d2, t);\n" -" b3MprVec3Copy(witness, x0);\n" -" b3MprVec3Add(witness, &d1);\n" -" b3MprVec3Add(witness, &d2);\n" -" dist = b3MprVec3Dist2(witness, P);\n" -" }else{\n" -" dist = s * s * v;\n" -" dist += t * t * w;\n" -" dist += 2.f * s * t * r;\n" -" dist += 2.f * s * p;\n" -" dist += 2.f * t * q;\n" -" dist += u;\n" -" }\n" -" }else{\n" -" dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" -" dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" -" if (dist2 < dist){\n" -" dist = dist2;\n" -" if (witness)\n" -" b3MprVec3Copy(witness, &witness2);\n" -" }\n" -" dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" -" if (dist2 < dist){\n" -" dist = dist2;\n" -" if (witness)\n" -" b3MprVec3Copy(witness, &witness2);\n" -" }\n" -" }\n" -" return dist;\n" -"}\n" -"B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" b3MprSimplex_t *portal,\n" -" float *depth, b3Float4 *pdir, b3Float4 *pos)\n" -"{\n" -" b3Float4 dir;\n" -" b3MprSupport_t v4;\n" -" unsigned long iterations;\n" -" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -" b3Float4* b3mpr_vec3_origin = &zero;\n" -" iterations = 1UL;\n" -" for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" -" //while (1)\n" -" {\n" -" // compute portal direction and obtain next support point\n" -" b3PortalDir(portal, &dir);\n" -" \n" -" b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" -" // reached tolerance -> find penetration info\n" -" if (portalReachTolerance(portal, &v4, &dir)\n" -" || iterations ==B3_MPR_MAX_ITERATIONS)\n" -" {\n" -" *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" -" *depth = B3_MPR_SQRT(*depth);\n" -" \n" -" if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" -" {\n" -" \n" -" *pdir = dir;\n" -" } \n" -" b3MprVec3Normalize(pdir);\n" -" \n" -" // barycentric coordinates:\n" -" b3FindPos(portal, pos);\n" -" return;\n" -" }\n" -" b3ExpandPortal(portal, &v4);\n" -" iterations++;\n" -" }\n" -"}\n" -"B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -" // Touching contact on portal's v1 - so depth is zero and direction\n" -" // is unimportant and pos can be guessed\n" -" *depth = 0.f;\n" -" b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" -" b3Float4* b3mpr_vec3_origin = &zero;\n" -" b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" -" b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -" b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -" b3MprVec3Scale(pos, 0.5);\n" -"}\n" -"B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" -" float *depth, b3Float4 *dir, b3Float4 *pos)\n" -"{\n" -" \n" -" // Origin lies on v0-v1 segment.\n" -" // Depth is distance to v1, direction also and position must be\n" -" // computed\n" -" b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" -" b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" -" b3MprVec3Scale(pos, 0.5f);\n" -" \n" -" b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" -" *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" -" b3MprVec3Normalize(dir);\n" -"}\n" -"inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" -" b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" -" b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" -" b3ConstArray(b3Collidable_t) cpuCollidables,\n" -" b3ConstArray(b3Float4) cpuVertices,\n" -" __global b3Float4* sepAxis,\n" -" __global int* hasSepAxis,\n" -" float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" -"{\n" -" \n" -" b3MprSimplex_t portal;\n" -" \n" -"// if (!hasSepAxis[pairIndex])\n" -" // return -1;\n" -" \n" -" hasSepAxis[pairIndex] = 0;\n" -" int res;\n" -" // Phase 1: Portal discovery\n" -" res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" -" \n" -" \n" -" //sepAxis[pairIndex] = *pdir;//or -dir?\n" -" switch (res)\n" -" {\n" -" case 0:\n" -" {\n" -" // Phase 2: Portal refinement\n" -" \n" -" res = b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" -" if (res < 0)\n" -" return -1;\n" -" // Phase 3. Penetration info\n" -" b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" -" hasSepAxis[pairIndex] = 1;\n" -" sepAxis[pairIndex] = -*dirOut;\n" -" break;\n" -" }\n" -" case 1:\n" -" {\n" -" // Touching contact on portal's v1.\n" -" b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" -" break;\n" -" }\n" -" case 2:\n" -" {\n" -" \n" -" b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" -" break;\n" -" }\n" -" default:\n" -" {\n" -" hasSepAxis[pairIndex]=0;\n" -" //if (res < 0)\n" -" //{\n" -" // Origin isn't inside portal - no collision.\n" -" return -1;\n" -" //}\n" -" }\n" -" };\n" -" \n" -" return 0;\n" -"};\n" -"#endif //B3_MPR_PENETRATION_H\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#ifdef cl_ext_atomic_counters_32\n" -" #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -" #define counter32_t volatile __global int*\n" -"#endif\n" -"__kernel void mprPenetrationKernel( __global int4* pairs,\n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes, \n" -" __global const float4* vertices,\n" -" __global float4* separatingNormals,\n" -" __global int* hasSeparatingAxis,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int contactCapacity,\n" -" int numPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" if (i<numPairs)\n" -" {\n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" //once the broadphase avoids static-static pairs, we can remove this test\n" -" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -" {\n" -" return;\n" -" }\n" -" \n" -" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -" {\n" -" return;\n" -" }\n" -" float depthOut;\n" -" b3Float4 dirOut;\n" -" b3Float4 posOut;\n" -" int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" -" \n" -" \n" -" \n" -" \n" -" if (res==0)\n" -" {\n" -" //add a contact\n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" if (dstIdx<contactCapacity)\n" -" {\n" -" pairs[pairIndex].z = dstIdx;\n" -" __global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" -" c->m_worldNormalOnB = -dirOut;//normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" //for (int i=0;i<nContacts;i++)\n" -" posOut.w = -depthOut;\n" -" c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" -" GET_NPOINTS(*c) = 1;//nContacts;\n" -" }\n" -" }\n" -" }\n" -"}\n" -"typedef float4 Quaternion;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -" min[0] = FLT_MAX;\n" -" max[0] = -FLT_MAX;\n" -" int numVerts = hull->m_numVertices;\n" -" const float4 localDir = qtInvRotate(orn,*dir);\n" -" float offset = dot(pos,*dir);\n" -" for(int i=0;i<numVerts;i++)\n" -" {\n" -" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -" if(dp < min[0]) \n" -" min[0] = dp;\n" -" if(dp > max[0]) \n" -" max[0] = dp;\n" -" }\n" -" if(min[0]>max[0])\n" -" {\n" -" float tmp = min[0];\n" -" min[0] = max[0];\n" -" max[0] = tmp;\n" -" }\n" -" min[0] += offset;\n" -" max[0] += offset;\n" -"}\n" -"bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* vertices,\n" -" __global const float4* unitSphereDirections,\n" -" int numUnitSphereDirections,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" int curEdgeEdge = 0;\n" -" // Test unit sphere directions\n" -" for (int i=0;i<numUnitSphereDirections;i++)\n" -" {\n" -" float4 crossje;\n" -" crossje = unitSphereDirections[i]; \n" -" if (dot3F4(DeltaC2,crossje)>0)\n" -" crossje *= -1.f;\n" -" {\n" -" float dist;\n" -" bool result = true;\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -" \n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" \n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" dist = d0<d1 ? d0:d1;\n" -" result = true;\n" -" \n" -" if(dist<*dmin)\n" -" {\n" -" *dmin = dist;\n" -" *sep = crossje;\n" -" }\n" -" }\n" -" }\n" -" \n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* unitSphereDirections,\n" -" __global float4* separatingNormals,\n" -" __global int* hasSeparatingAxis,\n" -" __global float* dmins,\n" -" int numUnitSphereDirections,\n" -" int numPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" \n" -" float dmin = dmins[i];\n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" float4 sepNormal = separatingNormals[i];\n" -" \n" -" int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -" if (numEdgeEdgeDirections>numUnitSphereDirections)\n" -" {\n" -" bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis[i] = 1;\n" -" separatingNormals[i] = sepNormal;\n" -" }\n" -" }\n" -" } //if (hasSeparatingAxis[i])\n" -" }//(i<numPairs)\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl deleted file mode 100644 index 9c9e920f13..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl +++ /dev/null @@ -1,1374 +0,0 @@ -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_PLANE 4 -#define SHAPE_CONCAVE_TRIMESH 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - -#define max2 max -#define min2 min - -typedef unsigned int u32; - - - - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -///keep this in sync with btCollidable.h -typedef struct -{ - int m_numChildShapes; - float m_radius; - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; - -} ConvexPolyhedronCL; - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -//#define dot3F4 dot - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - -void trMul(float4 translationA, Quaternion orientationA, - float4 translationB, Quaternion orientationB, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtMul(orientationA,orientationB); - *translationOut = transform(&translationB,&translationA,&orientationA); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - - -__inline float4 lerp3(const float4 a,const float4 b, float t) -{ - return make_float4( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - - -float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace) -{ - float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0); - float dist = dot3F4(n, point) + planeEqn.w; - *closestPointOnFace = point - dist * n; - return dist; -} - - - -inline bool IsPointInPolygon(float4 p, - const btGpuFace* face, - __global const float4* baseVertex, - __global const int* convexIndices, - float4* out) -{ - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; - - float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); - - if (face->m_numIndices<2) - return false; - - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; - - b = v0; - - for(unsigned i=0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b-a; - ap = p-a; - v = cross3(ab,plane); - - if (dot(ap, v) > 0.f) - { - float ab_m2 = dot(ab, ab); - float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - - - - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* convexVertices, - __global const int* convexIndices, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = true; - - for ( int f = 0; f < numFaces; f++ ) - { - btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; - - // set up a plane equation - float4 planeEqn; - float4 n1 = face.m_plane; - n1.w = 0.f; - planeEqn = n1; - planeEqn.w = face.m_plane.w; - - - // compute a signed distance from the vertex in cloth to the face of rigidbody. - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - // If the distance is positive, the plane is a separating plane. - if ( dist > radius ) - { - bCollide = false; - break; - } - - - if (dist>0) - { - //might hit an edge or vertex - float4 out; - float4 zeroPos = make_float4(0,0,0,0); - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist>minDist) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld = planeEqn; - - } - } else - { - float4 tmp = spherePos-out; - float l2 = dot(tmp,tmp); - if (l2<radius*radius) - { - dist = sqrt(l2); - if (dist>minDist) - { - minDist = dist; - closestPnt = out; - hitNormalWorld = tmp/dist; - - } - - } else - { - bCollide = false; - break; - } - } - } else - { - if ( dist > minDist ) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld.xyz = planeEqn.xyz; - } - } - - } - - - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - - float actualDepth = minDist-radius; - if (actualDepth<=0.f) - { - - - pOnB1.w = actualDepth; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - - if (1)//dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - GET_NPOINTS(*c) = 1; - } - - } - }//if (hasCollision) - -} - - - -int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0.f); - { - - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - -#define MAX_PLANE_CONVEX_POINTS 64 - -int computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu*collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* convexVertices, - __global const int* convexIndices, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 posB, - Quaternion ornB - ) -{ - int resultIndex=-1; - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex]; - - float4 posA; - posA = rigidBodies[bodyIndexA].m_pos; - Quaternion ornA; - ornA = rigidBodies[bodyIndexA].m_quat; - - int numContactsOut = 0; - int numWorldVertsB1= 0; - - float4 planeEq; - planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); - float4 planeNormalWorld; - planeNormalWorld = qtRotate(ornA,planeNormal); - float planeConstant = planeEq.w; - - float4 invPosA;Quaternion invOrnA; - float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; - { - - trInverse(posA,ornA,&invPosA,&invOrnA); - trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - } - float4 invPosB;Quaternion invOrnB; - float4 planeInConvexPos1; Quaternion planeInConvexOrn1; - { - - trInverse(posB,ornB,&invPosB,&invOrnB); - trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); - } - - - float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal); - float maxDot = -1e30; - int hitVertex=-1; - float4 hitVtx; - - - - float4 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - int4 contactIdx; - contactIdx=make_int4(0,1,2,3); - - - for (int i=0;i<hullB->m_numVertices;i++) - { - float4 vtx = convexVertices[hullB->m_vertexOffset+i]; - float curDot = dot(vtx,planeNormalInConvex); - - - if (curDot>maxDot) - { - hitVertex=i; - maxDot=curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints<MAX_PLANE_CONVEX_POINTS) - { - float4 vtxWorld = transform(&vtx, &posB, &ornB); - float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld; - float dist = dot(planeNormal,vtxInPlane)-planeConstant; - if (dist<0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - - } - - int numReducedPoints = numPoints; - if (numPoints>4) - { - numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - - if (numReducedPoints>0) - { - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - resultIndex = dstIdx; - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - //c->setFrictionCoeff(0.7); - //c->setRestituitionCoeff(0.f); - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - switch (numReducedPoints) - { - case 4: - c->m_worldPosB[3] = contactPoints[contactIdx.w]; - case 3: - c->m_worldPosB[2] = contactPoints[contactIdx.z]; - case 2: - c->m_worldPosB[1] = contactPoints[contactIdx.y]; - case 1: - c->m_worldPosB[0] = contactPoints[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = numReducedPoints; - }//if (dstIdx < numPairs) - } - - return resultIndex; -} - - -void computeContactPlaneSphere(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity) -{ - float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - float radius = collidables[collidableIndexB].m_radius; - float4 posA1 = rigidBodies[bodyIndexA].m_pos; - float4 ornA1 = rigidBodies[bodyIndexA].m_quat; - float4 posB1 = rigidBodies[bodyIndexB].m_pos; - float4 ornB1 = rigidBodies[bodyIndexB].m_quat; - - bool hasCollision = false; - float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); - float planeConstant = planeEq.w; - float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; - { - float4 invPosA;Quaternion invOrnA; - trInverse(posA1,ornA1,&invPosA,&invOrnA); - trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - } - float4 planeInConvexPos1; Quaternion planeInConvexOrn1; - { - float4 invPosB;Quaternion invOrnB; - trInverse(posB1,ornB1,&invPosB,&invOrnB); - trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); - } - float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius; - float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant; - hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold(); - if (hasCollision) - { - float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1; - float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1); - float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1); - float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance; - pOnB1.w = distance; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if (hasCollision) -} - - -__kernel void primitiveContactsKernel( __global int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, int maxContactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 posB; - posB = rigidBodies[bodyIndexB].m_pos; - Quaternion ornB; - ornB = rigidBodies[bodyIndexB].m_quat; - int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB); - if (contactIndex>=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - float4 posA; - posA = rigidBodies[bodyIndexA].m_pos; - Quaternion ornA; - ornA = rigidBodies[bodyIndexA].m_quat; - - - int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - - if (contactIndex>=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - - computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - - return; - } - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = rigidBodies[bodyIndexB].m_pos; - float4 convexOrn = rigidBodies[bodyIndexB].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - return; - } - - - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - //sphere-sphere - float radiusA = collidables[collidableIndexA].m_radius; - float radiusB = collidables[collidableIndexB].m_radius; - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - float4 diff = posA-posB; - float len = length(diff); - - ///iff distance positive, don't generate a new contact - if ( len <= (radiusA+radiusB)) - { - ///distance (negative means penetration) - float dist = len - (radiusA+radiusB); - float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); - if (len > 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - - return; - }//SHAPE_SPHERE SHAPE_SPHERE - - }// if (i<numPairs) - -} - - -// work-in-progress -__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global const btGpuChildShape* gpuChildShapes, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity - ) -{ - - int i = get_global_id(0); - if (i<numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - int pairIndex = i; - if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE)) - { - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = posA; - float4 convexOrn = ornA; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL)) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = posB; - float4 convexOrn = ornB; - - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - }// if (i<numCompoundPairs) -} - - -bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p ) -{ - - const float4* p1 = &vertices[0]; - const float4* p2 = &vertices[1]; - const float4* p3 = &vertices[2]; - - float4 edge1; edge1 = (*p2 - *p1); - float4 edge2; edge2 = ( *p3 - *p2 ); - float4 edge3; edge3 = ( *p1 - *p3 ); - - - float4 p1_to_p; p1_to_p = ( *p - *p1 ); - float4 p2_to_p; p2_to_p = ( *p - *p2 ); - float4 p3_to_p; p3_to_p = ( *p - *p3 ); - - float4 edge1_normal; edge1_normal = ( cross(edge1,*normal)); - float4 edge2_normal; edge2_normal = ( cross(edge2,*normal)); - float4 edge3_normal; edge3_normal = ( cross(edge3,*normal)); - - - - float r1, r2, r3; - r1 = dot(edge1_normal,p1_to_p ); - r2 = dot(edge2_normal,p2_to_p ); - r3 = dot(edge3_normal,p3_to_p ); - - if ( r1 > 0 && r2 > 0 && r3 > 0 ) - return true; - if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) - return true; - return false; - -} - - -float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) -{ - float4 diff = p - from; - float4 v = to - from; - float t = dot(v,diff); - - if (t > 0) - { - float dotVV = dot(v,v); - if (t < dotVV) - { - t /= dotVV; - diff -= t*v; - } else - { - t = 1; - diff -= v; - } - } else - { - t = 0; - } - *nearest = from + t*v; - return dot(diff,diff); -} - - -void computeContactSphereTriangle(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - const float4* triangleVertices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat, - int faceIndex - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - int numFaces = 3; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = false; - - - ////////////////////////////////////// - - float4 sphereCenter; - sphereCenter = spherePos; - - const float4* vertices = triangleVertices; - float contactBreakingThreshold = 0.f;//todo? - float radiusWithThreshold = radius + contactBreakingThreshold; - float4 edge10; - edge10 = vertices[1]-vertices[0]; - edge10.w = 0.f;//is this needed? - float4 edge20; - edge20 = vertices[2]-vertices[0]; - edge20.w = 0.f;//is this needed? - float4 normal = cross3(edge10,edge20); - normal = normalize(normal); - float4 p1ToCenter; - p1ToCenter = sphereCenter - vertices[0]; - - float distanceFromPlane = dot(p1ToCenter,normal); - - if (distanceFromPlane < 0.f) - { - //triangle facing the other way - distanceFromPlane *= -1.f; - normal *= -1.f; - } - hitNormalWorld = normal; - - bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold; - - // Check for contact / intersection - bool hasContact = false; - float4 contactPoint; - if (isInsideContactPlane) - { - - if (pointInTriangle(vertices,&normal, &sphereCenter)) - { - // Inside the contact wedge - touches a point on the shell plane - hasContact = true; - contactPoint = sphereCenter - normal*distanceFromPlane; - - } else { - // Could be inside one of the contact capsules - float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold; - float4 nearestOnEdge; - int numEdges = 3; - for (int i = 0; i < numEdges; i++) - { - float4 pa =vertices[i]; - float4 pb = vertices[(i+1)%3]; - - float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge); - if (distanceSqr < contactCapsuleRadiusSqr) - { - // Yep, we're inside a capsule - hasContact = true; - contactPoint = nearestOnEdge; - - } - - } - } - } - - if (hasContact) - { - - closestPnt = contactPoint; - float4 contactToCenter = sphereCenter - contactPoint; - minDist = length(contactToCenter); - if (minDist>FLT_EPSILON) - { - hitNormalWorld = normalize(contactToCenter);//*(1./minDist); - bCollide = true; - } - - } - - - ///////////////////////////////////// - - if (bCollide && minDist > -10000) - { - - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - float actualDepth = minDist-radius; - - - if (actualDepth<=0.f) - { - pOnB1.w = actualDepth; - int dstIdx; - - - float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1); - if (lenSqr>FLT_EPSILON) - { - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - - c->m_childIndexA = -1; - c->m_childIndexB = faceIndex; - - GET_NPOINTS(*c) = 1; - } - } - - } - }//if (hasCollision) - -} - - - -// work-in-progress -__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numConcavePairs, int maxContactCapacity - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE) - { - int f = concavePairs[i].z; - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - } - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables, - verticesA, - globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn, f); - - return; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h deleted file mode 100644 index b0103fe674..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ /dev/null @@ -1,1289 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* primitiveContactsKernelsCL= \ -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_PLANE 4\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -" int m_numChildShapes;\n" -" float m_radius;\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -" \n" -"} btCollidableGpu;\n" -"typedef struct\n" -"{\n" -" float4 m_childPosition;\n" -" float4 m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"} btGpuChildShape;\n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_collidableIdx; \n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -" float4 m_localCenter;\n" -" float4 m_extents;\n" -" float4 mC;\n" -" float4 mE;\n" -" \n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" \n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct\n" -"{\n" -" float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -"} btGpuFace;\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"void trInverse(float4 translationIn, Quaternion orientationIn,\n" -" float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -" *orientationOut = qtInvert(orientationIn);\n" -" *translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void trMul(float4 translationA, Quaternion orientationA,\n" -" float4 translationB, Quaternion orientationB,\n" -" float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -" *orientationOut = qtMul(orientationA,orientationB);\n" -" *translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float t)\n" -"{\n" -" return make_float4( a.x + (b.x - a.x) * t,\n" -" a.y + (b.y - a.y) * t,\n" -" a.z + (b.z - a.z) * t,\n" -" 0.f);\n" -"}\n" -"float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" -"{\n" -" float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" -" float dist = dot3F4(n, point) + planeEqn.w;\n" -" *closestPointOnFace = point - dist * n;\n" -" return dist;\n" -"}\n" -"inline bool IsPointInPolygon(float4 p, \n" -" const btGpuFace* face,\n" -" __global const float4* baseVertex,\n" -" __global const int* convexIndices,\n" -" float4* out)\n" -"{\n" -" float4 a;\n" -" float4 b;\n" -" float4 ab;\n" -" float4 ap;\n" -" float4 v;\n" -" float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" -" \n" -" if (face->m_numIndices<2)\n" -" return false;\n" -" \n" -" float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" -" \n" -" b = v0;\n" -" for(unsigned i=0; i != face->m_numIndices; ++i)\n" -" {\n" -" a = b;\n" -" float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" -" b = vi;\n" -" ab = b-a;\n" -" ap = p-a;\n" -" v = cross3(ab,plane);\n" -" if (dot(ap, v) > 0.f)\n" -" {\n" -" float ab_m2 = dot(ab, ab);\n" -" float rt = ab_m2 != 0.f ? dot(ab, ap) / ab_m2 : 0.f;\n" -" if (rt <= 0.f)\n" -" {\n" -" *out = a;\n" -" }\n" -" else if (rt >= 1.f) \n" -" {\n" -" *out = b;\n" -" }\n" -" else\n" -" {\n" -" float s = 1.f - rt;\n" -" out[0].x = s * a.x + rt * b.x;\n" -" out[0].y = s * a.y + rt * b.y;\n" -" out[0].z = s * a.z + rt * b.z;\n" -" }\n" -" return false;\n" -" }\n" -" }\n" -" return true;\n" -"}\n" -"void computeContactSphereConvex(int pairIndex,\n" -" int bodyIndexA, int bodyIndexB, \n" -" int collidableIndexA, int collidableIndexB, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes,\n" -" __global const float4* convexVertices,\n" -" __global const int* convexIndices,\n" -" __global const btGpuFace* faces,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int maxContactCapacity,\n" -" float4 spherePos2,\n" -" float radius,\n" -" float4 pos,\n" -" float4 quat\n" -" )\n" -"{\n" -" float4 invPos;\n" -" float4 invOrn;\n" -" trInverse(pos,quat, &invPos,&invOrn);\n" -" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -" int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -" int numFaces = convexShapes[shapeIndex].m_numFaces;\n" -" float4 closestPnt = (float4)(0, 0, 0, 0);\n" -" float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -" float minDist = -1000000.f;\n" -" bool bCollide = true;\n" -" for ( int f = 0; f < numFaces; f++ )\n" -" {\n" -" btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" -" // set up a plane equation \n" -" float4 planeEqn;\n" -" float4 n1 = face.m_plane;\n" -" n1.w = 0.f;\n" -" planeEqn = n1;\n" -" planeEqn.w = face.m_plane.w;\n" -" \n" -" \n" -" // compute a signed distance from the vertex in cloth to the face of rigidbody.\n" -" float4 pntReturn;\n" -" float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn);\n" -" // If the distance is positive, the plane is a separating plane. \n" -" if ( dist > radius )\n" -" {\n" -" bCollide = false;\n" -" break;\n" -" }\n" -" if (dist>0)\n" -" {\n" -" //might hit an edge or vertex\n" -" float4 out;\n" -" float4 zeroPos = make_float4(0,0,0,0);\n" -" bool isInPoly = IsPointInPolygon(spherePos,\n" -" &face,\n" -" &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" -" convexIndices,\n" -" &out);\n" -" if (isInPoly)\n" -" {\n" -" if (dist>minDist)\n" -" {\n" -" minDist = dist;\n" -" closestPnt = pntReturn;\n" -" hitNormalWorld = planeEqn;\n" -" \n" -" }\n" -" } else\n" -" {\n" -" float4 tmp = spherePos-out;\n" -" float l2 = dot(tmp,tmp);\n" -" if (l2<radius*radius)\n" -" {\n" -" dist = sqrt(l2);\n" -" if (dist>minDist)\n" -" {\n" -" minDist = dist;\n" -" closestPnt = out;\n" -" hitNormalWorld = tmp/dist;\n" -" \n" -" }\n" -" \n" -" } else\n" -" {\n" -" bCollide = false;\n" -" break;\n" -" }\n" -" }\n" -" } else\n" -" {\n" -" if ( dist > minDist )\n" -" {\n" -" minDist = dist;\n" -" closestPnt = pntReturn;\n" -" hitNormalWorld.xyz = planeEqn.xyz;\n" -" }\n" -" }\n" -" \n" -" }\n" -" \n" -" if (bCollide && minDist > -10000)\n" -" {\n" -" float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -" float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" -" \n" -" float actualDepth = minDist-radius;\n" -" if (actualDepth<=0.f)\n" -" {\n" -" \n" -" pOnB1.w = actualDepth;\n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" \n" -" \n" -" if (1)//dstIdx < maxContactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -" c->m_worldPosB[0] = pOnB1;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" GET_NPOINTS(*c) = 1;\n" -" } \n" -" }\n" -" }//if (hasCollision)\n" -"}\n" -" \n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -" if( nPoints == 0 )\n" -" return 0;\n" -" \n" -" if (nPoints <=4)\n" -" return nPoints;\n" -" \n" -" \n" -" if (nPoints >64)\n" -" nPoints = 64;\n" -" \n" -" float4 center = make_float4(0.f);\n" -" {\n" -" \n" -" for (int i=0;i<nPoints;i++)\n" -" center += p[i];\n" -" center /= (float)nPoints;\n" -" }\n" -" \n" -" \n" -" \n" -" // sample 4 directions\n" -" \n" -" float4 aVector = p[0] - center;\n" -" float4 u = cross3( nearNormal, aVector );\n" -" float4 v = cross3( nearNormal, u );\n" -" u = normalize3( u );\n" -" v = normalize3( v );\n" -" \n" -" \n" -" //keep point with deepest penetration\n" -" float minW= FLT_MAX;\n" -" \n" -" int minIndex=-1;\n" -" \n" -" float4 maxDots;\n" -" maxDots.x = FLT_MIN;\n" -" maxDots.y = FLT_MIN;\n" -" maxDots.z = FLT_MIN;\n" -" maxDots.w = FLT_MIN;\n" -" \n" -" // idx, distance\n" -" for(int ie = 0; ie<nPoints; ie++ )\n" -" {\n" -" if (p[ie].w<minW)\n" -" {\n" -" minW = p[ie].w;\n" -" minIndex=ie;\n" -" }\n" -" float f;\n" -" float4 r = p[ie]-center;\n" -" f = dot3F4( u, r );\n" -" if (f<maxDots.x)\n" -" {\n" -" maxDots.x = f;\n" -" contactIdx[0].x = ie;\n" -" }\n" -" \n" -" f = dot3F4( -u, r );\n" -" if (f<maxDots.y)\n" -" {\n" -" maxDots.y = f;\n" -" contactIdx[0].y = ie;\n" -" }\n" -" \n" -" \n" -" f = dot3F4( v, r );\n" -" if (f<maxDots.z)\n" -" {\n" -" maxDots.z = f;\n" -" contactIdx[0].z = ie;\n" -" }\n" -" \n" -" f = dot3F4( -v, r );\n" -" if (f<maxDots.w)\n" -" {\n" -" maxDots.w = f;\n" -" contactIdx[0].w = ie;\n" -" }\n" -" \n" -" }\n" -" \n" -" if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -" {\n" -" //replace the first contact with minimum (todo: replace contact with least penetration)\n" -" contactIdx[0].x = minIndex;\n" -" }\n" -" \n" -" return 4;\n" -" \n" -"}\n" -"#define MAX_PLANE_CONVEX_POINTS 64\n" -"int computeContactPlaneConvex(int pairIndex,\n" -" int bodyIndexA, int bodyIndexB, \n" -" int collidableIndexA, int collidableIndexB, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu*collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes,\n" -" __global const float4* convexVertices,\n" -" __global const int* convexIndices,\n" -" __global const btGpuFace* faces,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int maxContactCapacity,\n" -" float4 posB,\n" -" Quaternion ornB\n" -" )\n" -"{\n" -" int resultIndex=-1;\n" -" int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" -" __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" -" \n" -" float4 posA;\n" -" posA = rigidBodies[bodyIndexA].m_pos;\n" -" Quaternion ornA;\n" -" ornA = rigidBodies[bodyIndexA].m_quat;\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" float4 planeEq;\n" -" planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -" float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -" float4 planeNormalWorld;\n" -" planeNormalWorld = qtRotate(ornA,planeNormal);\n" -" float planeConstant = planeEq.w;\n" -" \n" -" float4 invPosA;Quaternion invOrnA;\n" -" float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -" {\n" -" \n" -" trInverse(posA,ornA,&invPosA,&invOrnA);\n" -" trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -" }\n" -" float4 invPosB;Quaternion invOrnB;\n" -" float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" -" {\n" -" \n" -" trInverse(posB,ornB,&invPosB,&invOrnB);\n" -" trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); \n" -" }\n" -" \n" -" float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" -" float maxDot = -1e30;\n" -" int hitVertex=-1;\n" -" float4 hitVtx;\n" -" float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" -" int numPoints = 0;\n" -" int4 contactIdx;\n" -" contactIdx=make_int4(0,1,2,3);\n" -" \n" -" \n" -" for (int i=0;i<hullB->m_numVertices;i++)\n" -" {\n" -" float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" -" float curDot = dot(vtx,planeNormalInConvex);\n" -" if (curDot>maxDot)\n" -" {\n" -" hitVertex=i;\n" -" maxDot=curDot;\n" -" hitVtx = vtx;\n" -" //make sure the deepest points is always included\n" -" if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" -" numPoints--;\n" -" }\n" -" if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" -" {\n" -" float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" -" float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" -" float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" -" if (dist<0.f)\n" -" {\n" -" vtxWorld.w = dist;\n" -" contactPoints[numPoints] = vtxWorld;\n" -" numPoints++;\n" -" }\n" -" }\n" -" }\n" -" int numReducedPoints = numPoints;\n" -" if (numPoints>4)\n" -" {\n" -" numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" -" }\n" -" if (numReducedPoints>0)\n" -" {\n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" if (dstIdx < maxContactCapacity)\n" -" {\n" -" resultIndex = dstIdx;\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -planeNormalWorld;\n" -" //c->setFrictionCoeff(0.7);\n" -" //c->setRestituitionCoeff(0.f);\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" switch (numReducedPoints)\n" -" {\n" -" case 4:\n" -" c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" -" case 3:\n" -" c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" -" case 2:\n" -" c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" -" case 1:\n" -" c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" -" default:\n" -" {\n" -" }\n" -" };\n" -" \n" -" GET_NPOINTS(*c) = numReducedPoints;\n" -" }//if (dstIdx < numPairs)\n" -" } \n" -" return resultIndex;\n" -"}\n" -"void computeContactPlaneSphere(int pairIndex,\n" -" int bodyIndexA, int bodyIndexB, \n" -" int collidableIndexA, int collidableIndexB, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const btGpuFace* faces,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int maxContactCapacity)\n" -"{\n" -" float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" -" float radius = collidables[collidableIndexB].m_radius;\n" -" float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" -" float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" -" float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" -" float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" -" \n" -" bool hasCollision = false;\n" -" float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" -" float planeConstant = planeEq.w;\n" -" float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" -" {\n" -" float4 invPosA;Quaternion invOrnA;\n" -" trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" -" trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -" }\n" -" float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" -" {\n" -" float4 invPosB;Quaternion invOrnB;\n" -" trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" -" trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); \n" -" }\n" -" float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" -" float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" -" float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" -" hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" -" if (hasCollision)\n" -" {\n" -" float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1;\n" -" float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" -" float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" -" float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" -" pOnB1.w = distance;\n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" \n" -" if (dstIdx < maxContactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -" c->m_worldPosB[0] = pOnB1;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" GET_NPOINTS(*c) = 1;\n" -" }//if (dstIdx < numPairs)\n" -" }//if (hasCollision)\n" -"}\n" -"__kernel void primitiveContactsKernel( __global int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int numPairs, int maxContactCapacity)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" float4 worldVertsB1[64];\n" -" float4 worldVertsB2[64];\n" -" int capacityWorldVerts = 64; \n" -" float4 localContactsOut[64];\n" -" int localContactCapacity=64;\n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" if (i<numPairs)\n" -" {\n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -" {\n" -" float4 posB;\n" -" posB = rigidBodies[bodyIndexB].m_pos;\n" -" Quaternion ornB;\n" -" ornB = rigidBodies[bodyIndexB].m_quat;\n" -" int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,\n" -" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" -" if (contactIndex>=0)\n" -" pairs[pairIndex].z = contactIndex;\n" -" return;\n" -" }\n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -" {\n" -" float4 posA;\n" -" posA = rigidBodies[bodyIndexA].m_pos;\n" -" Quaternion ornA;\n" -" ornA = rigidBodies[bodyIndexA].m_quat;\n" -" int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,\n" -" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -" if (contactIndex>=0)\n" -" pairs[pairIndex].z = contactIndex;\n" -" return;\n" -" }\n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -" rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -" return;\n" -" }\n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" -" {\n" -" computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" -" rigidBodies,collidables,\n" -" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" -" return;\n" -" }\n" -" \n" -" \n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" -" {\n" -" \n" -" float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -" float sphereRadius = collidables[collidableIndexA].m_radius;\n" -" float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" -" float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" -" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -" spherePos,sphereRadius,convexPos,convexOrn);\n" -" return;\n" -" }\n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" \n" -" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -" float sphereRadius = collidables[collidableIndexB].m_radius;\n" -" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -" computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -" spherePos,sphereRadius,convexPos,convexOrn);\n" -" return;\n" -" }\n" -" \n" -" \n" -" \n" -" \n" -" \n" -" \n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" //sphere-sphere\n" -" float radiusA = collidables[collidableIndexA].m_radius;\n" -" float radiusB = collidables[collidableIndexB].m_radius;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" float4 diff = posA-posB;\n" -" float len = length(diff);\n" -" \n" -" ///iff distance positive, don't generate a new contact\n" -" if ( len <= (radiusA+radiusB))\n" -" {\n" -" ///distance (negative means penetration)\n" -" float dist = len - (radiusA+radiusB);\n" -" float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -" if (len > 0.00001)\n" -" {\n" -" normalOnSurfaceB = diff / len;\n" -" }\n" -" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -" contactPosB.w = dist;\n" -" \n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" \n" -" if (dstIdx < maxContactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = normalOnSurfaceB;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_worldPosB[0] = contactPosB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" GET_NPOINTS(*c) = 1;\n" -" }//if (dstIdx < numPairs)\n" -" }//if ( len <= (radiusA+radiusB))\n" -" return;\n" -" }//SHAPE_SPHERE SHAPE_SPHERE\n" -" }// if (i<numPairs)\n" -"}\n" -"// work-in-progress\n" -"__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int numCompoundPairs, int maxContactCapacity\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i<numCompoundPairs)\n" -" {\n" -" int bodyIndexA = gpuCompoundPairs[i].x;\n" -" int bodyIndexB = gpuCompoundPairs[i].y;\n" -" int childShapeIndexA = gpuCompoundPairs[i].z;\n" -" int childShapeIndexB = gpuCompoundPairs[i].w;\n" -" \n" -" int collidableIndexA = -1;\n" -" int collidableIndexB = -1;\n" -" \n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" \n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" \n" -" if (childShapeIndexA >= 0)\n" -" {\n" -" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -" float4 newOrnA = qtMul(ornA,childOrnA);\n" -" posA = newPosA;\n" -" ornA = newOrnA;\n" -" } else\n" -" {\n" -" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" }\n" -" \n" -" if (childShapeIndexB>=0)\n" -" {\n" -" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" } else\n" -" {\n" -" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" -" }\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -" int pairIndex = i;\n" -" if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" -" {\n" -" computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,\n" -" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" -" return;\n" -" }\n" -" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" -" {\n" -" computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,\n" -" faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" -" return;\n" -" }\n" -" if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" -" {\n" -" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -" float sphereRadius = collidables[collidableIndexB].m_radius;\n" -" float4 convexPos = posA;\n" -" float4 convexOrn = ornA;\n" -" \n" -" computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -" spherePos,sphereRadius,convexPos,convexOrn);\n" -" \n" -" return;\n" -" }\n" -" if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" -" {\n" -" float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" -" float sphereRadius = collidables[collidableIndexA].m_radius;\n" -" float4 convexPos = posB;\n" -" float4 convexOrn = ornB;\n" -" \n" -" computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" -" rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -" spherePos,sphereRadius,convexPos,convexOrn);\n" -" \n" -" return;\n" -" }\n" -" }// if (i<numCompoundPairs)\n" -"}\n" -"bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" -"{\n" -" const float4* p1 = &vertices[0];\n" -" const float4* p2 = &vertices[1];\n" -" const float4* p3 = &vertices[2];\n" -" float4 edge1; edge1 = (*p2 - *p1);\n" -" float4 edge2; edge2 = ( *p3 - *p2 );\n" -" float4 edge3; edge3 = ( *p1 - *p3 );\n" -" \n" -" float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" -" float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" -" float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" -" float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" -" float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" -" float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" -" \n" -" \n" -" float r1, r2, r3;\n" -" r1 = dot(edge1_normal,p1_to_p );\n" -" r2 = dot(edge2_normal,p2_to_p );\n" -" r3 = dot(edge3_normal,p3_to_p );\n" -" \n" -" if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" -" return true;\n" -" if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" -" return true;\n" -" return false;\n" -"}\n" -"float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" -"{\n" -" float4 diff = p - from;\n" -" float4 v = to - from;\n" -" float t = dot(v,diff);\n" -" \n" -" if (t > 0) \n" -" {\n" -" float dotVV = dot(v,v);\n" -" if (t < dotVV) \n" -" {\n" -" t /= dotVV;\n" -" diff -= t*v;\n" -" } else \n" -" {\n" -" t = 1;\n" -" diff -= v;\n" -" }\n" -" } else\n" -" {\n" -" t = 0;\n" -" }\n" -" *nearest = from + t*v;\n" -" return dot(diff,diff); \n" -"}\n" -"void computeContactSphereTriangle(int pairIndex,\n" -" int bodyIndexA, int bodyIndexB,\n" -" int collidableIndexA, int collidableIndexB, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" const float4* triangleVertices,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int maxContactCapacity,\n" -" float4 spherePos2,\n" -" float radius,\n" -" float4 pos,\n" -" float4 quat,\n" -" int faceIndex\n" -" )\n" -"{\n" -" float4 invPos;\n" -" float4 invOrn;\n" -" trInverse(pos,quat, &invPos,&invOrn);\n" -" float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" -" int numFaces = 3;\n" -" float4 closestPnt = (float4)(0, 0, 0, 0);\n" -" float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" -" float minDist = -1000000.f;\n" -" bool bCollide = false;\n" -" \n" -" //////////////////////////////////////\n" -" float4 sphereCenter;\n" -" sphereCenter = spherePos;\n" -" const float4* vertices = triangleVertices;\n" -" float contactBreakingThreshold = 0.f;//todo?\n" -" float radiusWithThreshold = radius + contactBreakingThreshold;\n" -" float4 edge10;\n" -" edge10 = vertices[1]-vertices[0];\n" -" edge10.w = 0.f;//is this needed?\n" -" float4 edge20;\n" -" edge20 = vertices[2]-vertices[0];\n" -" edge20.w = 0.f;//is this needed?\n" -" float4 normal = cross3(edge10,edge20);\n" -" normal = normalize(normal);\n" -" float4 p1ToCenter;\n" -" p1ToCenter = sphereCenter - vertices[0];\n" -" \n" -" float distanceFromPlane = dot(p1ToCenter,normal);\n" -" if (distanceFromPlane < 0.f)\n" -" {\n" -" //triangle facing the other way\n" -" distanceFromPlane *= -1.f;\n" -" normal *= -1.f;\n" -" }\n" -" hitNormalWorld = normal;\n" -" bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" -" \n" -" // Check for contact / intersection\n" -" bool hasContact = false;\n" -" float4 contactPoint;\n" -" if (isInsideContactPlane) \n" -" {\n" -" \n" -" if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" -" {\n" -" // Inside the contact wedge - touches a point on the shell plane\n" -" hasContact = true;\n" -" contactPoint = sphereCenter - normal*distanceFromPlane;\n" -" \n" -" } else {\n" -" // Could be inside one of the contact capsules\n" -" float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" -" float4 nearestOnEdge;\n" -" int numEdges = 3;\n" -" for (int i = 0; i < numEdges; i++) \n" -" {\n" -" float4 pa =vertices[i];\n" -" float4 pb = vertices[(i+1)%3];\n" -" float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" -" if (distanceSqr < contactCapsuleRadiusSqr) \n" -" {\n" -" // Yep, we're inside a capsule\n" -" hasContact = true;\n" -" contactPoint = nearestOnEdge;\n" -" \n" -" }\n" -" \n" -" }\n" -" }\n" -" }\n" -" if (hasContact) \n" -" {\n" -" closestPnt = contactPoint;\n" -" float4 contactToCenter = sphereCenter - contactPoint;\n" -" minDist = length(contactToCenter);\n" -" if (minDist>FLT_EPSILON)\n" -" {\n" -" hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" -" bCollide = true;\n" -" }\n" -" \n" -" }\n" -" /////////////////////////////////////\n" -" if (bCollide && minDist > -10000)\n" -" {\n" -" \n" -" float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" -" float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" -" float actualDepth = minDist-radius;\n" -" \n" -" if (actualDepth<=0.f)\n" -" {\n" -" pOnB1.w = actualDepth;\n" -" int dstIdx;\n" -" \n" -" float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" -" if (lenSqr>FLT_EPSILON)\n" -" {\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" \n" -" if (dstIdx < maxContactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -normalOnSurfaceB1;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" -" c->m_worldPosB[0] = pOnB1;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = faceIndex;\n" -" GET_NPOINTS(*c) = 1;\n" -" } \n" -" }\n" -" }\n" -" }//if (hasCollision)\n" -"}\n" -"// work-in-progress\n" -"__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n" -" __global const BodyData* rigidBodies,\n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int numConcavePairs, int maxContactCapacity\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConcavePairs)\n" -" return;\n" -" int pairIdx = i;\n" -" int bodyIndexA = concavePairs[i].x;\n" -" int bodyIndexB = concavePairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" -" {\n" -" int f = concavePairs[i].z;\n" -" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -" \n" -" float4 verticesA[3];\n" -" for (int i=0;i<3;i++)\n" -" {\n" -" int index = indices[face.m_indexOffset+i];\n" -" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -" verticesA[i] = vert;\n" -" }\n" -" float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" -" float sphereRadius = collidables[collidableIndexB].m_radius;\n" -" float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" -" float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" -" computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" -" rigidBodies,collidables,\n" -" verticesA,\n" -" globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" -" spherePos,sphereRadius,convexPos,convexOrn, f);\n" -" return;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl deleted file mode 100644 index a6565fd6fa..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ /dev/null @@ -1,2018 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 - -#define B3_MAX_STACK_DEPTH 256 - - -typedef unsigned int u32; - -///keep this in sync with btCollidable.h -typedef struct -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - union - { - float m_radius; - int m_compoundBvhIndex; - }; - - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} b3QuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - - -int getTriangleIndex(const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? d0:d1; - return true; -} - - - - -inline bool IsAlmostZero(const float4 v) -{ - if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - result = false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, __global const float4* vertices,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? d0:d1; - return true; -} - - -bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = faces[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - - curPlaneTests++; - - float d; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d)) - return false; - - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - - return true; -} - - - - -bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i<numUnitSphereDirections;i++) - { - - float4 crossje; - crossje = unitSphereDirections[i]; - - if (dot3F4(DeltaC2,crossje)>0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje*=-1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -// work-in-progress -__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global const btGpuChildShape* gpuChildShapes, - __global volatile float4* gpuCompoundSepNormalsOut, - __global volatile int* gpuHasCompoundSepNormalsOut, - int numCompoundPairs - ) -{ - - int i = get_global_id(0); - if (i<numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1,0,0,0); - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else//(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - }//sepEE - }//(!sepB) - }//(!sepA) - - - } - -} - - -inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - -inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - - -// work-in-progress -__kernel void findCompoundPairsKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global b3Aabb_t* aabbLocalSpace, - __global const btGpuChildShape* gpuChildShapes, - __global volatile int4* gpuCompoundPairsOut, - __global volatile int* numCompoundPairsOut, - __global const b3BvhSubtreeInfo* subtrees, - __global const b3QuantizedBvhNode* quantizedNodes, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumCompoundPairsCapacity - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; - int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; - int numSubTreesA = bvhInfos[bvhA].m_numSubTrees; - int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset; - int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset; - - - int numSubTreesB = bvhInfos[bvhB].m_numSubTrees; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - - for (int p=0;p<numSubTreesA;p++) - { - b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p]; - //bvhInfos[bvhA].m_quantization - b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - - b3Float4 aabbAMinOut,aabbAMaxOut; - float margin=0.f; - b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); - - for (int q=0;q<numSubTreesB;q++) - { - b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q]; - - b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - - b3Float4 aabbBMinOut,aabbBMaxOut; - float margin=0.f; - b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); - - - - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (aabbOverlap) - { - - int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset; - int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - - int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset; - int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; - - - b3Int2 nodeStack[B3_MAX_STACK_DEPTH]; - b3Int2 node0; - node0.x = startNodeIndexA; - node0.y = startNodeIndexB; - int maxStackDepth = B3_MAX_STACK_DEPTH; - int depth=0; - nodeStack[depth++]=node0; - - do - { - b3Int2 node = nodeStack[--depth]; - - b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - - b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - - float margin=0.f; - b3Float4 aabbAMinOut,aabbAMaxOut; - b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); - - b3Float4 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); - - - bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (nodeOverlap) - { - bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]); - bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]); - bool isInternalA = !isLeafA; - bool isInternalB = !isLeafB; - - //fail, even though it might hit two leaf nodes - if (depth+4>maxStackDepth && !(isLeafA && isLeafB)) - { - //printf("Error: traversal exceeded maxStackDepth"); - continue; - } - - if(isInternalA) - { - int nodeAleftChild = node.x+1; - bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]); - int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]); - - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); - } - } - else - { - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); - } - else - { - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]); - int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]); - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - } - } - } - } while (depth); - } - } - } - - return; - } - - - - - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - - if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c=0;c<numChildrenA;c++) - { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA]; - float margin = 0.f; - - b3Float4 aabbAMinWS; - b3Float4 aabbAMaxWS; - - b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin, - newPosA, - newOrnA, - &aabbAMinWS,&aabbAMaxWS); - - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB]; - - b3Float4 aabbBMinWS; - b3Float4 aabbBMaxWS; - - b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin, - newPosB, - newOrnB, - &aabbBMinWS,&aabbBMaxWS); - - - - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS); - if (aabbOverlap) - { - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - {// - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - }// - }//fi(1) - } //for (int b=0 - }//if (collidables[collidableIndexB]. - else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - if (1) - { - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - { - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1); - }//if (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = qtRotate(ornB,childPosB)+posB; - float4 newOrnB = qtMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - - ////////////////////////////////////// - - if (1) - { - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - {// - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB); - }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - return; - }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - }//i<numPairs -} - -// work-in-progress -__kernel void findSeparatingAxisKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global volatile float4* separatingNormals, - __global volatile int* hasSeparatingAxis, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - hasSeparatingAxis[i] = 0; - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - hasSeparatingAxis[i] = 0; - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH)) - { - hasSeparatingAxis[i] = 0; - return; - } - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = FLT_MAX; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal; - - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - hasSeparatingAxis[i] = 4; - if (!sepA) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (!sepB) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - } - - } - -} - - -__kernel void findSeparatingAxisVertexFaceKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global volatile float4* separatingNormals, - __global volatile int* hasSeparatingAxis, - __global float* dmins, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - hasSeparatingAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - return; - } - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = FLT_MAX; - - dmins[i] = dmin; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal; - - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - hasSeparatingAxis[i] = 4; - if (!sepA) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (sepB) - { - dmins[i] = dmin; - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - - } - -} - - -__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global float* dmins, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = dmins[i]; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = separatingNormals[i]; - - - - bool sepEE = false; - int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; - if (numEdgeEdgeDirections<=numUnitSphereDirections) - { - sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - /* - ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy - else - { - sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,unitSphereDirections,numUnitSphereDirections, - &sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - */ - } //if (hasSeparatingAxis[i]) - }//(i<numPairs) -} - - - - - -inline int findClippingFaces(const float4 separatingNormal, - const ConvexPolyhedronCL* hullA, - __global const ConvexPolyhedronCL* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=0; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - - for(int e0=0;e0<numVertices;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - - for(int e0=0;e0<numVerticesA;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, - posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces,indices, - verticesA,uniqueEdgesA,facesA,indicesA, - &sepAxis,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else - { - bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - - if (!sepEE) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - } - - if (hasSeparatingAxis) - { - sepAxis.w = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - - - findClippingFaces(sepAxis, - &convexPolyhedronA, - &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsA1GPU, - worldNormalsAGPU, - worldVertsB1GPU, - vertexFaceCapacity, - minDist, maxDist, - verticesA, - facesA, - indicesA, - vertices, - faces, - indices, - clippingFacesOut, pairIdx); - - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - - concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts -} - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl deleted file mode 100644 index f433971741..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ /dev/null @@ -1,1888 +0,0 @@ - -#define TRIANGLE_NUM_CONVEX_FACES 5 - - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - -#define max2 max -#define min2 min - -typedef unsigned int u32; - - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - - - -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w - - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -//#define dot3F4 dot - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - - -__inline float4 lerp3(const float4 a,const float4 b, float t) -{ - return make_float4( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - - - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; - //double-check next test - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; -//double-check next test - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - -int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, - const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]]; - const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;i<numVertsIn;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - } - - return numContactsOut; -} - - - -int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, - const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const float4* verticesA, - const b3GpuFace_t* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; - const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;i<numVertsIn;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - } - - return numContactsOut; -} - -int clipHullAgainstHull(const float4 separatingNormal, - __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - float4* localContactsOut, - int localContactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, - faces, - indices,localContactsOut,localContactCapacity); - } - - return numContactsOut; -} - - -int clipHullAgainstHullLocalA(const float4 separatingNormal, - const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const b3GpuFace_t* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - float4* localContactsOut, - int localContactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, - verticesA,facesA,indicesA, - verticesB,facesB,indicesB, - localContactsOut,localContactCapacity); - } - - return numContactsOut; -} - -#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} -#define REDUCE_MAX(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } - -int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0.f); - { - - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - - -int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx) -{ - contactIdx[0] = 0; - contactIdx[1] = 1; - contactIdx[2] = 2; - contactIdx[3] = 3; - - if( nPoints == 0 ) return 0; - - nPoints = min2( nPoints, 4 ); - return nPoints; - -} - - - -int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx) -{ - if( nPoints == 0 ) return 0; - - nPoints = min2( nPoints, 64 ); - - float4 center = make_float4(0.f); - { - float4 v[64]; - for (int i=0;i<nPoints;i++) - v[i] = p[i]; - //memcpy( v, p, nPoints*sizeof(float4) ); - PARALLEL_SUM( v, nPoints ); - center = v[0]/(float)nPoints; - } - - - - { // sample 4 directions - if( nPoints < 4 ) - { - for(int i=0; i<nPoints; i++) - contactIdx[i] = i; - return nPoints; - } - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - int idx[4]; - - float2 max00 = make_float2(0,FLT_MAX); - { - // idx, distance - { - { - int4 a[64]; - for(int ie = 0; ie<nPoints; ie++ ) - { - - - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( -u, r ); - a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( v, r ); - a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( -v, r ); - a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - } - - for(int ie=0; ie<nPoints; ie++) - { - a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x; - a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; - a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; - a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w; - } - - idx[0] = (int)a[0].x & 0xff; - idx[1] = (int)a[0].y & 0xff; - idx[2] = (int)a[0].z & 0xff; - idx[3] = (int)a[0].w & 0xff; - } - } - - { - float2 h[64]; - PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); - REDUCE_MIN( h, nPoints ); - max00 = h[0]; - } - } - - contactIdx[0] = idx[0]; - contactIdx[1] = idx[1]; - contactIdx[2] = idx[2]; - contactIdx[3] = idx[3]; - - - return 4; - } -} - - - -__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* closestPointsWorld, - __global const float4* separatingNormalsWorld, - __global const int* contactCounts, - __global const int* contactOffsets, - __global struct b3Contact4Data* restrict contactsOut, - counter32_t nContactsOut, - int contactCapacity, - int numPairs, - int pairIndex - ) -{ - int idx = get_global_id(0); - - if (idx<numPairs) - { - float4 normal = separatingNormalsWorld[idx]; - int nPoints = contactCounts[idx]; - __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]]; - float4 localPoints[64]; - for (int i=0;i<nPoints;i++) - { - localPoints[i] = pointsIn[i]; - } - - int contactIdx[4];// = {-1,-1,-1,-1}; - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - __global struct b3Contact4Data* c = contactsOut + dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = idx; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - for (int i=0;i<nContacts;i++) - { - c->m_worldPosB[i] = localPoints[contactIdx[i]]; - } - GET_NPOINTS(*c) = nContacts; - } - } -} - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - -void trMul(float4 translationA, Quaternion orientationA, - float4 translationB, Quaternion orientationB, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtMul(orientationA,orientationB); - *translationOut = transform(&translationB,&translationA,&orientationA); -} - - - - -__kernel void clipHullHullKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, - int contactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (hasSeparatingAxis[i]) - { - - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - - - int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, - rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } - - if (dstIdx<contactCapacity) - { - pairs[pairIndex].z = dstIdx; - - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - for (int i=0;i<nReducedContacts;i++) - { - //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact - if (i>0||(mprContactIndex<0)) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - -__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const b3GpuChildShape_t* gpuChildShapes, - __global const float4* gpuCompoundSepNormalsOut, - __global const int* gpuHasCompoundSepNormalsOut, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numCompoundPairs) - { - - if (gpuHasCompoundSepNormalsOut[i]) - { - - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if ((dstIdx+nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (gpuHasCompoundSepNormalsOut[i]) - }// if (i<numCompoundPairs) - -} - - - -__kernel void sphereSphereCollisionKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numPairs) -{ - - int i = get_global_id(0); - int pairIndex = i; - - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - //sphere-sphere - float radiusA = collidables[collidableIndexA].m_radius; - float radiusB = collidables[collidableIndexB].m_radius; - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - float4 diff = posA-posB; - float len = length(diff); - - ///iff distance positive, don't generate a new contact - if ( len <= (radiusA+radiusB)) - { - ///distance (negative means penetration) - float dist = len - (radiusA+radiusB); - float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); - if (len > 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx < contactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - }//SHAPE_SPHERE SHAPE_SPHERE - }//if (i<numPairs) -} - -__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const b3GpuChildShape_t* gpuChildShapes, - __global const float4* separatingNormals, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numConcavePairs) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numConcavePairs) - { - //negative value means that the pair is invalid - if (concavePairsIn[i].w<0) - return; - - int bodyIndexA = concavePairsIn[i].x; - int bodyIndexB = concavePairsIn[i].y; - int f = concavePairsIn[i].z; - int childShapeIndexA = f; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - /////////////////////////////////////////////////////////////// - - - bool overlap = false; - - b3ConvexPolyhedronData_t convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - } - - float dmin = FLT_MAX; - - int localCC=0; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot3F4(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot3F4(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - float4 sepAxis = separatingNormals[i]; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - int childShapeIndexB =-1; - if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - /////////////////// - ///compound shape support - - childShapeIndexB = concavePairsIn[pairIndex].w; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - - } - - //////////////////////////////////////// - - - - int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis, - &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - &verticesA,&facesA,&indicesA, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = concavePairsIn[pairIndex].x; - int bodyB = concavePairsIn[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (i<numPairs) -} - - - - - - -int findClippingFaces(const float4 separatingNormal, - __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - -int clipFaces(__global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - __global float4* worldVertsB2, - int capacityWorldVertsB2, - const float minDist, float maxDist, - __global int4* clippingFaces, - int pairIndex) -{ - int numContactsOut = 0; - - int closestFaceA = clippingFaces[pairIndex].x; - int closestFaceB = clippingFaces[pairIndex].y; - int numVertsInA = clippingFaces[pairIndex].z; - int numVertsInB = clippingFaces[pairIndex].w; - - int numVertsOut = 0; - - if (closestFaceA<0) - return numContactsOut; - - __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; - __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; - - - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e0<numVertsInA;e0++) - { - const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; - const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; - const float4 WorldEdge0 = aw - bw; - float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = aw; - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); - __global float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsInB = numVertsOut; - numVertsOut = 0; - } - - //float4 planeNormalWS = worldNormalsA1[pairIndex]; - //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - - - /*for (int i=0;i<numVertsInB;i++) - { - pVtxOut[i] = pVtxIn[i]; - }*/ - - - - - //numVertsInB=0; - - float4 planeNormalWS = worldNormalsA1[pairIndex]; - float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - for (int i=0;i<numVertsInB;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - - clippingFaces[pairIndex].w =numContactsOut; - - - return numContactsOut; - -} - - - - -__kernel void findClippingFacesKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global int4* clippingFacesOut, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - int numPairs - ) -{ - - int i = get_global_id(0); - int pairIndex = i; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - - int numLocalContactsOut = findClippingFaces(separatingNormals[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, - rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, - worldVertsA1, - worldNormalsA1, - worldVertsB1,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - clippingFacesOut,i); - - - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - - - -__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global int4* clippingFacesOut, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - __global float4* worldVertsB2, - int vertexFaceCapacity, - int numPairs, - int debugMode - ) -{ - int i = get_global_id(0); - int pairIndex = i; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - -// int bodyIndexA = pairs[i].x; - // int bodyIndexB = pairs[i].y; - - int numLocalContactsOut = 0; - - int capacityWorldVertsB2 = vertexFaceCapacity; - - __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; - __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; - - - { - __global int4* clippingFaces = clippingFacesOut; - - - int closestFaceA = clippingFaces[pairIndex].x; - int closestFaceB = clippingFaces[pairIndex].y; - int numVertsInA = clippingFaces[pairIndex].z; - int numVertsInB = clippingFaces[pairIndex].w; - - int numVertsOut = 0; - - if (closestFaceA>=0) - { - - - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e0<numVertsInA;e0++) - { - const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; - const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; - const float4 WorldEdge0 = aw - bw; - float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = aw; - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); - __global float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsInB = numVertsOut; - numVertsOut = 0; - } - - float4 planeNormalWS = worldNormalsA1[pairIndex]; - float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - for (int i=0;i<numVertsInB;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - - } - clippingFaces[pairIndex].w =numLocalContactsOut; - - - } - - for (int i=0;i<numLocalContactsOut;i++) - pVtxIn[i] = pVtxOut[i]; - - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - - - - -__kernel void newContactReductionKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* globalContactsOut, - __global int4* clippingFaces, - __global float4* worldVertsB2, - volatile __global int* nGlobalContactsOut, - int vertexFaceCapacity, - int contactCapacity, - int numPairs - ) -{ - int i = get_global_id(0); - int pairIndex = i; - - int4 contactIdx; - contactIdx=make_int4(0,1,2,3); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - - - - int nPoints = clippingFaces[pairIndex].w; - - if (nPoints>0) - { - - __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; - float4 normal = -separatingNormals[i]; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } -//#if 0 - - if (dstIdx < contactCapacity) - { - - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - - pairs[pairIndex].w = dstIdx; - - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA =-1; - c->m_childIndexB =-1; - - switch (nReducedContacts) - { - case 4: - c->m_worldPosB[3] = pointsIn[contactIdx.w]; - case 3: - c->m_worldPosB[2] = pointsIn[contactIdx.z]; - case 2: - c->m_worldPosB[1] = pointsIn[contactIdx.y]; - case 1: - if (mprContactIndex<0)//test - c->m_worldPosB[0] = pointsIn[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = nReducedContacts; - - } - - -//#endif - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - - - -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h deleted file mode 100644 index f0ecfc7851..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ /dev/null @@ -1,2099 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satClipKernelsCL= \ -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define max2 max\n" -"#define min2 min\n" -"typedef unsigned int u32;\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" -"#define B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"typedef struct b3GpuFace b3GpuFace_t;\n" -"struct b3GpuFace\n" -"{\n" -" b3Float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -" int m_unusedPadding1;\n" -" int m_unusedPadding2;\n" -"};\n" -"typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" -"struct b3ConvexPolyhedronData\n" -"{\n" -" b3Float4 m_localCenter;\n" -" b3Float4 m_extents;\n" -" b3Float4 mC;\n" -" b3Float4 mE;\n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"};\n" -"#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -" SHAPE_HEIGHT_FIELD=1,\n" -" SHAPE_CONVEX_HULL=3,\n" -" SHAPE_PLANE=4,\n" -" SHAPE_CONCAVE_TRIMESH=5,\n" -" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -" SHAPE_SPHERE=7,\n" -" MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -" b3Float4 m_childPosition;\n" -" b3Quat m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -" int m_bodyIndexA;\n" -" int m_bodyIndexB;\n" -"// int m_pairType;\n" -" int m_childShapeIndexA;\n" -" int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"//#define dot3F4 dot\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"}\n" -"__inline float4 lerp3(const float4 a,const float4 b, float t)\n" -"{\n" -" return make_float4( a.x + (b.x - a.x) * t,\n" -" a.y + (b.y - a.y) * t,\n" -" a.z + (b.z - a.z) * t,\n" -" 0.f);\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" -"{\n" -" \n" -" int ve;\n" -" float ds, de;\n" -" int numVertsOut = 0;\n" -" //double-check next test\n" -" if (numVertsIn < 2)\n" -" return 0;\n" -" \n" -" float4 firstVertex=pVtxIn[numVertsIn-1];\n" -" float4 endVertex = pVtxIn[0];\n" -" \n" -" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -" \n" -" for (ve = 0; ve < numVertsIn; ve++)\n" -" {\n" -" endVertex=pVtxIn[ve];\n" -" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -" if (ds<0)\n" -" {\n" -" if (de<0)\n" -" {\n" -" // Start < 0, end < 0, so output endVertex\n" -" ppVtxOut[numVertsOut++] = endVertex;\n" -" }\n" -" else\n" -" {\n" -" // Start < 0, end >= 0, so output intersection\n" -" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -" }\n" -" }\n" -" else\n" -" {\n" -" if (de<0)\n" -" {\n" -" // Start >= 0, end < 0 so output intersection and end\n" -" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -" ppVtxOut[numVertsOut++] = endVertex;\n" -" }\n" -" }\n" -" firstVertex = endVertex;\n" -" ds = de;\n" -" }\n" -" return numVertsOut;\n" -"}\n" -"// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" -"int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" -"{\n" -" \n" -" int ve;\n" -" float ds, de;\n" -" int numVertsOut = 0;\n" -"//double-check next test\n" -" if (numVertsIn < 2)\n" -" return 0;\n" -" float4 firstVertex=pVtxIn[numVertsIn-1];\n" -" float4 endVertex = pVtxIn[0];\n" -" \n" -" ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" -" for (ve = 0; ve < numVertsIn; ve++)\n" -" {\n" -" endVertex=pVtxIn[ve];\n" -" de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" -" if (ds<0)\n" -" {\n" -" if (de<0)\n" -" {\n" -" // Start < 0, end < 0, so output endVertex\n" -" ppVtxOut[numVertsOut++] = endVertex;\n" -" }\n" -" else\n" -" {\n" -" // Start < 0, end >= 0, so output intersection\n" -" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -" }\n" -" }\n" -" else\n" -" {\n" -" if (de<0)\n" -" {\n" -" // Start >= 0, end < 0 so output intersection and end\n" -" ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" -" ppVtxOut[numVertsOut++] = endVertex;\n" -" }\n" -" }\n" -" firstVertex = endVertex;\n" -" ds = de;\n" -" }\n" -" return numVertsOut;\n" -"}\n" -"int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, \n" -" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -" float4* worldVertsB2, int capacityWorldVertsB2,\n" -" const float minDist, float maxDist,\n" -" __global const float4* vertices,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" float4* contactsOut,\n" -" int contactCapacity)\n" -"{\n" -" int numContactsOut = 0;\n" -" float4* pVtxIn = worldVertsB1;\n" -" float4* pVtxOut = worldVertsB2;\n" -" \n" -" int numVertsIn = numWorldVertsB1;\n" -" int numVertsOut = 0;\n" -" int closestFaceA=-1;\n" -" {\n" -" float dmin = FLT_MAX;\n" -" for(int face=0;face<hullA->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(\n" -" faces[hullA->m_faceOffset+face].m_plane.x, \n" -" faces[hullA->m_faceOffset+face].m_plane.y, \n" -" faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -" \n" -" float d = dot3F4(faceANormalWS,separatingNormal);\n" -" if (d < dmin)\n" -" {\n" -" dmin = d;\n" -" closestFaceA = face;\n" -" }\n" -" }\n" -" }\n" -" if (closestFaceA<0)\n" -" return numContactsOut;\n" -" b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" -" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -" int numVerticesA = polyA.m_numIndices;\n" -" for(int e0=0;e0<numVerticesA;e0++)\n" -" {\n" -" const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" -" const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -" const float4 edge0 = a - b;\n" -" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -" float4 worldA1 = transform(&a,&posA,&ornA);\n" -" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -" \n" -" float4 planeNormalWS = planeNormalWS1;\n" -" float planeEqWS=planeEqWS1;\n" -" \n" -" //clip face\n" -" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -" //btSwap(pVtxIn,pVtxOut);\n" -" float4* tmp = pVtxOut;\n" -" pVtxOut = pVtxIn;\n" -" pVtxIn = tmp;\n" -" numVertsIn = numVertsOut;\n" -" numVertsOut = 0;\n" -" }\n" -" \n" -" // only keep points that are behind the witness face\n" -" {\n" -" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -" float localPlaneEq = polyA.m_plane.w;\n" -" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -" for (int i=0;i<numVertsIn;i++)\n" -" {\n" -" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -" if (depth <=minDist)\n" -" {\n" -" depth = minDist;\n" -" }\n" -" if (depth <=maxDist)\n" -" {\n" -" float4 pointInWorld = pVtxIn[i];\n" -" //resultOut.addContactPoint(separatingNormal,point,depth);\n" -" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -" }\n" -" }\n" -" }\n" -" return numContactsOut;\n" -"}\n" -"int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, \n" -" const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" -" float4* worldVertsB2, int capacityWorldVertsB2,\n" -" const float minDist, float maxDist,\n" -" const float4* verticesA,\n" -" const b3GpuFace_t* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB,\n" -" __global const b3GpuFace_t* facesB,\n" -" __global const int* indicesB,\n" -" float4* contactsOut,\n" -" int contactCapacity)\n" -"{\n" -" int numContactsOut = 0;\n" -" float4* pVtxIn = worldVertsB1;\n" -" float4* pVtxOut = worldVertsB2;\n" -" \n" -" int numVertsIn = numWorldVertsB1;\n" -" int numVertsOut = 0;\n" -" int closestFaceA=-1;\n" -" {\n" -" float dmin = FLT_MAX;\n" -" for(int face=0;face<hullA->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(\n" -" facesA[hullA->m_faceOffset+face].m_plane.x, \n" -" facesA[hullA->m_faceOffset+face].m_plane.y, \n" -" facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -" \n" -" float d = dot3F4(faceANormalWS,separatingNormal);\n" -" if (d < dmin)\n" -" {\n" -" dmin = d;\n" -" closestFaceA = face;\n" -" }\n" -" }\n" -" }\n" -" if (closestFaceA<0)\n" -" return numContactsOut;\n" -" b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" -" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -" int numVerticesA = polyA.m_numIndices;\n" -" for(int e0=0;e0<numVerticesA;e0++)\n" -" {\n" -" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" -" const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" -" const float4 edge0 = a - b;\n" -" const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" -" float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -" float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" -" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -" float4 worldA1 = transform(&a,&posA,&ornA);\n" -" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -" \n" -" float4 planeNormalWS = planeNormalWS1;\n" -" float planeEqWS=planeEqWS1;\n" -" \n" -" //clip face\n" -" //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" -" numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" -" //btSwap(pVtxIn,pVtxOut);\n" -" float4* tmp = pVtxOut;\n" -" pVtxOut = pVtxIn;\n" -" pVtxIn = tmp;\n" -" numVertsIn = numVertsOut;\n" -" numVertsOut = 0;\n" -" }\n" -" \n" -" // only keep points that are behind the witness face\n" -" {\n" -" float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" -" float localPlaneEq = polyA.m_plane.w;\n" -" float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" -" float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" -" for (int i=0;i<numVertsIn;i++)\n" -" {\n" -" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -" if (depth <=minDist)\n" -" {\n" -" depth = minDist;\n" -" }\n" -" if (depth <=maxDist)\n" -" {\n" -" float4 pointInWorld = pVtxIn[i];\n" -" //resultOut.addContactPoint(separatingNormal,point,depth);\n" -" contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -" }\n" -" }\n" -" }\n" -" return numContactsOut;\n" -"}\n" -"int clipHullAgainstHull(const float4 separatingNormal,\n" -" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -" const float minDist, float maxDist,\n" -" __global const float4* vertices,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" float4* localContactsOut,\n" -" int localContactCapacity)\n" -"{\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" int closestFaceB=-1;\n" -" float dmax = -FLT_MAX;\n" -" {\n" -" for(int face=0;face<hullB->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" -" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 WorldNormal = qtRotate(ornB, Normal);\n" -" float d = dot3F4(WorldNormal,separatingNormal);\n" -" if (d > dmax)\n" -" {\n" -" dmax = d;\n" -" closestFaceB = face;\n" -" }\n" -" }\n" -" }\n" -" {\n" -" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -" const int numVertices = polyB.m_numIndices;\n" -" for(int e0=0;e0<numVertices;e0++)\n" -" {\n" -" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -" }\n" -" }\n" -" if (closestFaceB>=0)\n" -" {\n" -" numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" -" posA,ornA,\n" -" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" -" faces,\n" -" indices,localContactsOut,localContactCapacity);\n" -" }\n" -" return numContactsOut;\n" -"}\n" -"int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" -" const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" -" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" -" float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" -" const float minDist, float maxDist,\n" -" const float4* verticesA,\n" -" const b3GpuFace_t* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB,\n" -" __global const b3GpuFace_t* facesB,\n" -" __global const int* indicesB,\n" -" float4* localContactsOut,\n" -" int localContactCapacity)\n" -"{\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" int closestFaceB=-1;\n" -" float dmax = -FLT_MAX;\n" -" {\n" -" for(int face=0;face<hullB->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" -" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 WorldNormal = qtRotate(ornB, Normal);\n" -" float d = dot3F4(WorldNormal,separatingNormal);\n" -" if (d > dmax)\n" -" {\n" -" dmax = d;\n" -" closestFaceB = face;\n" -" }\n" -" }\n" -" }\n" -" {\n" -" const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -" const int numVertices = polyB.m_numIndices;\n" -" for(int e0=0;e0<numVertices;e0++)\n" -" {\n" -" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -" worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -" }\n" -" }\n" -" if (closestFaceB>=0)\n" -" {\n" -" numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" -" posA,ornA,\n" -" worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" -" verticesA,facesA,indicesA,\n" -" verticesB,facesB,indicesB,\n" -" localContactsOut,localContactCapacity);\n" -" }\n" -" return numContactsOut;\n" -"}\n" -"#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" -"#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" -"#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" -"#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" -"int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" -"{\n" -" if( nPoints == 0 )\n" -" return 0;\n" -" \n" -" if (nPoints <=4)\n" -" return nPoints;\n" -" \n" -" \n" -" if (nPoints >64)\n" -" nPoints = 64;\n" -" \n" -" float4 center = make_float4(0.f);\n" -" {\n" -" \n" -" for (int i=0;i<nPoints;i++)\n" -" center += p[i];\n" -" center /= (float)nPoints;\n" -" }\n" -" \n" -" \n" -" \n" -" // sample 4 directions\n" -" \n" -" float4 aVector = p[0] - center;\n" -" float4 u = cross3( nearNormal, aVector );\n" -" float4 v = cross3( nearNormal, u );\n" -" u = normalize3( u );\n" -" v = normalize3( v );\n" -" \n" -" \n" -" //keep point with deepest penetration\n" -" float minW= FLT_MAX;\n" -" \n" -" int minIndex=-1;\n" -" \n" -" float4 maxDots;\n" -" maxDots.x = FLT_MIN;\n" -" maxDots.y = FLT_MIN;\n" -" maxDots.z = FLT_MIN;\n" -" maxDots.w = FLT_MIN;\n" -" \n" -" // idx, distance\n" -" for(int ie = 0; ie<nPoints; ie++ )\n" -" {\n" -" if (p[ie].w<minW)\n" -" {\n" -" minW = p[ie].w;\n" -" minIndex=ie;\n" -" }\n" -" float f;\n" -" float4 r = p[ie]-center;\n" -" f = dot3F4( u, r );\n" -" if (f<maxDots.x)\n" -" {\n" -" maxDots.x = f;\n" -" contactIdx[0].x = ie;\n" -" }\n" -" \n" -" f = dot3F4( -u, r );\n" -" if (f<maxDots.y)\n" -" {\n" -" maxDots.y = f;\n" -" contactIdx[0].y = ie;\n" -" }\n" -" \n" -" \n" -" f = dot3F4( v, r );\n" -" if (f<maxDots.z)\n" -" {\n" -" maxDots.z = f;\n" -" contactIdx[0].z = ie;\n" -" }\n" -" \n" -" f = dot3F4( -v, r );\n" -" if (f<maxDots.w)\n" -" {\n" -" maxDots.w = f;\n" -" contactIdx[0].w = ie;\n" -" }\n" -" \n" -" }\n" -" \n" -" if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" -" {\n" -" //replace the first contact with minimum (todo: replace contact with least penetration)\n" -" contactIdx[0].x = minIndex;\n" -" }\n" -" \n" -" return 4;\n" -" \n" -"}\n" -"int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -" contactIdx[0] = 0;\n" -" contactIdx[1] = 1;\n" -" contactIdx[2] = 2;\n" -" contactIdx[3] = 3;\n" -" \n" -" if( nPoints == 0 ) return 0;\n" -" \n" -" nPoints = min2( nPoints, 4 );\n" -" return nPoints;\n" -" \n" -"}\n" -"int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" -"{\n" -" if( nPoints == 0 ) return 0;\n" -" nPoints = min2( nPoints, 64 );\n" -" float4 center = make_float4(0.f);\n" -" {\n" -" float4 v[64];\n" -" for (int i=0;i<nPoints;i++)\n" -" v[i] = p[i];\n" -" //memcpy( v, p, nPoints*sizeof(float4) );\n" -" PARALLEL_SUM( v, nPoints );\n" -" center = v[0]/(float)nPoints;\n" -" }\n" -" \n" -" { // sample 4 directions\n" -" if( nPoints < 4 )\n" -" {\n" -" for(int i=0; i<nPoints; i++) \n" -" contactIdx[i] = i;\n" -" return nPoints;\n" -" }\n" -" float4 aVector = p[0] - center;\n" -" float4 u = cross3( nearNormal, aVector );\n" -" float4 v = cross3( nearNormal, u );\n" -" u = normalize3( u );\n" -" v = normalize3( v );\n" -" int idx[4];\n" -" float2 max00 = make_float2(0,FLT_MAX);\n" -" {\n" -" // idx, distance\n" -" {\n" -" {\n" -" int4 a[64];\n" -" for(int ie = 0; ie<nPoints; ie++ )\n" -" {\n" -" \n" -" \n" -" float f;\n" -" float4 r = p[ie]-center;\n" -" f = dot3F4( u, r );\n" -" a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -" f = dot3F4( -u, r );\n" -" a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -" f = dot3F4( v, r );\n" -" a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -" f = dot3F4( -v, r );\n" -" a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" -" }\n" -" for(int ie=0; ie<nPoints; ie++)\n" -" {\n" -" a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" -" a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" -" a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" -" a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" -" }\n" -" idx[0] = (int)a[0].x & 0xff;\n" -" idx[1] = (int)a[0].y & 0xff;\n" -" idx[2] = (int)a[0].z & 0xff;\n" -" idx[3] = (int)a[0].w & 0xff;\n" -" }\n" -" }\n" -" {\n" -" float2 h[64];\n" -" PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" -" REDUCE_MIN( h, nPoints );\n" -" max00 = h[0];\n" -" }\n" -" }\n" -" contactIdx[0] = idx[0];\n" -" contactIdx[1] = idx[1];\n" -" contactIdx[2] = idx[2];\n" -" contactIdx[3] = idx[3];\n" -" return 4;\n" -" }\n" -"}\n" -"__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const float4* closestPointsWorld,\n" -" __global const float4* separatingNormalsWorld,\n" -" __global const int* contactCounts,\n" -" __global const int* contactOffsets,\n" -" __global struct b3Contact4Data* restrict contactsOut,\n" -" counter32_t nContactsOut,\n" -" int contactCapacity,\n" -" int numPairs,\n" -" int pairIndex\n" -" )\n" -"{\n" -" int idx = get_global_id(0);\n" -" \n" -" if (idx<numPairs)\n" -" {\n" -" float4 normal = separatingNormalsWorld[idx];\n" -" int nPoints = contactCounts[idx];\n" -" __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" -" float4 localPoints[64];\n" -" for (int i=0;i<nPoints;i++)\n" -" {\n" -" localPoints[i] = pointsIn[i];\n" -" }\n" -" int contactIdx[4];// = {-1,-1,-1,-1};\n" -" contactIdx[0] = -1;\n" -" contactIdx[1] = -1;\n" -" contactIdx[2] = -1;\n" -" contactIdx[3] = -1;\n" -" int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" -" int dstIdx;\n" -" AppendInc( nContactsOut, dstIdx );\n" -" if (dstIdx<contactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = contactsOut + dstIdx;\n" -" c->m_worldNormalOnB = -normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = idx;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" for (int i=0;i<nContacts;i++)\n" -" {\n" -" c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" -" }\n" -" GET_NPOINTS(*c) = nContacts;\n" -" }\n" -" }\n" -"}\n" -"void trInverse(float4 translationIn, Quaternion orientationIn,\n" -" float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -" *orientationOut = qtInvert(orientationIn);\n" -" *translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"void trMul(float4 translationA, Quaternion orientationA,\n" -" float4 translationB, Quaternion orientationB,\n" -" float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -" *orientationOut = qtMul(orientationA,orientationB);\n" -" *translationOut = transform(&translationB,&translationA,&orientationA);\n" -"}\n" -"__kernel void clipHullHullKernel( __global int4* pairs, \n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" __global const float4* separatingNormals,\n" -" __global const int* hasSeparatingAxis,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int numPairs,\n" -" int contactCapacity)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" float4 worldVertsB1[64];\n" -" float4 worldVertsB2[64];\n" -" int capacityWorldVerts = 64; \n" -" float4 localContactsOut[64];\n" -" int localContactCapacity=64;\n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" if (i<numPairs)\n" -" {\n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" -" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -" worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -" minDist, maxDist,\n" -" vertices,faces,indices,\n" -" localContactsOut,localContactCapacity);\n" -" \n" -" if (numLocalContactsOut>0)\n" -" {\n" -" float4 normal = -separatingNormals[i];\n" -" int nPoints = numLocalContactsOut;\n" -" float4* pointsIn = localContactsOut;\n" -" int contactIdx[4];// = {-1,-1,-1,-1};\n" -" contactIdx[0] = -1;\n" -" contactIdx[1] = -1;\n" -" contactIdx[2] = -1;\n" -" contactIdx[3] = -1;\n" -" \n" -" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -" \n" -" \n" -" int mprContactIndex = pairs[pairIndex].z;\n" -" int dstIdx = mprContactIndex;\n" -" if (dstIdx<0)\n" -" {\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" }\n" -" if (dstIdx<contactCapacity)\n" -" {\n" -" pairs[pairIndex].z = dstIdx;\n" -" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -" c->m_worldNormalOnB = -normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" for (int i=0;i<nReducedContacts;i++)\n" -" {\n" -" //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" -" if (i>0||(mprContactIndex<0))\n" -" {\n" -" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -" }\n" -" }\n" -" GET_NPOINTS(*c) = nReducedContacts;\n" -" }\n" -" \n" -" }// if (numContactsOut>0)\n" -" }// if (hasSeparatingAxis[i])\n" -" }// if (i<numPairs)\n" -"}\n" -"__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" __global const b3GpuChildShape_t* gpuChildShapes,\n" -" __global const float4* gpuCompoundSepNormalsOut,\n" -" __global const int* gpuHasCompoundSepNormalsOut,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int numCompoundPairs, int maxContactCapacity)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" float4 worldVertsB1[64];\n" -" float4 worldVertsB2[64];\n" -" int capacityWorldVerts = 64; \n" -" float4 localContactsOut[64];\n" -" int localContactCapacity=64;\n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" if (i<numCompoundPairs)\n" -" {\n" -" if (gpuHasCompoundSepNormalsOut[i])\n" -" {\n" -" int bodyIndexA = gpuCompoundPairs[i].x;\n" -" int bodyIndexB = gpuCompoundPairs[i].y;\n" -" \n" -" int childShapeIndexA = gpuCompoundPairs[i].z;\n" -" int childShapeIndexB = gpuCompoundPairs[i].w;\n" -" \n" -" int collidableIndexA = -1;\n" -" int collidableIndexB = -1;\n" -" \n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" \n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" \n" -" if (childShapeIndexA >= 0)\n" -" {\n" -" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -" float4 newOrnA = qtMul(ornA,childOrnA);\n" -" posA = newPosA;\n" -" ornA = newOrnA;\n" -" } else\n" -" {\n" -" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" }\n" -" \n" -" if (childShapeIndexB>=0)\n" -" {\n" -" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" } else\n" -" {\n" -" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" -" }\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" -" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -" minDist, maxDist,\n" -" vertices,faces,indices,\n" -" localContactsOut,localContactCapacity);\n" -" \n" -" if (numLocalContactsOut>0)\n" -" {\n" -" float4 normal = -gpuCompoundSepNormalsOut[i];\n" -" int nPoints = numLocalContactsOut;\n" -" float4* pointsIn = localContactsOut;\n" -" int contactIdx[4];// = {-1,-1,-1,-1};\n" -" contactIdx[0] = -1;\n" -" contactIdx[1] = -1;\n" -" contactIdx[2] = -1;\n" -" contactIdx[3] = -1;\n" -" \n" -" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -" \n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -" c->m_worldNormalOnB = -normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = gpuCompoundPairs[pairIndex].x;\n" -" int bodyB = gpuCompoundPairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_childIndexA = childShapeIndexA;\n" -" c->m_childIndexB = childShapeIndexB;\n" -" for (int i=0;i<nReducedContacts;i++)\n" -" {\n" -" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -" }\n" -" GET_NPOINTS(*c) = nReducedContacts;\n" -" }\n" -" \n" -" }// if (numContactsOut>0)\n" -" }// if (gpuHasCompoundSepNormalsOut[i])\n" -" }// if (i<numCompoundPairs)\n" -"}\n" -"__kernel void sphereSphereCollisionKernel( __global const int4* pairs, \n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const float4* separatingNormals,\n" -" __global const int* hasSeparatingAxis,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int contactCapacity,\n" -" int numPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" -" collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" //sphere-sphere\n" -" float radiusA = collidables[collidableIndexA].m_radius;\n" -" float radiusB = collidables[collidableIndexB].m_radius;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" float4 diff = posA-posB;\n" -" float len = length(diff);\n" -" \n" -" ///iff distance positive, don't generate a new contact\n" -" if ( len <= (radiusA+radiusB))\n" -" {\n" -" ///distance (negative means penetration)\n" -" float dist = len - (radiusA+radiusB);\n" -" float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" -" if (len > 0.00001)\n" -" {\n" -" normalOnSurfaceB = diff / len;\n" -" }\n" -" float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" -" contactPosB.w = dist;\n" -" \n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" if (dstIdx < contactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -normalOnSurfaceB;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_worldPosB[0] = contactPosB;\n" -" c->m_childIndexA = -1;\n" -" c->m_childIndexB = -1;\n" -" GET_NPOINTS(*c) = 1;\n" -" }//if (dstIdx < numPairs)\n" -" }//if ( len <= (radiusA+radiusB))\n" -" }//SHAPE_SPHERE SHAPE_SPHERE\n" -" }//if (i<numPairs)\n" -"} \n" -"__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" -" __global const b3RigidBodyData_t* rigidBodies, \n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" __global const b3GpuChildShape_t* gpuChildShapes,\n" -" __global const float4* separatingNormals,\n" -" __global struct b3Contact4Data* restrict globalContactsOut,\n" -" counter32_t nGlobalContactsOut,\n" -" int contactCapacity,\n" -" int numConcavePairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" float4 worldVertsB1[64];\n" -" float4 worldVertsB2[64];\n" -" int capacityWorldVerts = 64; \n" -" float4 localContactsOut[64];\n" -" int localContactCapacity=64;\n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" if (i<numConcavePairs)\n" -" {\n" -" //negative value means that the pair is invalid\n" -" if (concavePairsIn[i].w<0)\n" -" return;\n" -" int bodyIndexA = concavePairsIn[i].x;\n" -" int bodyIndexB = concavePairsIn[i].y;\n" -" int f = concavePairsIn[i].z;\n" -" int childShapeIndexA = f;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" ///////////////////////////////////////////////////////////////\n" -" \n" -" \n" -" bool overlap = false;\n" -" \n" -" b3ConvexPolyhedronData_t convexPolyhedronA;\n" -" //add 3 vertices of the triangle\n" -" convexPolyhedronA.m_numVertices = 3;\n" -" convexPolyhedronA.m_vertexOffset = 0;\n" -" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -" b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -" \n" -" float4 verticesA[3];\n" -" for (int i=0;i<3;i++)\n" -" {\n" -" int index = indices[face.m_indexOffset+i];\n" -" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -" verticesA[i] = vert;\n" -" localCenter += vert;\n" -" }\n" -" float dmin = FLT_MAX;\n" -" int localCC=0;\n" -" //a triangle has 3 unique edges\n" -" convexPolyhedronA.m_numUniqueEdges = 3;\n" -" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -" float4 uniqueEdgesA[3];\n" -" \n" -" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -" convexPolyhedronA.m_faceOffset = 0;\n" -" \n" -" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -" \n" -" b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -" int indicesA[3+3+2+2+2];\n" -" int curUsedIndices=0;\n" -" int fidx=0;\n" -" //front size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[0] = 0;\n" -" indicesA[1] = 1;\n" -" indicesA[2] = 2;\n" -" curUsedIndices+=3;\n" -" float c = face.m_plane.w;\n" -" facesA[fidx].m_plane.x = normal.x;\n" -" facesA[fidx].m_plane.y = normal.y;\n" -" facesA[fidx].m_plane.z = normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" //back size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[3]=2;\n" -" indicesA[4]=1;\n" -" indicesA[5]=0;\n" -" curUsedIndices+=3;\n" -" float c = dot3F4(normal,verticesA[0]);\n" -" float c1 = -face.m_plane.w;\n" -" facesA[fidx].m_plane.x = -normal.x;\n" -" facesA[fidx].m_plane.y = -normal.y;\n" -" facesA[fidx].m_plane.z = -normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" bool addEdgePlanes = true;\n" -" if (addEdgePlanes)\n" -" {\n" -" int numVertices=3;\n" -" int prevVertex = numVertices-1;\n" -" for (int i=0;i<numVertices;i++)\n" -" {\n" -" float4 v0 = verticesA[i];\n" -" float4 v1 = verticesA[prevVertex];\n" -" \n" -" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -" float c = -dot3F4(edgeNormal,v0);\n" -" facesA[fidx].m_numIndices = 2;\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[curUsedIndices++]=i;\n" -" indicesA[curUsedIndices++]=prevVertex;\n" -" \n" -" facesA[fidx].m_plane.x = edgeNormal.x;\n" -" facesA[fidx].m_plane.y = edgeNormal.y;\n" -" facesA[fidx].m_plane.z = edgeNormal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" fidx++;\n" -" prevVertex = i;\n" -" }\n" -" }\n" -" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" float4 sepAxis = separatingNormals[i];\n" -" \n" -" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -" int childShapeIndexB =-1;\n" -" if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" ///////////////////\n" -" ///compound shape support\n" -" \n" -" childShapeIndexB = concavePairsIn[pairIndex].w;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" \n" -" }\n" -" \n" -" ////////////////////////////////////////\n" -" \n" -" \n" -" \n" -" int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" -" &convexPolyhedronA, &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" worldVertsB1,worldVertsB2,capacityWorldVerts,\n" -" minDist, maxDist,\n" -" &verticesA,&facesA,&indicesA,\n" -" vertices,faces,indices,\n" -" localContactsOut,localContactCapacity);\n" -" \n" -" if (numLocalContactsOut>0)\n" -" {\n" -" float4 normal = -separatingNormals[i];\n" -" int nPoints = numLocalContactsOut;\n" -" float4* pointsIn = localContactsOut;\n" -" int contactIdx[4];// = {-1,-1,-1,-1};\n" -" contactIdx[0] = -1;\n" -" contactIdx[1] = -1;\n" -" contactIdx[2] = -1;\n" -" contactIdx[3] = -1;\n" -" \n" -" int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" -" \n" -" int dstIdx;\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" if (dstIdx<contactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" -" c->m_worldNormalOnB = -normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = concavePairsIn[pairIndex].x;\n" -" int bodyB = concavePairsIn[pairIndex].y;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_childIndexA = childShapeIndexA;\n" -" c->m_childIndexB = childShapeIndexB;\n" -" for (int i=0;i<nReducedContacts;i++)\n" -" {\n" -" c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" -" }\n" -" GET_NPOINTS(*c) = nReducedContacts;\n" -" }\n" -" \n" -" }// if (numContactsOut>0)\n" -" }// if (i<numPairs)\n" -"}\n" -"int findClippingFaces(const float4 separatingNormal,\n" -" __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" -" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -" __global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" int capacityWorldVerts,\n" -" const float minDist, float maxDist,\n" -" __global const float4* vertices,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" __global int4* clippingFaces, int pairIndex)\n" -"{\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" \n" -" \n" -" int closestFaceB=-1;\n" -" float dmax = -FLT_MAX;\n" -" \n" -" {\n" -" for(int face=0;face<hullB->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" -" faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 WorldNormal = qtRotate(ornB, Normal);\n" -" float d = dot3F4(WorldNormal,separatingNormal);\n" -" if (d > dmax)\n" -" {\n" -" dmax = d;\n" -" closestFaceB = face;\n" -" }\n" -" }\n" -" }\n" -" \n" -" {\n" -" const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" -" const int numVertices = polyB.m_numIndices;\n" -" for(int e0=0;e0<numVertices;e0++)\n" -" {\n" -" const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" -" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -" }\n" -" }\n" -" \n" -" int closestFaceA=-1;\n" -" {\n" -" float dmin = FLT_MAX;\n" -" for(int face=0;face<hullA->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(\n" -" faces[hullA->m_faceOffset+face].m_plane.x,\n" -" faces[hullA->m_faceOffset+face].m_plane.y,\n" -" faces[hullA->m_faceOffset+face].m_plane.z,\n" -" 0.f);\n" -" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -" \n" -" float d = dot3F4(faceANormalWS,separatingNormal);\n" -" if (d < dmin)\n" -" {\n" -" dmin = d;\n" -" closestFaceA = face;\n" -" worldNormalsA1[pairIndex] = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" \n" -" int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -" for(int e0=0;e0<numVerticesA;e0++)\n" -" {\n" -" const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -" }\n" -" \n" -" clippingFaces[pairIndex].x = closestFaceA;\n" -" clippingFaces[pairIndex].y = closestFaceB;\n" -" clippingFaces[pairIndex].z = numVerticesA;\n" -" clippingFaces[pairIndex].w = numWorldVertsB1;\n" -" \n" -" \n" -" return numContactsOut;\n" -"}\n" -"int clipFaces(__global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" __global float4* worldVertsB2, \n" -" int capacityWorldVertsB2,\n" -" const float minDist, float maxDist,\n" -" __global int4* clippingFaces,\n" -" int pairIndex)\n" -"{\n" -" int numContactsOut = 0;\n" -" \n" -" int closestFaceA = clippingFaces[pairIndex].x;\n" -" int closestFaceB = clippingFaces[pairIndex].y;\n" -" int numVertsInA = clippingFaces[pairIndex].z;\n" -" int numVertsInB = clippingFaces[pairIndex].w;\n" -" \n" -" int numVertsOut = 0;\n" -" \n" -" if (closestFaceA<0)\n" -" return numContactsOut;\n" -" \n" -" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -" \n" -" \n" -" \n" -" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -" \n" -" for(int e0=0;e0<numVertsInA;e0++)\n" -" {\n" -" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -" const float4 WorldEdge0 = aw - bw;\n" -" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -" float4 worldA1 = aw;\n" -" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -" float4 planeNormalWS = planeNormalWS1;\n" -" float planeEqWS=planeEqWS1;\n" -" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -" __global float4* tmp = pVtxOut;\n" -" pVtxOut = pVtxIn;\n" -" pVtxIn = tmp;\n" -" numVertsInB = numVertsOut;\n" -" numVertsOut = 0;\n" -" }\n" -" \n" -" //float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -" //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -" \n" -" /*for (int i=0;i<numVertsInB;i++)\n" -" {\n" -" pVtxOut[i] = pVtxIn[i];\n" -" }*/\n" -" \n" -" \n" -" \n" -" \n" -" //numVertsInB=0;\n" -" \n" -" float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -" for (int i=0;i<numVertsInB;i++)\n" -" {\n" -" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -" if (depth <=minDist)\n" -" {\n" -" depth = minDist;\n" -" }\n" -" \n" -" if (depth <=maxDist)\n" -" {\n" -" float4 pointInWorld = pVtxIn[i];\n" -" pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -" }\n" -" }\n" -" \n" -" clippingFaces[pairIndex].w =numContactsOut;\n" -" \n" -" \n" -" return numContactsOut;\n" -"}\n" -"__kernel void findClippingFacesKernel( __global const int4* pairs,\n" -" __global const b3RigidBodyData_t* rigidBodies,\n" -" __global const b3Collidable_t* collidables,\n" -" __global const b3ConvexPolyhedronData_t* convexShapes,\n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const b3GpuFace_t* faces,\n" -" __global const int* indices,\n" -" __global const float4* separatingNormals,\n" -" __global const int* hasSeparatingAxis,\n" -" __global int4* clippingFacesOut,\n" -" __global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" int capacityWorldVerts,\n" -" int numPairs\n" -" )\n" -"{\n" -" \n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" \n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" \n" -" int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" -" &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" -" rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" -" rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" -" worldVertsA1,\n" -" worldNormalsA1,\n" -" worldVertsB1,capacityWorldVerts,\n" -" minDist, maxDist,\n" -" vertices,faces,indices,\n" -" clippingFacesOut,i);\n" -" \n" -" \n" -" }// if (hasSeparatingAxis[i])\n" -" }// if (i<numPairs)\n" -" \n" -"}\n" -"__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,\n" -" __global const int* hasSeparatingAxis,\n" -" __global int4* clippingFacesOut,\n" -" __global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" __global float4* worldVertsB2,\n" -" int vertexFaceCapacity,\n" -" int numPairs,\n" -" int debugMode\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" \n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -"// int bodyIndexA = pairs[i].x;\n" -" // int bodyIndexB = pairs[i].y;\n" -" \n" -" int numLocalContactsOut = 0;\n" -" int capacityWorldVertsB2 = vertexFaceCapacity;\n" -" \n" -" __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" -" __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" -" \n" -" {\n" -" __global int4* clippingFaces = clippingFacesOut;\n" -" \n" -" \n" -" int closestFaceA = clippingFaces[pairIndex].x;\n" -" int closestFaceB = clippingFaces[pairIndex].y;\n" -" int numVertsInA = clippingFaces[pairIndex].z;\n" -" int numVertsInB = clippingFaces[pairIndex].w;\n" -" \n" -" int numVertsOut = 0;\n" -" \n" -" if (closestFaceA>=0)\n" -" {\n" -" \n" -" \n" -" \n" -" // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" -" \n" -" for(int e0=0;e0<numVertsInA;e0++)\n" -" {\n" -" const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" -" const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" -" const float4 WorldEdge0 = aw - bw;\n" -" float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" -" float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" -" float4 worldA1 = aw;\n" -" float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" -" float4 planeNormalWS = planeNormalWS1;\n" -" float planeEqWS=planeEqWS1;\n" -" numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" -" __global float4* tmp = pVtxOut;\n" -" pVtxOut = pVtxIn;\n" -" pVtxIn = tmp;\n" -" numVertsInB = numVertsOut;\n" -" numVertsOut = 0;\n" -" }\n" -" \n" -" float4 planeNormalWS = worldNormalsA1[pairIndex];\n" -" float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" -" \n" -" for (int i=0;i<numVertsInB;i++)\n" -" {\n" -" float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" -" if (depth <=minDist)\n" -" {\n" -" depth = minDist;\n" -" }\n" -" \n" -" if (depth <=maxDist)\n" -" {\n" -" float4 pointInWorld = pVtxIn[i];\n" -" pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" -" }\n" -" }\n" -" \n" -" }\n" -" clippingFaces[pairIndex].w =numLocalContactsOut;\n" -" \n" -" }\n" -" \n" -" for (int i=0;i<numLocalContactsOut;i++)\n" -" pVtxIn[i] = pVtxOut[i];\n" -" \n" -" }// if (hasSeparatingAxis[i])\n" -" }// if (i<numPairs)\n" -" \n" -"}\n" -"__kernel void newContactReductionKernel( __global int4* pairs,\n" -" __global const b3RigidBodyData_t* rigidBodies,\n" -" __global const float4* separatingNormals,\n" -" __global const int* hasSeparatingAxis,\n" -" __global struct b3Contact4Data* globalContactsOut,\n" -" __global int4* clippingFaces,\n" -" __global float4* worldVertsB2,\n" -" volatile __global int* nGlobalContactsOut,\n" -" int vertexFaceCapacity,\n" -" int contactCapacity,\n" -" int numPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" int pairIndex = i;\n" -" \n" -" int4 contactIdx;\n" -" contactIdx=make_int4(0,1,2,3);\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" \n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -" \n" -" \n" -" \n" -" int nPoints = clippingFaces[pairIndex].w;\n" -" \n" -" if (nPoints>0)\n" -" {\n" -" __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" -" float4 normal = -separatingNormals[i];\n" -" \n" -" int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" -" \n" -" int mprContactIndex = pairs[pairIndex].z;\n" -" int dstIdx = mprContactIndex;\n" -" if (dstIdx<0)\n" -" {\n" -" AppendInc( nGlobalContactsOut, dstIdx );\n" -" }\n" -"//#if 0\n" -" \n" -" if (dstIdx < contactCapacity)\n" -" {\n" -" __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" -" c->m_worldNormalOnB = -normal;\n" -" c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" -" c->m_batchIdx = pairIndex;\n" -" int bodyA = pairs[pairIndex].x;\n" -" int bodyB = pairs[pairIndex].y;\n" -" pairs[pairIndex].w = dstIdx;\n" -" c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" -" c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" -" c->m_childIndexA =-1;\n" -" c->m_childIndexB =-1;\n" -" switch (nReducedContacts)\n" -" {\n" -" case 4:\n" -" c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" -" case 3:\n" -" c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" -" case 2:\n" -" c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" -" case 1:\n" -" if (mprContactIndex<0)//test\n" -" c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" -" default:\n" -" {\n" -" }\n" -" };\n" -" \n" -" GET_NPOINTS(*c) = nReducedContacts;\n" -" \n" -" }\n" -" \n" -" \n" -"//#endif\n" -" \n" -" }// if (numContactsOut>0)\n" -" }// if (hasSeparatingAxis[i])\n" -" }// if (i<numPairs)\n" -" \n" -" \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl deleted file mode 100644 index 31ca43b8cd..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl +++ /dev/null @@ -1,1220 +0,0 @@ - -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 - -#define B3_MAX_STACK_DEPTH 256 - - -typedef unsigned int u32; - -///keep this in sync with btCollidable.h -typedef struct -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - union - { - float m_radius; - int m_compoundBvhIndex; - }; - - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} b3QuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - - -int getTriangleIndex(const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? d0:d1; - return true; -} - - - - -inline bool IsAlmostZero(const float4 v) -{ - if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - result = false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -inline int findClippingFaces(const float4 separatingNormal, - const ConvexPolyhedronCL* hullA, - __global const ConvexPolyhedronCL* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=0; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - if (numVertices<0) - numVertices = 0; - - for(int e0=0;e0<numVertices;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - if (numVerticesA<0) - numVerticesA=0; - - for(int e0=0;e0<numVerticesA;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - __global float* dmins, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, - posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces,indices, - verticesA,uniqueEdgesA,facesA,indicesA, - &sepAxis,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - - if (hasSeparatingAxis) - { - dmins[i] = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - __global float* dmins, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - if (!concaveHasSeparatingNormals[i]) - return; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = dmins[i]; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - sepAxis = concaveSeparatingNormalsOut[pairIdx]; - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - { - bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - - if (!sepEE) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - - - if (hasSeparatingAxis) - { - sepAxis.w = dmin; - dmins[i] = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - float minDist = -1e30f; - float maxDist = 0.02f; - - - findClippingFaces(sepAxis, - &convexPolyhedronA, - &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsA1GPU, - worldNormalsAGPU, - worldVertsB1GPU, - vertexFaceCapacity, - minDist, maxDist, - verticesA, - facesA, - indicesA, - vertices, - faces, - indices, - clippingFacesOut, pairIdx); - - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - - concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h deleted file mode 100644 index 611569cacf..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h +++ /dev/null @@ -1,1457 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satConcaveKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" \n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -" \n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes\n" -" int m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -" float4 m_aabbMin;\n" -" float4 m_aabbMax;\n" -" float4 m_quantization;\n" -" int m_numNodes;\n" -" int m_numSubTrees;\n" -" int m_nodeOffset;\n" -" int m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -" \n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes, points to the root of the subtree\n" -" int m_rootNodeIndex;\n" -" //4 bytes\n" -" int m_subtreeSize;\n" -" int m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -" float4 m_childPosition;\n" -" float4 m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -" float4 m_localCenter;\n" -" float4 m_extents;\n" -" float4 mC;\n" -" float4 mE;\n" -" \n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -" union\n" -" {\n" -" float m_min[4];\n" -" b3Float4 m_minVec;\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float m_max[4];\n" -" b3Float4 m_maxVec;\n" -" int m_signedMaxIndices[4];\n" -" };\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -" b3Float4ConstArg pos,\n" -" b3QuatConstArg orn,\n" -" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -" b3Mat3x3 m;\n" -" m = b3QuatGetRotationMatrix(orn);\n" -" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -" \n" -" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -" 0.f);\n" -" *aabbMinOut = center-extent;\n" -" *aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -" float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -" \n" -"// float4 a1 = make_float4(a.xyz,0.f);\n" -"// float4 b1 = make_float4(b.xyz,0.f);\n" -"// return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -" \n" -" // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -" \n" -" //return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" v = make_float4(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -" min[0] = FLT_MAX;\n" -" max[0] = -FLT_MAX;\n" -" int numVerts = hull->m_numVertices;\n" -" const float4 localDir = qtInvRotate(orn,*dir);\n" -" float offset = dot(pos,*dir);\n" -" for(int i=0;i<numVerts;i++)\n" -" {\n" -" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -" if(dp < min[0]) \n" -" min[0] = dp;\n" -" if(dp > max[0]) \n" -" max[0] = dp;\n" -" }\n" -" if(min[0]>max[0])\n" -" {\n" -" float tmp = min[0];\n" -" min[0] = max[0];\n" -" max[0] = tmp;\n" -" }\n" -" min[0] += offset;\n" -" max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -" min[0] = FLT_MAX;\n" -" max[0] = -FLT_MAX;\n" -" int numVerts = hull->m_numVertices;\n" -" const float4 localDir = qtInvRotate(orn,*dir);\n" -" float offset = dot(pos,*dir);\n" -" for(int i=0;i<numVerts;i++)\n" -" {\n" -" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -" if(dp < min[0]) \n" -" min[0] = dp;\n" -" if(dp > max[0]) \n" -" max[0] = dp;\n" -" }\n" -" if(min[0]>max[0])\n" -" {\n" -" float tmp = min[0];\n" -" min[0] = max[0];\n" -" max[0] = tmp;\n" -" }\n" -" min[0] += offset;\n" -" max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA,const float4 ornA,\n" -" const float4 posB,const float4 ornB,\n" -" float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" *depth = d0<d1 ? d0:d1;\n" -" return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -" if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -" return false;\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" \n" -" const float4* verticesA, \n" -" const float4* uniqueEdgesA, \n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB, \n" -" __global const float4* uniqueEdgesB, \n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" {\n" -" int numFacesA = hullA->m_numFaces;\n" -" // Test normals from hullA\n" -" for(int i=0;i<numFacesA;i++)\n" -" {\n" -" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -" float4 faceANormalWS = qtRotate(ornA,normal);\n" -" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -" faceANormalWS*=-1.f;\n" -" curPlaneTests++;\n" -" float d;\n" -" if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -" return false;\n" -" if(d<*dmin)\n" -" {\n" -" *dmin = d;\n" -" *sep = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* verticesA, \n" -" __global const float4* uniqueEdgesA, \n" -" __global const btGpuFace* facesA,\n" -" __global const int* indicesA,\n" -" const float4* verticesB,\n" -" const float4* uniqueEdgesB, \n" -" const btGpuFace* facesB,\n" -" const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" {\n" -" int numFacesA = hullA->m_numFaces;\n" -" // Test normals from hullA\n" -" for(int i=0;i<numFacesA;i++)\n" -" {\n" -" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -" float4 faceANormalWS = qtRotate(ornA,normal);\n" -" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -" faceANormalWS *= -1.f;\n" -" curPlaneTests++;\n" -" float d;\n" -" if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -" return false;\n" -" if(d<*dmin)\n" -" {\n" -" *dmin = d;\n" -" *sep = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" const float4* verticesA, \n" -" const float4* uniqueEdgesA, \n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB, \n" -" __global const float4* uniqueEdgesB, \n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" int curEdgeEdge = 0;\n" -" // Test edges\n" -" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -" {\n" -" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -" float4 edge0World = qtRotate(ornA,edge0);\n" -" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -" {\n" -" const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -" float4 edge1World = qtRotate(ornB,edge1);\n" -" float4 crossje = cross3(edge0World,edge1World);\n" -" curEdgeEdge++;\n" -" if(!IsAlmostZero(crossje))\n" -" {\n" -" crossje = normalize3(crossje);\n" -" if (dot3F4(DeltaC2,crossje)<0)\n" -" crossje *= -1.f;\n" -" float dist;\n" -" bool result = true;\n" -" {\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -" project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -" \n" -" if(Max0<Min1 || Max1<Min0)\n" -" result = false;\n" -" \n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" dist = d0<d1 ? d0:d1;\n" -" result = true;\n" -" }\n" -" \n" -" if(dist<*dmin)\n" -" {\n" -" *dmin = dist;\n" -" *sep = crossje;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" \n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"inline int findClippingFaces(const float4 separatingNormal,\n" -" const ConvexPolyhedronCL* hullA, \n" -" __global const ConvexPolyhedronCL* hullB,\n" -" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -" __global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" int capacityWorldVerts,\n" -" const float minDist, float maxDist,\n" -" const float4* verticesA,\n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB,\n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" __global int4* clippingFaces, int pairIndex)\n" -"{\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" \n" -" \n" -" int closestFaceB=0;\n" -" float dmax = -FLT_MAX;\n" -" \n" -" {\n" -" for(int face=0;face<hullB->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 WorldNormal = qtRotate(ornB, Normal);\n" -" float d = dot3F4(WorldNormal,separatingNormal);\n" -" if (d > dmax)\n" -" {\n" -" dmax = d;\n" -" closestFaceB = face;\n" -" }\n" -" }\n" -" }\n" -" \n" -" {\n" -" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -" int numVertices = polyB.m_numIndices;\n" -" if (numVertices>capacityWorldVerts)\n" -" numVertices = capacityWorldVerts;\n" -" if (numVertices<0)\n" -" numVertices = 0;\n" -" \n" -" for(int e0=0;e0<numVertices;e0++)\n" -" {\n" -" if (e0<capacityWorldVerts)\n" -" {\n" -" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -" }\n" -" }\n" -" }\n" -" \n" -" int closestFaceA=0;\n" -" {\n" -" float dmin = FLT_MAX;\n" -" for(int face=0;face<hullA->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(\n" -" facesA[hullA->m_faceOffset+face].m_plane.x,\n" -" facesA[hullA->m_faceOffset+face].m_plane.y,\n" -" facesA[hullA->m_faceOffset+face].m_plane.z,\n" -" 0.f);\n" -" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -" \n" -" float d = dot3F4(faceANormalWS,separatingNormal);\n" -" if (d < dmin)\n" -" {\n" -" dmin = d;\n" -" closestFaceA = face;\n" -" worldNormalsA1[pairIndex] = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" \n" -" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -" if (numVerticesA>capacityWorldVerts)\n" -" numVerticesA = capacityWorldVerts;\n" -" if (numVerticesA<0)\n" -" numVerticesA=0;\n" -" \n" -" for(int e0=0;e0<numVerticesA;e0++)\n" -" {\n" -" if (e0<capacityWorldVerts)\n" -" {\n" -" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -" }\n" -" }\n" -" \n" -" clippingFaces[pairIndex].x = closestFaceA;\n" -" clippingFaces[pairIndex].y = closestFaceB;\n" -" clippingFaces[pairIndex].z = numVerticesA;\n" -" clippingFaces[pairIndex].w = numWorldVertsB1;\n" -" \n" -" \n" -" return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" -" __global const BodyData* rigidBodies,\n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes,\n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global btAabbCL* aabbs,\n" -" __global float4* concaveSeparatingNormalsOut,\n" -" __global int* concaveHasSeparatingNormals,\n" -" __global int4* clippingFacesOut,\n" -" __global float4* worldVertsA1GPU,\n" -" __global float4* worldNormalsAGPU,\n" -" __global float4* worldVertsB1GPU,\n" -" __global float* dmins,\n" -" int vertexFaceCapacity,\n" -" int numConcavePairs\n" -" )\n" -"{\n" -" \n" -" int i = get_global_id(0);\n" -" if (i>=numConcavePairs)\n" -" return;\n" -" \n" -" concaveHasSeparatingNormals[i] = 0;\n" -" \n" -" int pairIdx = i;\n" -" \n" -" int bodyIndexA = concavePairs[i].x;\n" -" int bodyIndexB = concavePairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -" collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" concavePairs[pairIdx].w = -1;\n" -" return;\n" -" }\n" -" \n" -" \n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" int numActualConcaveConvexTests = 0;\n" -" \n" -" int f = concavePairs[i].z;\n" -" \n" -" bool overlap = false;\n" -" \n" -" ConvexPolyhedronCL convexPolyhedronA;\n" -" \n" -" //add 3 vertices of the triangle\n" -" convexPolyhedronA.m_numVertices = 3;\n" -" convexPolyhedronA.m_vertexOffset = 0;\n" -" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -" \n" -" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -" float4 triMinAabb, triMaxAabb;\n" -" btAabbCL triAabb;\n" -" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -" \n" -" float4 verticesA[3];\n" -" for (int i=0;i<3;i++)\n" -" {\n" -" int index = indices[face.m_indexOffset+i];\n" -" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -" verticesA[i] = vert;\n" -" localCenter += vert;\n" -" \n" -" triAabb.m_min = min(triAabb.m_min,vert);\n" -" triAabb.m_max = max(triAabb.m_max,vert);\n" -" \n" -" }\n" -" \n" -" overlap = true;\n" -" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -" \n" -" if (overlap)\n" -" {\n" -" float dmin = FLT_MAX;\n" -" int hasSeparatingAxis=5;\n" -" float4 sepAxis=make_float4(1,2,3,4);\n" -" \n" -" int localCC=0;\n" -" numActualConcaveConvexTests++;\n" -" \n" -" //a triangle has 3 unique edges\n" -" convexPolyhedronA.m_numUniqueEdges = 3;\n" -" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -" float4 uniqueEdgesA[3];\n" -" \n" -" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -" \n" -" \n" -" convexPolyhedronA.m_faceOffset = 0;\n" -" \n" -" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -" \n" -" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -" int indicesA[3+3+2+2+2];\n" -" int curUsedIndices=0;\n" -" int fidx=0;\n" -" \n" -" //front size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[0] = 0;\n" -" indicesA[1] = 1;\n" -" indicesA[2] = 2;\n" -" curUsedIndices+=3;\n" -" float c = face.m_plane.w;\n" -" facesA[fidx].m_plane.x = normal.x;\n" -" facesA[fidx].m_plane.y = normal.y;\n" -" facesA[fidx].m_plane.z = normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" //back size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[3]=2;\n" -" indicesA[4]=1;\n" -" indicesA[5]=0;\n" -" curUsedIndices+=3;\n" -" float c = dot(normal,verticesA[0]);\n" -" float c1 = -face.m_plane.w;\n" -" facesA[fidx].m_plane.x = -normal.x;\n" -" facesA[fidx].m_plane.y = -normal.y;\n" -" facesA[fidx].m_plane.z = -normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" \n" -" bool addEdgePlanes = true;\n" -" if (addEdgePlanes)\n" -" {\n" -" int numVertices=3;\n" -" int prevVertex = numVertices-1;\n" -" for (int i=0;i<numVertices;i++)\n" -" {\n" -" float4 v0 = verticesA[i];\n" -" float4 v1 = verticesA[prevVertex];\n" -" \n" -" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -" float c = -dot(edgeNormal,v0);\n" -" \n" -" facesA[fidx].m_numIndices = 2;\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[curUsedIndices++]=i;\n" -" indicesA[curUsedIndices++]=prevVertex;\n" -" \n" -" facesA[fidx].m_plane.x = edgeNormal.x;\n" -" facesA[fidx].m_plane.y = edgeNormal.y;\n" -" facesA[fidx].m_plane.z = edgeNormal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" fidx++;\n" -" prevVertex = i;\n" -" }\n" -" }\n" -" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -" \n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" \n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" \n" -" \n" -" \n" -" \n" -" ///////////////////\n" -" ///compound shape support\n" -" \n" -" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" int compoundChild = concavePairs[pairIdx].w;\n" -" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" }\n" -" //////////////////\n" -" \n" -" float4 c0local = convexPolyhedronA.m_localCenter;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" \n" -" \n" -" bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" vertices,uniqueEdges,faces,indices,\n" -" &sepAxis,&dmin);\n" -" hasSeparatingAxis = 4;\n" -" if (!sepA)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" -" posB,ornB,\n" -" posA,ornA,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,indices,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" &sepAxis,&dmin);\n" -" \n" -" if (!sepB)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis = 1;\n" -" }\n" -" } \n" -" \n" -" if (hasSeparatingAxis)\n" -" {\n" -" dmins[i] = dmin;\n" -" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -" concaveHasSeparatingNormals[i]=1;\n" -" \n" -" } else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -" }\n" -" else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -"}\n" -"// work-in-progress\n" -"__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" -" __global const BodyData* rigidBodies,\n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes,\n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global btAabbCL* aabbs,\n" -" __global float4* concaveSeparatingNormalsOut,\n" -" __global int* concaveHasSeparatingNormals,\n" -" __global int4* clippingFacesOut,\n" -" __global float4* worldVertsA1GPU,\n" -" __global float4* worldNormalsAGPU,\n" -" __global float4* worldVertsB1GPU,\n" -" __global float* dmins,\n" -" int vertexFaceCapacity,\n" -" int numConcavePairs\n" -" )\n" -"{\n" -" \n" -" int i = get_global_id(0);\n" -" if (i>=numConcavePairs)\n" -" return;\n" -" \n" -" if (!concaveHasSeparatingNormals[i])\n" -" return;\n" -" \n" -" int pairIdx = i;\n" -" \n" -" int bodyIndexA = concavePairs[i].x;\n" -" int bodyIndexB = concavePairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" int numActualConcaveConvexTests = 0;\n" -" \n" -" int f = concavePairs[i].z;\n" -" \n" -" bool overlap = false;\n" -" \n" -" ConvexPolyhedronCL convexPolyhedronA;\n" -" \n" -" //add 3 vertices of the triangle\n" -" convexPolyhedronA.m_numVertices = 3;\n" -" convexPolyhedronA.m_vertexOffset = 0;\n" -" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -" \n" -" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -" float4 triMinAabb, triMaxAabb;\n" -" btAabbCL triAabb;\n" -" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -" \n" -" float4 verticesA[3];\n" -" for (int i=0;i<3;i++)\n" -" {\n" -" int index = indices[face.m_indexOffset+i];\n" -" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -" verticesA[i] = vert;\n" -" localCenter += vert;\n" -" \n" -" triAabb.m_min = min(triAabb.m_min,vert);\n" -" triAabb.m_max = max(triAabb.m_max,vert);\n" -" \n" -" }\n" -" \n" -" overlap = true;\n" -" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -" \n" -" if (overlap)\n" -" {\n" -" float dmin = dmins[i];\n" -" int hasSeparatingAxis=5;\n" -" float4 sepAxis=make_float4(1,2,3,4);\n" -" sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" -" \n" -" int localCC=0;\n" -" numActualConcaveConvexTests++;\n" -" \n" -" //a triangle has 3 unique edges\n" -" convexPolyhedronA.m_numUniqueEdges = 3;\n" -" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -" float4 uniqueEdgesA[3];\n" -" \n" -" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -" \n" -" \n" -" convexPolyhedronA.m_faceOffset = 0;\n" -" \n" -" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -" \n" -" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -" int indicesA[3+3+2+2+2];\n" -" int curUsedIndices=0;\n" -" int fidx=0;\n" -" \n" -" //front size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[0] = 0;\n" -" indicesA[1] = 1;\n" -" indicesA[2] = 2;\n" -" curUsedIndices+=3;\n" -" float c = face.m_plane.w;\n" -" facesA[fidx].m_plane.x = normal.x;\n" -" facesA[fidx].m_plane.y = normal.y;\n" -" facesA[fidx].m_plane.z = normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" //back size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[3]=2;\n" -" indicesA[4]=1;\n" -" indicesA[5]=0;\n" -" curUsedIndices+=3;\n" -" float c = dot(normal,verticesA[0]);\n" -" float c1 = -face.m_plane.w;\n" -" facesA[fidx].m_plane.x = -normal.x;\n" -" facesA[fidx].m_plane.y = -normal.y;\n" -" facesA[fidx].m_plane.z = -normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" \n" -" bool addEdgePlanes = true;\n" -" if (addEdgePlanes)\n" -" {\n" -" int numVertices=3;\n" -" int prevVertex = numVertices-1;\n" -" for (int i=0;i<numVertices;i++)\n" -" {\n" -" float4 v0 = verticesA[i];\n" -" float4 v1 = verticesA[prevVertex];\n" -" \n" -" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -" float c = -dot(edgeNormal,v0);\n" -" \n" -" facesA[fidx].m_numIndices = 2;\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[curUsedIndices++]=i;\n" -" indicesA[curUsedIndices++]=prevVertex;\n" -" \n" -" facesA[fidx].m_plane.x = edgeNormal.x;\n" -" facesA[fidx].m_plane.y = edgeNormal.y;\n" -" facesA[fidx].m_plane.z = edgeNormal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" fidx++;\n" -" prevVertex = i;\n" -" }\n" -" }\n" -" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -" \n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" \n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" \n" -" \n" -" \n" -" \n" -" ///////////////////\n" -" ///compound shape support\n" -" \n" -" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" int compoundChild = concavePairs[pairIdx].w;\n" -" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" }\n" -" //////////////////\n" -" \n" -" float4 c0local = convexPolyhedronA.m_localCenter;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" \n" -" \n" -" {\n" -" bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" vertices,uniqueEdges,faces,indices,\n" -" &sepAxis,&dmin);\n" -" \n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis = 1;\n" -" }\n" -" }\n" -" \n" -" \n" -" if (hasSeparatingAxis)\n" -" {\n" -" sepAxis.w = dmin;\n" -" dmins[i] = dmin;\n" -" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -" concaveHasSeparatingNormals[i]=1;\n" -" \n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" \n" -" findClippingFaces(sepAxis,\n" -" &convexPolyhedronA,\n" -" &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" worldVertsA1GPU,\n" -" worldNormalsAGPU,\n" -" worldVertsB1GPU,\n" -" vertexFaceCapacity,\n" -" minDist, maxDist,\n" -" verticesA,\n" -" facesA,\n" -" indicesA,\n" -" vertices,\n" -" faces,\n" -" indices,\n" -" clippingFacesOut, pairIdx);\n" -" \n" -" \n" -" } else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -" }\n" -" else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -" \n" -" concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h deleted file mode 100644 index 6f8b0a90db..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ /dev/null @@ -1,2104 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satKernelsCL= \ -"//keep this enum in sync with the CPU version (in btCollidable.h)\n" -"//written by Erwin Coumans\n" -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define TRIANGLE_NUM_CONVEX_FACES 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define B3_MAX_STACK_DEPTH 256\n" -"typedef unsigned int u32;\n" -"///keep this in sync with btCollidable.h\n" -"typedef struct\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" \n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -" \n" -"} btCollidableGpu;\n" -"#define MAX_NUM_PARTS_IN_BITS 10\n" -"///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" -"///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes\n" -" int m_escapeIndexOrTriangleIndex;\n" -"} b3QuantizedBvhNode;\n" -"typedef struct\n" -"{\n" -" float4 m_aabbMin;\n" -" float4 m_aabbMax;\n" -" float4 m_quantization;\n" -" int m_numNodes;\n" -" int m_numSubTrees;\n" -" int m_nodeOffset;\n" -" int m_subTreeOffset;\n" -"} b3BvhInfo;\n" -"int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" unsigned int x=0;\n" -" unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" -" // Get only the lower bits where the triangle index is stored\n" -" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" -"}\n" -"int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -"int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" -" return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" -"}\n" -" \n" -"int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" -"{\n" -" return -rootNode->m_escapeIndexOrTriangleIndex;\n" -"}\n" -"typedef struct\n" -"{\n" -" //12 bytes\n" -" unsigned short int m_quantizedAabbMin[3];\n" -" unsigned short int m_quantizedAabbMax[3];\n" -" //4 bytes, points to the root of the subtree\n" -" int m_rootNodeIndex;\n" -" //4 bytes\n" -" int m_subtreeSize;\n" -" int m_padding[3];\n" -"} b3BvhSubtreeInfo;\n" -"typedef struct\n" -"{\n" -" float4 m_childPosition;\n" -" float4 m_childOrientation;\n" -" int m_shapeIndex;\n" -" int m_unused0;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"} btGpuChildShape;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} BodyData;\n" -"typedef struct \n" -"{\n" -" float4 m_localCenter;\n" -" float4 m_extents;\n" -" float4 mC;\n" -" float4 mE;\n" -" \n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -" union\n" -" {\n" -" float m_min[4];\n" -" b3Float4 m_minVec;\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float m_max[4];\n" -" b3Float4 m_maxVec;\n" -" int m_signedMaxIndices[4];\n" -" };\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -" b3Float4ConstArg pos,\n" -" b3QuatConstArg orn,\n" -" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -" b3Mat3x3 m;\n" -" m = b3QuatGetRotationMatrix(orn);\n" -" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -" \n" -" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -" 0.f);\n" -" *aabbMinOut = center-extent;\n" -" *aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose,\n" -"including commercial applications, and to alter it and redistribute it freely,\n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"#ifndef B3_INT2_H\n" -"#define B3_INT2_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#define b3UnsignedInt2 uint2\n" -"#define b3Int2 int2\n" -"#define b3MakeInt2 (int2)\n" -"#endif //__cplusplus\n" -"#endif\n" -"typedef struct\n" -"{\n" -" float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -"} btGpuFace;\n" -"#define make_float4 (float4)\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -" \n" -"// float4 a1 = make_float4(a.xyz,0.f);\n" -"// float4 b1 = make_float4(b.xyz,0.f);\n" -"// return cross(a1,b1);\n" -"//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" -" \n" -" // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" -" \n" -" //return c;\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" v = make_float4(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"__inline\n" -"float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" -"{\n" -" return qtRotate( *orientation, *p ) + (*translation);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"}\n" -"inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" -"const float4* dir, const float4* vertices, float* min, float* max)\n" -"{\n" -" min[0] = FLT_MAX;\n" -" max[0] = -FLT_MAX;\n" -" int numVerts = hull->m_numVertices;\n" -" const float4 localDir = qtInvRotate(orn,*dir);\n" -" float offset = dot(pos,*dir);\n" -" for(int i=0;i<numVerts;i++)\n" -" {\n" -" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -" if(dp < min[0]) \n" -" min[0] = dp;\n" -" if(dp > max[0]) \n" -" max[0] = dp;\n" -" }\n" -" if(min[0]>max[0])\n" -" {\n" -" float tmp = min[0];\n" -" min[0] = max[0];\n" -" max[0] = tmp;\n" -" }\n" -" min[0] += offset;\n" -" max[0] += offset;\n" -"}\n" -"inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" -"const float4* dir, __global const float4* vertices, float* min, float* max)\n" -"{\n" -" min[0] = FLT_MAX;\n" -" max[0] = -FLT_MAX;\n" -" int numVerts = hull->m_numVertices;\n" -" const float4 localDir = qtInvRotate(orn,*dir);\n" -" float offset = dot(pos,*dir);\n" -" for(int i=0;i<numVerts;i++)\n" -" {\n" -" float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" -" if(dp < min[0]) \n" -" min[0] = dp;\n" -" if(dp > max[0]) \n" -" max[0] = dp;\n" -" }\n" -" if(min[0]>max[0])\n" -" {\n" -" float tmp = min[0];\n" -" min[0] = max[0];\n" -" max[0] = tmp;\n" -" }\n" -" min[0] += offset;\n" -" max[0] += offset;\n" -"}\n" -"inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA,const float4 ornA,\n" -" const float4 posB,const float4 ornB,\n" -" float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" -"{\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" -" project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" *depth = d0<d1 ? d0:d1;\n" -" return true;\n" -"}\n" -"inline bool IsAlmostZero(const float4 v)\n" -"{\n" -" if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" -" return false;\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" \n" -" const float4* verticesA, \n" -" const float4* uniqueEdgesA, \n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB, \n" -" __global const float4* uniqueEdgesB, \n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" {\n" -" int numFacesA = hullA->m_numFaces;\n" -" // Test normals from hullA\n" -" for(int i=0;i<numFacesA;i++)\n" -" {\n" -" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -" float4 faceANormalWS = qtRotate(ornA,normal);\n" -" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -" faceANormalWS*=-1.f;\n" -" curPlaneTests++;\n" -" float d;\n" -" if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" -" return false;\n" -" if(d<*dmin)\n" -" {\n" -" *dmin = d;\n" -" *sep = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* verticesA, \n" -" __global const float4* uniqueEdgesA, \n" -" __global const btGpuFace* facesA,\n" -" __global const int* indicesA,\n" -" const float4* verticesB,\n" -" const float4* uniqueEdgesB, \n" -" const btGpuFace* facesB,\n" -" const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" {\n" -" int numFacesA = hullA->m_numFaces;\n" -" // Test normals from hullA\n" -" for(int i=0;i<numFacesA;i++)\n" -" {\n" -" const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" -" float4 faceANormalWS = qtRotate(ornA,normal);\n" -" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -" faceANormalWS *= -1.f;\n" -" curPlaneTests++;\n" -" float d;\n" -" if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" -" return false;\n" -" if(d<*dmin)\n" -" {\n" -" *dmin = d;\n" -" *sep = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" const float4* verticesA, \n" -" const float4* uniqueEdgesA, \n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB, \n" -" __global const float4* uniqueEdgesB, \n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" int curEdgeEdge = 0;\n" -" // Test edges\n" -" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -" {\n" -" const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" -" float4 edge0World = qtRotate(ornA,edge0);\n" -" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -" {\n" -" const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" -" float4 edge1World = qtRotate(ornB,edge1);\n" -" float4 crossje = cross3(edge0World,edge1World);\n" -" curEdgeEdge++;\n" -" if(!IsAlmostZero(crossje))\n" -" {\n" -" crossje = normalize3(crossje);\n" -" if (dot3F4(DeltaC2,crossje)<0)\n" -" crossje *= -1.f;\n" -" float dist;\n" -" bool result = true;\n" -" {\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" -" project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" -" \n" -" if(Max0<Min1 || Max1<Min0)\n" -" result = false;\n" -" \n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" dist = d0<d1 ? d0:d1;\n" -" result = true;\n" -" }\n" -" \n" -" if(dist<*dmin)\n" -" {\n" -" *dmin = dist;\n" -" *sep = crossje;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" \n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA,const float4 ornA,\n" -" const float4 posB,const float4 ornB,\n" -" float4* sep_axis, __global const float4* vertices,float* depth)\n" -"{\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" -" project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" *depth = d0<d1 ? d0:d1;\n" -" return true;\n" -"}\n" -"bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* vertices, \n" -" __global const float4* uniqueEdges, \n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" \n" -" int curPlaneTests=0;\n" -" {\n" -" int numFacesA = hullA->m_numFaces;\n" -" // Test normals from hullA\n" -" for(int i=0;i<numFacesA;i++)\n" -" {\n" -" const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" -" float4 faceANormalWS = qtRotate(ornA,normal);\n" -" \n" -" if (dot3F4(DeltaC2,faceANormalWS)<0)\n" -" faceANormalWS*=-1.f;\n" -" \n" -" curPlaneTests++;\n" -" \n" -" float d;\n" -" if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" -" return false;\n" -" \n" -" if(d<*dmin)\n" -" {\n" -" *dmin = d;\n" -" *sep = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" \n" -" return true;\n" -"}\n" -"bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* vertices,\n" -" __global const float4* unitSphereDirections,\n" -" int numUnitSphereDirections,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" int curEdgeEdge = 0;\n" -" // Test unit sphere directions\n" -" for (int i=0;i<numUnitSphereDirections;i++)\n" -" {\n" -" float4 crossje;\n" -" crossje = unitSphereDirections[i]; \n" -" if (dot3F4(DeltaC2,crossje)>0)\n" -" crossje *= -1.f;\n" -" {\n" -" float dist;\n" -" bool result = true;\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -" \n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" \n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" dist = d0<d1 ? d0:d1;\n" -" result = true;\n" -" \n" -" if(dist<*dmin)\n" -" {\n" -" *dmin = dist;\n" -" *sep = crossje;\n" -" }\n" -" }\n" -" }\n" -" \n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" -" const float4 posA1,\n" -" const float4 ornA,\n" -" const float4 posB1,\n" -" const float4 ornB,\n" -" const float4 DeltaC2,\n" -" __global const float4* vertices, \n" -" __global const float4* uniqueEdges, \n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" float4* sep,\n" -" float* dmin)\n" -"{\n" -" \n" -" float4 posA = posA1;\n" -" posA.w = 0.f;\n" -" float4 posB = posB1;\n" -" posB.w = 0.f;\n" -" int curPlaneTests=0;\n" -" int curEdgeEdge = 0;\n" -" // Test edges\n" -" for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" -" {\n" -" const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" -" float4 edge0World = qtRotate(ornA,edge0);\n" -" for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" -" {\n" -" const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" -" float4 edge1World = qtRotate(ornB,edge1);\n" -" float4 crossje = cross3(edge0World,edge1World);\n" -" curEdgeEdge++;\n" -" if(!IsAlmostZero(crossje))\n" -" {\n" -" crossje = normalize3(crossje);\n" -" if (dot3F4(DeltaC2,crossje)<0)\n" -" crossje*=-1.f;\n" -" \n" -" float dist;\n" -" bool result = true;\n" -" {\n" -" float Min0,Max0;\n" -" float Min1,Max1;\n" -" project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" -" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" -" \n" -" if(Max0<Min1 || Max1<Min0)\n" -" return false;\n" -" \n" -" float d0 = Max0 - Min1;\n" -" float d1 = Max1 - Min0;\n" -" dist = d0<d1 ? d0:d1;\n" -" result = true;\n" -" }\n" -" \n" -" if(dist<*dmin)\n" -" {\n" -" *dmin = dist;\n" -" *sep = crossje;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" \n" -" if((dot3F4(-DeltaC2,*sep))>0.0f)\n" -" {\n" -" *sep = -(*sep);\n" -" }\n" -" return true;\n" -"}\n" -"// work-in-progress\n" -"__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global volatile float4* gpuCompoundSepNormalsOut,\n" -" __global volatile int* gpuHasCompoundSepNormalsOut,\n" -" int numCompoundPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i<numCompoundPairs)\n" -" {\n" -" int bodyIndexA = gpuCompoundPairs[i].x;\n" -" int bodyIndexB = gpuCompoundPairs[i].y;\n" -" int childShapeIndexA = gpuCompoundPairs[i].z;\n" -" int childShapeIndexB = gpuCompoundPairs[i].w;\n" -" \n" -" int collidableIndexA = -1;\n" -" int collidableIndexB = -1;\n" -" \n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" \n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" \n" -" if (childShapeIndexA >= 0)\n" -" {\n" -" collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -" float4 newOrnA = qtMul(ornA,childOrnA);\n" -" posA = newPosA;\n" -" ornA = newOrnA;\n" -" } else\n" -" {\n" -" collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" }\n" -" \n" -" if (childShapeIndexB>=0)\n" -" {\n" -" collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" } else\n" -" {\n" -" collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" -" }\n" -" \n" -" gpuHasCompoundSepNormalsOut[i] = 0;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" -" int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" -" \n" -" if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" -" {\n" -" return;\n" -" }\n" -" int hasSeparatingAxis = 5;\n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" posA.w = 0.f;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" float4 sepNormal = make_float4(1,0,0,0);\n" -" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -" hasSeparatingAxis = 4;\n" -" if (!sepA)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -" if (!sepB)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else//(!sepB)\n" -" {\n" -" bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" -" if (sepEE)\n" -" {\n" -" gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" -" gpuHasCompoundSepNormalsOut[i] = 1;\n" -" }//sepEE\n" -" }//(!sepB)\n" -" }//(!sepA)\n" -" \n" -" \n" -" }\n" -" \n" -"}\n" -"inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -" b3Float4 vecOut;\n" -" vecOut = b3MakeFloat4(\n" -" (float)(vecIn[0]) / (quantization.x),\n" -" (float)(vecIn[1]) / (quantization.y),\n" -" (float)(vecIn[2]) / (quantization.z),\n" -" 0.f);\n" -" vecOut += bvhAabbMin;\n" -" return vecOut;\n" -"}\n" -"inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" -"{\n" -" b3Float4 vecOut;\n" -" vecOut = b3MakeFloat4(\n" -" (float)(vecIn[0]) / (quantization.x),\n" -" (float)(vecIn[1]) / (quantization.y),\n" -" (float)(vecIn[2]) / (quantization.z),\n" -" 0.f);\n" -" vecOut += bvhAabbMin;\n" -" return vecOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void findCompoundPairsKernel( __global const int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global b3Aabb_t* aabbLocalSpace,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global volatile int4* gpuCompoundPairsOut,\n" -" __global volatile int* numCompoundPairsOut,\n" -" __global const b3BvhSubtreeInfo* subtrees,\n" -" __global const b3QuantizedBvhNode* quantizedNodes,\n" -" __global const b3BvhInfo* bvhInfos,\n" -" int numPairs,\n" -" int maxNumCompoundPairsCapacity\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i<numPairs)\n" -" {\n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" //once the broadphase avoids static-static pairs, we can remove this test\n" -" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -" {\n" -" return;\n" -" }\n" -" if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -" {\n" -" int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" -" int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" -" int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" -" int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" -" int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" -" int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" -" b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" \n" -" for (int p=0;p<numSubTreesA;p++)\n" -" {\n" -" b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" -" //bvhInfos[bvhA].m_quantization\n" -" b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -" b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -" b3Float4 aabbAMinOut,aabbAMaxOut;\n" -" float margin=0.f;\n" -" b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -" \n" -" for (int q=0;q<numSubTreesB;q++)\n" -" {\n" -" b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" -" b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -" b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -" b3Float4 aabbBMinOut,aabbBMaxOut;\n" -" float margin=0.f;\n" -" b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -" \n" -" \n" -" bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -" if (aabbOverlap)\n" -" {\n" -" \n" -" int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" -" int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" -" int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" -" int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" -" b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" -" b3Int2 node0;\n" -" node0.x = startNodeIndexA;\n" -" node0.y = startNodeIndexB;\n" -" int maxStackDepth = B3_MAX_STACK_DEPTH;\n" -" int depth=0;\n" -" nodeStack[depth++]=node0;\n" -" do\n" -" {\n" -" b3Int2 node = nodeStack[--depth];\n" -" b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -" b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" -" b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -" b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" -" float margin=0.f;\n" -" b3Float4 aabbAMinOut,aabbAMaxOut;\n" -" b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" -" b3Float4 aabbBMinOut,aabbBMaxOut;\n" -" b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" -" \n" -" bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" -" if (nodeOverlap)\n" -" {\n" -" bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" -" bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" -" bool isInternalA = !isLeafA;\n" -" bool isInternalB = !isLeafB;\n" -" //fail, even though it might hit two leaf nodes\n" -" if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" -" {\n" -" //printf(\"Error: traversal exceeded maxStackDepth\");\n" -" continue;\n" -" }\n" -" if(isInternalA)\n" -" {\n" -" int nodeAleftChild = node.x+1;\n" -" bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" -" int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" -" if(isInternalB)\n" -" { \n" -" int nodeBleftChild = node.y+1;\n" -" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -" int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" -" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" -" nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" -" nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" -" }\n" -" else\n" -" {\n" -" nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" -" nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" -" }\n" -" }\n" -" else\n" -" {\n" -" if(isInternalB)\n" -" {\n" -" int nodeBleftChild = node.y+1;\n" -" bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" -" int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" -" nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" -" nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" -" }\n" -" else\n" -" {\n" -" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" {\n" -" int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" -" int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" -" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -" }\n" -" }\n" -" }\n" -" }\n" -" } while (depth);\n" -" }\n" -" }\n" -" }\n" -" \n" -" return;\n" -" }\n" -" if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -" {\n" -" if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" -" {\n" -" int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" -" for (int c=0;c<numChildrenA;c++)\n" -" {\n" -" int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" -" int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" -" float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" -" float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" -" float4 newOrnA = qtMul(ornA,childOrnA);\n" -" int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" -" b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" -" float margin = 0.f;\n" -" \n" -" b3Float4 aabbAMinWS;\n" -" b3Float4 aabbAMaxWS;\n" -" \n" -" b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" -" newPosA,\n" -" newOrnA,\n" -" &aabbAMinWS,&aabbAMaxWS);\n" -" \n" -" \n" -" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -" for (int b=0;b<numChildrenB;b++)\n" -" {\n" -" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" -" \n" -" b3Float4 aabbBMinWS;\n" -" b3Float4 aabbBMaxWS;\n" -" \n" -" b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" -" newPosB,\n" -" newOrnB,\n" -" &aabbBMinWS,&aabbBMaxWS);\n" -" \n" -" \n" -" \n" -" bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" -" if (aabbOverlap)\n" -" {\n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" float4 posA = newPosA;\n" -" posA.w = 0.f;\n" -" float4 posB = newPosB;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = newOrnA;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =newOrnB;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" {//\n" -" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" {\n" -" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" -" }\n" -" }//\n" -" }//fi(1)\n" -" } //for (int b=0\n" -" }//if (collidables[collidableIndexB].\n" -" else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" if (1)\n" -" {\n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" float4 posA = newPosA;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = newOrnA;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" {\n" -" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" {\n" -" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" -" }//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" }//\n" -" }//fi (1)\n" -" }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" }//for (int b=0;b<numChildrenB;b++) \n" -" return;\n" -" }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" -" && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -" {\n" -" int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" -" for (int b=0;b<numChildrenB;b++)\n" -" {\n" -" int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" //////////////////////////////////////\n" -" if (1)\n" -" {\n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = newPosB;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =newOrnB;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" {//\n" -" int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" -" if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" {\n" -" gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" -" }//fi (compoundPairIdx<maxNumCompoundPairsCapacity)\n" -" }//\n" -" }//fi (1) \n" -" }//for (int b=0;b<numChildrenB;b++)\n" -" return;\n" -" }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" return;\n" -" }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" -" }//i<numPairs\n" -"}\n" -"// work-in-progress\n" -"__kernel void findSeparatingAxisKernel( __global const int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global volatile float4* separatingNormals,\n" -" __global volatile int* hasSeparatingAxis,\n" -" int numPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" \n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" //once the broadphase avoids static-static pairs, we can remove this test\n" -" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" return;\n" -" }\n" -" \n" -" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" return;\n" -" }\n" -" \n" -" if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" return;\n" -" }\n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" float4 sepNormal;\n" -" \n" -" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" hasSeparatingAxis[i] = 4;\n" -" if (!sepA)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -" posA,ornA,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" if (!sepB)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis[i] = 1;\n" -" separatingNormals[i] = sepNormal;\n" -" }\n" -" }\n" -" }\n" -" \n" -" }\n" -"}\n" -"__kernel void findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global volatile float4* separatingNormals,\n" -" __global volatile int* hasSeparatingAxis,\n" -" __global float* dmins,\n" -" int numPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" \n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" hasSeparatingAxis[i] = 0; \n" -" \n" -" //once the broadphase avoids static-static pairs, we can remove this test\n" -" if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" -" {\n" -" return;\n" -" }\n" -" \n" -" if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" -" {\n" -" return;\n" -" }\n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" float dmin = FLT_MAX;\n" -" dmins[i] = dmin;\n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" float4 sepNormal;\n" -" \n" -" bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" hasSeparatingAxis[i] = 4;\n" -" if (!sepA)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" -" posA,ornA,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" if (sepB)\n" -" {\n" -" dmins[i] = dmin;\n" -" hasSeparatingAxis[i] = 1;\n" -" separatingNormals[i] = sepNormal;\n" -" }\n" -" }\n" -" \n" -" }\n" -"}\n" -"__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" -" __global const BodyData* rigidBodies, \n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global btAabbCL* aabbs,\n" -" __global float4* separatingNormals,\n" -" __global int* hasSeparatingAxis,\n" -" __global float* dmins,\n" -" __global const float4* unitSphereDirections,\n" -" int numUnitSphereDirections,\n" -" int numPairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" \n" -" if (i<numPairs)\n" -" {\n" -" if (hasSeparatingAxis[i])\n" -" {\n" -" \n" -" int bodyIndexA = pairs[i].x;\n" -" int bodyIndexB = pairs[i].y;\n" -" \n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" \n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" \n" -" \n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" \n" -" float dmin = dmins[i];\n" -" \n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" float4 sepNormal = separatingNormals[i];\n" -" \n" -" \n" -" \n" -" bool sepEE = false;\n" -" int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" -" if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" -" {\n" -" sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,\n" -" indices,&sepNormal,&dmin);\n" -" \n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis[i] = 1;\n" -" separatingNormals[i] = sepNormal;\n" -" }\n" -" }\n" -" /*\n" -" ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" -" else\n" -" {\n" -" sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" vertices,unitSphereDirections,numUnitSphereDirections,\n" -" &sepNormal,&dmin);\n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis[i] = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis[i] = 1;\n" -" separatingNormals[i] = sepNormal;\n" -" }\n" -" }\n" -" */\n" -" } //if (hasSeparatingAxis[i])\n" -" }//(i<numPairs)\n" -"}\n" -"inline int findClippingFaces(const float4 separatingNormal,\n" -" const ConvexPolyhedronCL* hullA, \n" -" __global const ConvexPolyhedronCL* hullB,\n" -" const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" -" __global float4* worldVertsA1,\n" -" __global float4* worldNormalsA1,\n" -" __global float4* worldVertsB1,\n" -" int capacityWorldVerts,\n" -" const float minDist, float maxDist,\n" -" const float4* verticesA,\n" -" const btGpuFace* facesA,\n" -" const int* indicesA,\n" -" __global const float4* verticesB,\n" -" __global const btGpuFace* facesB,\n" -" __global const int* indicesB,\n" -" __global int4* clippingFaces, int pairIndex)\n" -"{\n" -" int numContactsOut = 0;\n" -" int numWorldVertsB1= 0;\n" -" \n" -" \n" -" int closestFaceB=0;\n" -" float dmax = -FLT_MAX;\n" -" \n" -" {\n" -" for(int face=0;face<hullB->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" -" facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" -" const float4 WorldNormal = qtRotate(ornB, Normal);\n" -" float d = dot3F4(WorldNormal,separatingNormal);\n" -" if (d > dmax)\n" -" {\n" -" dmax = d;\n" -" closestFaceB = face;\n" -" }\n" -" }\n" -" }\n" -" \n" -" {\n" -" const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" -" int numVertices = polyB.m_numIndices;\n" -" if (numVertices>capacityWorldVerts)\n" -" numVertices = capacityWorldVerts;\n" -" \n" -" for(int e0=0;e0<numVertices;e0++)\n" -" {\n" -" if (e0<capacityWorldVerts)\n" -" {\n" -" const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" -" worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" -" }\n" -" }\n" -" }\n" -" \n" -" int closestFaceA=0;\n" -" {\n" -" float dmin = FLT_MAX;\n" -" for(int face=0;face<hullA->m_numFaces;face++)\n" -" {\n" -" const float4 Normal = make_float4(\n" -" facesA[hullA->m_faceOffset+face].m_plane.x,\n" -" facesA[hullA->m_faceOffset+face].m_plane.y,\n" -" facesA[hullA->m_faceOffset+face].m_plane.z,\n" -" 0.f);\n" -" const float4 faceANormalWS = qtRotate(ornA,Normal);\n" -" \n" -" float d = dot3F4(faceANormalWS,separatingNormal);\n" -" if (d < dmin)\n" -" {\n" -" dmin = d;\n" -" closestFaceA = face;\n" -" worldNormalsA1[pairIndex] = faceANormalWS;\n" -" }\n" -" }\n" -" }\n" -" \n" -" int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" -" if (numVerticesA>capacityWorldVerts)\n" -" numVerticesA = capacityWorldVerts;\n" -" \n" -" for(int e0=0;e0<numVerticesA;e0++)\n" -" {\n" -" if (e0<capacityWorldVerts)\n" -" {\n" -" const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" -" worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" -" }\n" -" }\n" -" \n" -" clippingFaces[pairIndex].x = closestFaceA;\n" -" clippingFaces[pairIndex].y = closestFaceB;\n" -" clippingFaces[pairIndex].z = numVerticesA;\n" -" clippingFaces[pairIndex].w = numWorldVertsB1;\n" -" \n" -" \n" -" return numContactsOut;\n" -"}\n" -"// work-in-progress\n" -"__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" -" __global const BodyData* rigidBodies,\n" -" __global const btCollidableGpu* collidables,\n" -" __global const ConvexPolyhedronCL* convexShapes, \n" -" __global const float4* vertices,\n" -" __global const float4* uniqueEdges,\n" -" __global const btGpuFace* faces,\n" -" __global const int* indices,\n" -" __global const btGpuChildShape* gpuChildShapes,\n" -" __global btAabbCL* aabbs,\n" -" __global float4* concaveSeparatingNormalsOut,\n" -" __global int* concaveHasSeparatingNormals,\n" -" __global int4* clippingFacesOut,\n" -" __global float4* worldVertsA1GPU,\n" -" __global float4* worldNormalsAGPU,\n" -" __global float4* worldVertsB1GPU,\n" -" int vertexFaceCapacity,\n" -" int numConcavePairs\n" -" )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConcavePairs)\n" -" return;\n" -" concaveHasSeparatingNormals[i] = 0;\n" -" int pairIdx = i;\n" -" int bodyIndexA = concavePairs[i].x;\n" -" int bodyIndexB = concavePairs[i].y;\n" -" int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" -" int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" -" int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" -" int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" -" if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" -" collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" concavePairs[pairIdx].w = -1;\n" -" return;\n" -" }\n" -" int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" -" int numActualConcaveConvexTests = 0;\n" -" \n" -" int f = concavePairs[i].z;\n" -" \n" -" bool overlap = false;\n" -" \n" -" ConvexPolyhedronCL convexPolyhedronA;\n" -" //add 3 vertices of the triangle\n" -" convexPolyhedronA.m_numVertices = 3;\n" -" convexPolyhedronA.m_vertexOffset = 0;\n" -" float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" -" btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" -" float4 triMinAabb, triMaxAabb;\n" -" btAabbCL triAabb;\n" -" triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" -" triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" -" \n" -" float4 verticesA[3];\n" -" for (int i=0;i<3;i++)\n" -" {\n" -" int index = indices[face.m_indexOffset+i];\n" -" float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" -" verticesA[i] = vert;\n" -" localCenter += vert;\n" -" \n" -" triAabb.m_min = min(triAabb.m_min,vert); \n" -" triAabb.m_max = max(triAabb.m_max,vert); \n" -" }\n" -" overlap = true;\n" -" overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" -" overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" -" overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" -" \n" -" if (overlap)\n" -" {\n" -" float dmin = FLT_MAX;\n" -" int hasSeparatingAxis=5;\n" -" float4 sepAxis=make_float4(1,2,3,4);\n" -" int localCC=0;\n" -" numActualConcaveConvexTests++;\n" -" //a triangle has 3 unique edges\n" -" convexPolyhedronA.m_numUniqueEdges = 3;\n" -" convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" -" float4 uniqueEdgesA[3];\n" -" \n" -" uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" -" uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" -" uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" -" convexPolyhedronA.m_faceOffset = 0;\n" -" \n" -" float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" -" \n" -" btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" -" int indicesA[3+3+2+2+2];\n" -" int curUsedIndices=0;\n" -" int fidx=0;\n" -" //front size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[0] = 0;\n" -" indicesA[1] = 1;\n" -" indicesA[2] = 2;\n" -" curUsedIndices+=3;\n" -" float c = face.m_plane.w;\n" -" facesA[fidx].m_plane.x = normal.x;\n" -" facesA[fidx].m_plane.y = normal.y;\n" -" facesA[fidx].m_plane.z = normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" //back size of triangle\n" -" {\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[3]=2;\n" -" indicesA[4]=1;\n" -" indicesA[5]=0;\n" -" curUsedIndices+=3;\n" -" float c = dot(normal,verticesA[0]);\n" -" float c1 = -face.m_plane.w;\n" -" facesA[fidx].m_plane.x = -normal.x;\n" -" facesA[fidx].m_plane.y = -normal.y;\n" -" facesA[fidx].m_plane.z = -normal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" facesA[fidx].m_numIndices=3;\n" -" }\n" -" fidx++;\n" -" bool addEdgePlanes = true;\n" -" if (addEdgePlanes)\n" -" {\n" -" int numVertices=3;\n" -" int prevVertex = numVertices-1;\n" -" for (int i=0;i<numVertices;i++)\n" -" {\n" -" float4 v0 = verticesA[i];\n" -" float4 v1 = verticesA[prevVertex];\n" -" \n" -" float4 edgeNormal = normalize(cross(normal,v1-v0));\n" -" float c = -dot(edgeNormal,v0);\n" -" facesA[fidx].m_numIndices = 2;\n" -" facesA[fidx].m_indexOffset=curUsedIndices;\n" -" indicesA[curUsedIndices++]=i;\n" -" indicesA[curUsedIndices++]=prevVertex;\n" -" \n" -" facesA[fidx].m_plane.x = edgeNormal.x;\n" -" facesA[fidx].m_plane.y = edgeNormal.y;\n" -" facesA[fidx].m_plane.z = edgeNormal.z;\n" -" facesA[fidx].m_plane.w = c;\n" -" fidx++;\n" -" prevVertex = i;\n" -" }\n" -" }\n" -" convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" -" convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" -" float4 posA = rigidBodies[bodyIndexA].m_pos;\n" -" posA.w = 0.f;\n" -" float4 posB = rigidBodies[bodyIndexB].m_pos;\n" -" posB.w = 0.f;\n" -" float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" -" float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" -" \n" -" ///////////////////\n" -" ///compound shape support\n" -" if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" -" {\n" -" int compoundChild = concavePairs[pairIdx].w;\n" -" int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" -" int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" -" float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" -" float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" -" float4 newPosB = transform(&childPosB,&posB,&ornB);\n" -" float4 newOrnB = qtMul(ornB,childOrnB);\n" -" posB = newPosB;\n" -" ornB = newOrnB;\n" -" shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" -" }\n" -" //////////////////\n" -" float4 c0local = convexPolyhedronA.m_localCenter;\n" -" float4 c0 = transform(&c0local, &posA, &ornA);\n" -" float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" -" float4 c1 = transform(&c1local,&posB,&ornB);\n" -" const float4 DeltaC2 = c0 - c1;\n" -" bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" vertices,uniqueEdges,faces,indices,\n" -" &sepAxis,&dmin);\n" -" hasSeparatingAxis = 4;\n" -" if (!sepA)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" -" posB,ornB,\n" -" posA,ornA,\n" -" DeltaC2,\n" -" vertices,uniqueEdges,faces,indices,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" &sepAxis,&dmin);\n" -" if (!sepB)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" DeltaC2,\n" -" verticesA,uniqueEdgesA,facesA,indicesA,\n" -" vertices,uniqueEdges,faces,indices,\n" -" &sepAxis,&dmin);\n" -" \n" -" if (!sepEE)\n" -" {\n" -" hasSeparatingAxis = 0;\n" -" } else\n" -" {\n" -" hasSeparatingAxis = 1;\n" -" }\n" -" }\n" -" } \n" -" \n" -" if (hasSeparatingAxis)\n" -" {\n" -" sepAxis.w = dmin;\n" -" concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" -" concaveHasSeparatingNormals[i]=1;\n" -" float minDist = -1e30f;\n" -" float maxDist = 0.02f;\n" -" \n" -" findClippingFaces(sepAxis,\n" -" &convexPolyhedronA,\n" -" &convexShapes[shapeIndexB],\n" -" posA,ornA,\n" -" posB,ornB,\n" -" worldVertsA1GPU,\n" -" worldNormalsAGPU,\n" -" worldVertsB1GPU,\n" -" vertexFaceCapacity,\n" -" minDist, maxDist,\n" -" verticesA,\n" -" facesA,\n" -" indicesA,\n" -" vertices,\n" -" faces,\n" -" indices,\n" -" clippingFacesOut, pairIdx);\n" -" } else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -" }\n" -" else\n" -" { \n" -" //mark this pair as in-active\n" -" concavePairs[pairIdx].w = -1;\n" -" }\n" -" \n" -" concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp deleted file mode 100644 index a4980f71e1..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp +++ /dev/null @@ -1,213 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada -//Host-code rewritten by Erwin Coumans - -#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl" -#define KERNEL0 "SearchSortDataLowerKernel" -#define KERNEL1 "SearchSortDataUpperKernel" -#define KERNEL2 "SubtractKernel" - - -#include "b3BoundSearchCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3LauncherCL.h" -#include "kernels/BoundSearchKernelsCL.h" - -b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize) - :m_context(ctx), - m_device(device), - m_queue(queue) -{ - - const char* additionalMacros = ""; - //const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* kernelSource = boundSearchKernelsCL; - - cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH); - b3Assert(boundSearchProg); - - m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros ); - b3Assert(m_lowerSortDataKernel ); - - m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros ); - b3Assert(m_upperSortDataKernel); - - m_subtractKernel = 0; - - if( maxSize ) - { - m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros ); - b3Assert(m_subtractKernel); - } - - //m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST ); - - m_lower = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue,maxSize ); - m_upper = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue, maxSize ); - - m_filler = new b3FillCL(ctx,device,queue); -} - -b3BoundSearchCL::~b3BoundSearchCL() -{ - - delete m_lower; - delete m_upper; - delete m_filler; - - clReleaseKernel(m_lowerSortDataKernel); - clReleaseKernel(m_upperSortDataKernel); - clReleaseKernel(m_subtractKernel); - - -} - - -void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option ) -{ - b3Int4 constBuffer; - constBuffer.x = nSrc; - constBuffer.y = nDst; - - if( option == BOUND_LOWER ) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) }; - - b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nSrc ); - launcher.setConst( nDst ); - - launcher.launch1D( nSrc, 64 ); - } - else if( option == BOUND_UPPER ) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; - - b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nSrc ); - launcher.setConst( nDst ); - - launcher.launch1D( nSrc, 64 ); - } - else if( option == COUNT ) - { - b3Assert( m_lower ); - b3Assert( m_upper ); - b3Assert( m_lower->capacity() <= (int)nDst ); - b3Assert( m_upper->capacity() <= (int)nDst ); - - int zero = 0; - m_filler->execute( *m_lower, zero, nDst ); - m_filler->execute( *m_upper, zero, nDst ); - - execute( src, nSrc, *m_lower, nDst, BOUND_LOWER ); - execute( src, nSrc, *m_upper, nDst, BOUND_UPPER ); - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; - - b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( nSrc ); - launcher.setConst( nDst ); - - launcher.launch1D( nDst, 64 ); - } - } - else - { - b3Assert( 0 ); - } - -} - - -void b3BoundSearchCL::executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc, - b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option ) -{ - - - for(int i=0; i<nSrc-1; i++) - b3Assert( src[i].m_key <= src[i+1].m_key ); - - b3SortData minData,zeroData,maxData; - minData.m_key = -1; - minData.m_value = -1; - zeroData.m_key=0; - zeroData.m_value=0; - maxData.m_key = nDst; - maxData.m_value = nDst; - - if( option == BOUND_LOWER ) - { - for(int i=0; i<nSrc; i++) - { - b3SortData& iData = (i==0)? minData: src[i-1]; - b3SortData& jData = (i==nSrc)? maxData: src[i]; - - if( iData.m_key != jData.m_key ) - { - int k = jData.m_key; - { - dst[k] = i; - } - } - } - } - else if( option == BOUND_UPPER ) - { - for(int i=1; i<nSrc+1; i++) - { - b3SortData& iData = src[i-1]; - b3SortData& jData = (i==nSrc)? maxData: src[i]; - - if( iData.m_key != jData.m_key ) - { - int k = iData.m_key; - { - dst[k] = i; - } - } - } - } - else if( option == COUNT ) - { - b3AlignedObjectArray<unsigned int> lower; - lower.resize(nDst ); - b3AlignedObjectArray<unsigned int> upper; - upper.resize(nDst ); - - for(int i=0; i<nDst; i++) - { - lower[i] = upper[i] = 0; - } - - executeHost( src, nSrc, lower, nDst, BOUND_LOWER ); - executeHost( src, nSrc, upper, nDst, BOUND_UPPER ); - - for( int i=0; i<nDst; i++) - { - dst[i] = upper[i] - lower[i]; - } - } - else - { - b3Assert( 0 ); - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h deleted file mode 100644 index 7e2940965c..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h +++ /dev/null @@ -1,67 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#ifndef B3_BOUNDSEARCH_H -#define B3_BOUNDSEARCH_H - -#pragma once - -/*#include <Adl/Adl.h> -#include <AdlPrimitives/Math/Math.h> -#include <AdlPrimitives/Sort/SortData.h> -#include <AdlPrimitives/Fill/Fill.h> -*/ - -#include "b3OpenCLArray.h" -#include "b3FillCL.h" -#include "b3RadixSort32CL.h" //for b3SortData (perhaps move it?) -class b3BoundSearchCL -{ - public: - - enum Option - { - BOUND_LOWER, - BOUND_UPPER, - COUNT, - }; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - - cl_kernel m_lowerSortDataKernel; - cl_kernel m_upperSortDataKernel; - cl_kernel m_subtractKernel; - - b3OpenCLArray<b3Int4>* m_constbtOpenCLArray; - b3OpenCLArray<unsigned int>* m_lower; - b3OpenCLArray<unsigned int>* m_upper; - - b3FillCL* m_filler; - - b3BoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size); - - virtual ~b3BoundSearchCL(); - - // src has to be src[i].m_key <= src[i+1].m_key - void execute( b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER ); - - void executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER); -}; - - -#endif //B3_BOUNDSEARCH_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h deleted file mode 100644 index 52f219ae3f..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h +++ /dev/null @@ -1,19 +0,0 @@ - -#ifndef B3_BUFFER_INFO_CL_H -#define B3_BUFFER_INFO_CL_H - -#include "b3OpenCLArray.h" - - -struct b3BufferInfoCL -{ - //b3BufferInfoCL(){} - -// template<typename T> - b3BufferInfoCL(cl_mem buff, bool isReadOnly = false): m_clBuffer(buff), m_isReadOnly(isReadOnly){} - - cl_mem m_clBuffer; - bool m_isReadOnly; -}; - -#endif //B3_BUFFER_INFO_CL_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp deleted file mode 100644 index f05c2648f1..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "b3FillCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3BufferInfoCL.h" -#include "b3LauncherCL.h" - -#define FILL_CL_PROGRAM_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl" - -#include "kernels/FillKernelsCL.h" - -b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue) -:m_commandQueue(queue) -{ - const char* kernelSource = fillKernelsCL; - cl_int pErrNum; - const char* additionalMacros = ""; - - cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, FILL_CL_PROGRAM_PATH); - b3Assert(fillProg); - - m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg,additionalMacros ); - b3Assert(m_fillIntKernel); - - m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg,additionalMacros ); - b3Assert(m_fillIntKernel); - - m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg,additionalMacros ); - b3Assert(m_fillFloatKernel); - - - - m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg,additionalMacros ); - b3Assert(m_fillKernelInt2); - -} - -b3FillCL::~b3FillCL() -{ - clReleaseKernel(m_fillKernelInt2); - clReleaseKernel(m_fillIntKernel); - clReleaseKernel(m_fillUnsignedIntKernel); - clReleaseKernel(m_fillFloatKernel); - -} - -void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int offset) -{ - b3Assert( n>0 ); - - { - b3LauncherCL launcher( m_commandQueue, m_fillFloatKernel,"m_fillFloatKernel" ); - launcher.setBuffer( src.getBufferCL()); - launcher.setConst( n ); - launcher.setConst( value ); - launcher.setConst( offset); - - launcher.launch1D( n ); - } -} - -void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offset) -{ - b3Assert( n>0 ); - - - { - b3LauncherCL launcher( m_commandQueue, m_fillIntKernel ,"m_fillIntKernel"); - launcher.setBuffer(src.getBufferCL()); - launcher.setConst( n); - launcher.setConst( value); - launcher.setConst( offset); - launcher.launch1D( n ); - } -} - - -void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset) -{ - b3Assert( n>0 ); - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; - - b3LauncherCL launcher( m_commandQueue, m_fillUnsignedIntKernel,"m_fillUnsignedIntKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( n ); - launcher.setConst(value); - launcher.setConst(offset); - - launcher.launch1D( n ); - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset) -{ - for (int i=0;i<n;i++) - { - src[i+offset]=value; - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset) -{ - for (int i=0;i<n;i++) - { - src[i+offset]=value; - } -} - -void b3FillCL::execute(b3OpenCLArray<b3Int2> &src, const b3Int2 &value, int n, int offset) -{ - b3Assert( n>0 ); - - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL() ) }; - - b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2,"m_fillKernelInt2"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - //( constBuffer ); - launcher.launch1D( n ); - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h deleted file mode 100644 index 1609676b9d..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h +++ /dev/null @@ -1,63 +0,0 @@ -#ifndef B3_FILL_CL_H -#define B3_FILL_CL_H - -#include "b3OpenCLArray.h" -#include "Bullet3Common/b3Scalar.h" - -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" - - -class b3FillCL -{ - - cl_command_queue m_commandQueue; - - cl_kernel m_fillKernelInt2; - cl_kernel m_fillIntKernel; - cl_kernel m_fillUnsignedIntKernel; - cl_kernel m_fillFloatKernel; - - public: - - struct b3ConstData - { - union - { - b3Int4 m_data; - b3UnsignedInt4 m_UnsignedData; - }; - int m_offset; - int m_n; - int m_padding[2]; - }; - -protected: - -public: - - b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue); - - virtual ~b3FillCL(); - - void execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0); - - void execute(b3OpenCLArray<int>& src, const int value, int n, int offset = 0); - - void execute(b3OpenCLArray<float>& src, const float value, int n, int offset = 0); - - void execute(b3OpenCLArray<b3Int2>& src, const b3Int2& value, int n, int offset = 0); - - void executeHost(b3AlignedObjectArray<b3Int2> &src, const b3Int2 &value, int n, int offset); - - void executeHost(b3AlignedObjectArray<int> &src, const int value, int n, int offset); - - // void execute(b3OpenCLArray<b3Int4>& src, const b3Int4& value, int n, int offset = 0); - -}; - - - - - -#endif //B3_FILL_CL_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp deleted file mode 100644 index 94590d11ca..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp +++ /dev/null @@ -1,308 +0,0 @@ -#include "b3LauncherCL.h" - -bool gDebugLauncherCL = false; - -b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name) -:m_commandQueue(queue), -m_kernel(kernel), -m_idx(0), -m_enableSerialization(false), -m_name(name) -{ - if (gDebugLauncherCL) - { - static int counter = 0; - printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name); - } - - m_serializationSizeInBytes = sizeof(int); -} - -b3LauncherCL::~b3LauncherCL() - { - for (int i=0;i<m_arrays.size();i++) - { - delete (m_arrays[i]); - } - - m_arrays.clear(); - if (gDebugLauncherCL) - { - static int counter = 0; - printf("[%d] Finished launching OpenCL kernel %s\n", counter++,m_name); - } - } - -void b3LauncherCL::setBuffer( cl_mem clBuffer) -{ - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo ( kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert( err == CL_SUCCESS ); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes+= sizeof(b3KernelArgData); - m_serializationSizeInBytes+=param_value; - } - cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &clBuffer); - b3Assert( status == CL_SUCCESS ); -} - - -void b3LauncherCL::setBuffers( b3BufferInfoCL* buffInfo, int n ) -{ - for(int i=0; i<n; i++) - { - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = buffInfo[i].m_clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo ( kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert( err == CL_SUCCESS ); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes+= sizeof(b3KernelArgData); - m_serializationSizeInBytes+=param_value; - } - cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer); - b3Assert( status == CL_SUCCESS ); - } -} - -struct b3KernelArgDataUnaligned -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int m_unusedPadding; - union - { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; - }; - -}; -#include <string.h> - - - -int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx) -{ - int index=0; - - int numArguments = *(int*) &buf[index]; - index+=sizeof(int); - - for (int i=0;i<numArguments;i++) - { - b3KernelArgDataUnaligned* arg = (b3KernelArgDataUnaligned*)&buf[index]; - - index+=sizeof(b3KernelArgData); - if (arg->m_isBuffer) - { - b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx,m_commandQueue, arg->m_argSizeInBytes); - clData->resize(arg->m_argSizeInBytes); - - clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes); - - arg->m_clBuffer = clData->getBufferCL(); - - m_arrays.push_back(clData); - - cl_int status = clSetKernelArg( m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer); - b3Assert( status == CL_SUCCESS ); - index+=arg->m_argSizeInBytes; - } else - { - cl_int status = clSetKernelArg( m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData); - b3Assert( status == CL_SUCCESS ); - } - b3KernelArgData b; - memcpy(&b,arg,sizeof(b3KernelArgDataUnaligned)); - m_kernelArguments.push_back(b); - } -m_serializationSizeInBytes = index; - return index; -} - -int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx) - { - int index=0; - - int numArguments = *(int*) &goldBuffer[index]; - index+=sizeof(int); - - if (numArguments != m_kernelArguments.size()) - { - printf("failed validation: expected %d arguments, found %d\n",numArguments, m_kernelArguments.size()); - return -1; - } - - for (int ii=0;ii<numArguments;ii++) - { - b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index]; - - if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes) - { - printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n",ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes); - return -2; - } - - { - int expected = argGold->m_isBuffer; - int found = m_kernelArguments[ii].m_isBuffer; - - if (expected != found) - { - printf("failed validation: argument %d isBuffer expected: %d, found %d\n",ii,expected, found); - return -3; - } - } - index+=sizeof(b3KernelArgData); - - if (argGold->m_isBuffer) - { - - unsigned char* memBuf= (unsigned char*) malloc(m_kernelArguments[ii].m_argSizeInBytes); - unsigned char* goldBuf = &goldBuffer[index]; - for (int j=0;j<m_kernelArguments[j].m_argSizeInBytes;j++) - { - memBuf[j] = 0xaa; - } - - cl_int status = 0; - status = clEnqueueReadBuffer( m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes, - memBuf, 0,0,0 ); - b3Assert( status==CL_SUCCESS ); - clFinish(m_commandQueue); - - for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++) - { - int expected = goldBuf[b]; - int found = memBuf[b]; - if (expected != found) - { - printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -4; - } - } - - - index+=argGold->m_argSizeInBytes; - } else - { - - //compare content - for (int b=0;b<m_kernelArguments[ii].m_argSizeInBytes;b++) - { - int expected = argGold->m_argData[b]; - int found =m_kernelArguments[ii].m_argData[b]; - if (expected != found) - { - printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -5; - } - } - - } - } - return index; - -} - -int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity) -{ -//initialize to known values -for (int i=0;i<destBufferCapacity;i++) - destBuffer[i] = 0xec; - - assert(destBufferCapacity>=m_serializationSizeInBytes); - - //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc - int numArguments = m_kernelArguments.size(); - int curBufferSize = 0; - int* dest = (int*)&destBuffer[curBufferSize]; - *dest = numArguments; - curBufferSize += sizeof(int); - - - - for (int i=0;i<this->m_kernelArguments.size();i++) - { - b3KernelArgData* arg = (b3KernelArgData*) &destBuffer[curBufferSize]; - *arg = m_kernelArguments[i]; - curBufferSize+=sizeof(b3KernelArgData); - if (arg->m_isBuffer==1) - { - //copy the OpenCL buffer content - cl_int status = 0; - status = clEnqueueReadBuffer( m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes, - &destBuffer[curBufferSize], 0,0,0 ); - b3Assert( status==CL_SUCCESS ); - clFinish(m_commandQueue); - curBufferSize+=arg->m_argSizeInBytes; - } - - } - return curBufferSize; -} - -void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems) -{ - int num = numWorkItems; - int buffSize = getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; - for (int i=0;i<buffSize+1;i++) - { - unsigned char* ptr = (unsigned char*)&buf[i]; - *ptr = 0xff; - } -// int actualWrite = serializeArguments(buf,buffSize); - -// unsigned char* cptr = (unsigned char*)&buf[buffSize]; -// printf("buf[buffSize] = %d\n",*cptr); - - assert(buf[buffSize]==0xff);//check for buffer overrun - int* ptr = (int*)&buf[buffSize]; - - *ptr = num; - - FILE* f = fopen(fileName,"wb"); - fwrite(buf,buffSize+sizeof(int),1,f); - fclose(f); - - delete[] buf; -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h deleted file mode 100644 index 1b267b31ef..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h +++ /dev/null @@ -1,135 +0,0 @@ - -#ifndef B3_LAUNCHER_CL_H -#define B3_LAUNCHER_CL_H - -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3MinMax.h" -#include "b3OpenCLArray.h" -#include <stdio.h> - -#define B3_DEBUG_SERIALIZE_CL - - -#ifdef _WIN32 -#pragma warning(disable :4996) -#endif -#define B3_CL_MAX_ARG_SIZE 16 -B3_ATTRIBUTE_ALIGNED16(struct) b3KernelArgData -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int m_unusedPadding; - union - { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; - }; - -}; - -class b3LauncherCL -{ - - cl_command_queue m_commandQueue; - cl_kernel m_kernel; - int m_idx; - - b3AlignedObjectArray<b3KernelArgData> m_kernelArguments; - int m_serializationSizeInBytes; - bool m_enableSerialization; - - const char* m_name; - public: - - b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays; - - b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name); - - virtual ~b3LauncherCL(); - - void setBuffer( cl_mem clBuffer); - - void setBuffers( b3BufferInfoCL* buffInfo, int n ); - - int getSerializationBufferSize() const - { - return m_serializationSizeInBytes; - } - - int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx); - - inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx); - - int serializeArguments(unsigned char* destBuffer, int destBufferCapacity); - - int getNumArguments() const - { - return m_kernelArguments.size(); - } - - b3KernelArgData getArgument(int index) - { - return m_kernelArguments[index]; - } - - void serializeToFile(const char* fileName, int numWorkItems); - - template<typename T> - inline void setConst( const T& consts ) - { - int sz=sizeof(T); - b3Assert(sz<=B3_CL_MAX_ARG_SIZE); - - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 0; - T* destArg = (T*)kernelArg.m_argData; - *destArg = consts; - kernelArg.m_argSizeInBytes = sizeof(T); - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes+=sizeof(b3KernelArgData); - } - - cl_int status = clSetKernelArg( m_kernel, m_idx++, sz, &consts ); - b3Assert( status == CL_SUCCESS ); - } - - inline void launch1D( int numThreads, int localSize = 64) - { - launch2D( numThreads, 1, localSize, 1 ); - } - - inline void launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ) - { - size_t gRange[3] = {1,1,1}; - size_t lRange[3] = {1,1,1}; - lRange[0] = localSizeX; - lRange[1] = localSizeY; - gRange[0] = b3Max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1)); - gRange[0] *= lRange[0]; - gRange[1] = b3Max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1)); - gRange[1] *= lRange[1]; - - cl_int status = clEnqueueNDRangeKernel( m_commandQueue, - m_kernel, 2, NULL, gRange, lRange, 0,0,0 ); - if (status != CL_SUCCESS) - { - printf("Error: OpenCL status = %d\n",status); - } - b3Assert( status == CL_SUCCESS ); - - } - - void enableSerialization(bool serialize) - { - m_enableSerialization = serialize; - } - -}; - - - -#endif //B3_LAUNCHER_CL_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h deleted file mode 100644 index d70c30f53f..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h +++ /dev/null @@ -1,306 +0,0 @@ -#ifndef B3_OPENCL_ARRAY_H -#define B3_OPENCL_ARRAY_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -template <typename T> -class b3OpenCLArray -{ - size_t m_size; - size_t m_capacity; - cl_mem m_clBuffer; - - cl_context m_clContext; - cl_command_queue m_commandQueue; - - bool m_ownsMemory; - - bool m_allowGrowingCapacity; - - void deallocate() - { - if (m_clBuffer && m_ownsMemory) - { - clReleaseMemObject(m_clBuffer); - } - m_clBuffer = 0; - m_capacity=0; - } - - b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src); - - B3_FORCE_INLINE size_t allocSize(size_t size) - { - return (size ? size*2 : 1); - } - -public: - - b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true) - :m_size(0), m_capacity(0),m_clBuffer(0), - m_clContext(ctx),m_commandQueue(queue), - m_ownsMemory(true),m_allowGrowingCapacity(true) - { - if (initialCapacity) - { - reserve(initialCapacity); - } - m_allowGrowingCapacity = allowGrowingCapacity; - } - - ///this is an error-prone method with no error checking, be careful! - void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) - { - deallocate(); - m_ownsMemory = false; - m_allowGrowingCapacity = false; - m_clBuffer = buffer; - m_size = sizeInElements; - m_capacity = sizeInElements; - } - -// we could enable this assignment, but need to make sure to avoid accidental deep copies -// b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src) -// { -// copyFromArray(src); -// return *this; -// } - - - cl_mem getBufferCL() const - { - return m_clBuffer; - } - - - virtual ~b3OpenCLArray() - { - deallocate(); - m_size=0; - m_capacity=0; - } - - B3_FORCE_INLINE bool push_back(const T& _Val,bool waitForCompletion=true) - { - bool result = true; - size_t sz = size(); - if( sz == capacity() ) - { - result = reserve( allocSize(size()) ); - } - copyFromHostPointer(&_Val, 1, sz, waitForCompletion); - m_size++; - return result; - } - - B3_FORCE_INLINE T forcedAt(size_t n) const - { - b3Assert(n>=0); - b3Assert(n<capacity()); - T elem; - copyToHostPointer(&elem,1,n,true); - return elem; - } - - B3_FORCE_INLINE T at(size_t n) const - { - b3Assert(n>=0); - b3Assert(n<size()); - T elem; - copyToHostPointer(&elem,1,n,true); - return elem; - } - - B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents=true) - { - bool result = true; - size_t curSize = size(); - - if (newsize < curSize) - { - //leave the OpenCL memory for now - } else - { - if (newsize > size()) - { - result = reserve(newsize,copyOldContents); - } - - //leave new data uninitialized (init in debug mode?) - //for (size_t i=curSize;i<newsize;i++) ... - } - - if (result) - { - m_size = newsize; - } else - { - m_size = 0; - } - return result; - } - - B3_FORCE_INLINE size_t size() const - { - return m_size; - } - - B3_FORCE_INLINE size_t capacity() const - { - return m_capacity; - } - - B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents=true) - { - bool result=true; - // determine new minimum length of allocated storage - if (capacity() < _Count) - { // not enough room, reallocate - - if (m_allowGrowingCapacity) - { - cl_int ciErrNum; - //create a new OpenCL buffer - size_t memSizeInBytes = sizeof(T)*_Count; - cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum); - if (ciErrNum!=CL_SUCCESS) - { - b3Error("OpenCL out-of-memory\n"); - _Count = 0; - result = false; - } -//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS -#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS - unsigned char* src = (unsigned char*)malloc(memSizeInBytes); - for (size_t i=0;i<memSizeInBytes;i++) - src[i] = 0xbb; - ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 ); - b3Assert(ciErrNum==CL_SUCCESS); - clFinish(m_commandQueue); - free(src); -#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS - - if (result) - { - if (copyOldContents) - copyToCL(buf, size()); - } - - //deallocate the old buffer - deallocate(); - - m_clBuffer = buf; - - m_capacity = _Count; - } else - { - //fail: assert and - b3Assert(0); - deallocate(); - result=false; - } - } - return result; - } - - - void copyToCL(cl_mem destination, size_t numElements, size_t firstElem=0, size_t dstOffsetInElems=0) const - { - if (numElements<=0) - return; - - b3Assert(m_clBuffer); - b3Assert(destination); - - //likely some error, destination is same as source - b3Assert(m_clBuffer != destination); - - b3Assert((firstElem+numElements)<=m_size); - - cl_int status = 0; - - - b3Assert(numElements>0); - b3Assert(numElements<=m_size); - - size_t srcOffsetBytes = sizeof(T)*firstElem; - size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems; - - status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination, - srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 ); - - b3Assert( status == CL_SUCCESS ); - } - - void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true) - { - size_t newSize = srcArray.size(); - - bool copyOldContents = false; - resize (newSize,copyOldContents); - if (newSize) - copyFromHostPointer(&srcArray[0],newSize,0,waitForCompletion); - - } - - void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true) - { - b3Assert(numElems+destFirstElem <= capacity()); - - if (numElems+destFirstElem) - { - cl_int status = 0; - size_t sizeInBytes=sizeof(T)*numElems; - status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes, - src, 0,0,0 ); - b3Assert(status == CL_SUCCESS ); - if (waitForCompletion) - clFinish(m_commandQueue); - } else - { - b3Error("copyFromHostPointer invalid range\n"); - } - } - - - void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion=true) const - { - destArray.resize(this->size()); - if (size()) - copyToHostPointer(&destArray[0], size(),0,waitForCompletion); - } - - void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const - { - b3Assert(numElem+srcFirstElem <= capacity()); - - if(numElem+srcFirstElem <= capacity()) - { - cl_int status = 0; - status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem, - destPtr, 0,0,0 ); - b3Assert( status==CL_SUCCESS ); - - if (waitForCompletion) - clFinish(m_commandQueue); - } else - { - b3Error("copyToHostPointer invalid range\n"); - } - } - - void copyFromOpenCLArray(const b3OpenCLArray& src) - { - size_t newSize = src.size(); - resize(newSize); - if (size()) - { - src.copyToCL(m_clBuffer,size()); - } - } - -}; - - -#endif //B3_OPENCL_ARRAY_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp deleted file mode 100644 index 42cd197740..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsCL.h" - -b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) -:m_commandQueue(queue) -{ - const char* scanKernelSource = prefixScanKernelsCL; - cl_int pErrNum; - char* additionalMacros=0; - - m_workBuffer = new b3OpenCLArray<unsigned int>(ctx,queue,size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, B3_PREFIXSCAN_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_localScanKernel ); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_blockSumKernel ); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_propagationKernel ); -} - - -b3PrefixScanCL::~b3PrefixScanCL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template<class T> -T b3NextPowerOf2(T n) -{ - n -= 1; - for(int i=0; i<sizeof(T)*8; i++) - n = n | (n>>i); - return n+1; -} - -void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum) -{ - -// b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)( (n+BLOCK_SIZE*2-1)/(BLOCK_SIZE*2) ); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2( numBlocks ); - - b3OpenCLArray<unsigned int>* srcNative = &src; - b3OpenCLArray<unsigned int>* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - - b3LauncherCL launcher( m_commandQueue, m_localScanKernel,"m_localScanKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); - } - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - - b3LauncherCL launcher( m_commandQueue, m_blockSumKernel,"m_blockSumKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); - } - - - if( numBlocks > 1 ) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_propagationKernel,"m_propagationKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); - } - - - if( sum ) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum,1,n-1,true); - } - -} - - -void b3PrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum) -{ - unsigned int s = 0; - //if( data->m_option == EXCLUSIVE ) - { - for(int i=0; i<n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i<n; i++) - { - s += hSrc[i]; - hDst[i] = s; - } - } - */ - - if( sum ) - { - *sum = dst[n-1]; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h deleted file mode 100644 index a9a2e61b9e..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef B3_PREFIX_SCAN_CL_H -#define B3_PREFIX_SCAN_CL_H - -#include "b3OpenCLArray.h" -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -class b3PrefixScanCL -{ - enum - { - BLOCK_SIZE = 128 - }; - -// Option m_option; - - cl_command_queue m_commandQueue; - - cl_kernel m_localScanKernel; - cl_kernel m_blockSumKernel; - cl_kernel m_propagationKernel; - - b3OpenCLArray<unsigned int>* m_workBuffer; - - - public: - - b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0); - - virtual ~b3PrefixScanCL(); - - void execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0); - void executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum=0); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp deleted file mode 100644 index 80560d793d..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "b3PrefixScanFloat4CL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_FLOAT4_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsFloat4CL.h" - -b3PrefixScanFloat4CL::b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) -:m_commandQueue(queue) -{ - const char* scanKernelSource = prefixScanKernelsFloat4CL; - cl_int pErrNum; - char* additionalMacros=0; - - m_workBuffer = new b3OpenCLArray<b3Vector3>(ctx,queue,size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, scanKernelSource, &pErrNum,additionalMacros, B3_PREFIXSCAN_FLOAT4_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_localScanKernel ); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_blockSumKernel ); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg,additionalMacros ); - b3Assert(m_propagationKernel ); -} - - -b3PrefixScanFloat4CL::~b3PrefixScanFloat4CL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template<class T> -T b3NextPowerOf2(T n) -{ - n -= 1; - for(int i=0; i<sizeof(T)*8; i++) - n = n | (n>>i); - return n+1; -} - -void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<b3Vector3>& dst, int n, b3Vector3* sum) -{ - -// b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)( (n+BLOCK_SIZE*2-1)/(BLOCK_SIZE*2) ); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2( numBlocks ); - - b3OpenCLArray<b3Vector3>* srcNative = &src; - b3OpenCLArray<b3Vector3>* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( srcNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - - b3LauncherCL launcher( m_commandQueue, m_localScanKernel ,"m_localScanKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( numBlocks*BLOCK_SIZE, BLOCK_SIZE ); - } - - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - - b3LauncherCL launcher( m_commandQueue, m_blockSumKernel ,"m_blockSumKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( BLOCK_SIZE, BLOCK_SIZE ); - } - - - if( numBlocks > 1 ) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( dstNative->getBufferCL() ), b3BufferInfoCL( m_workBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_propagationKernel ,"m_propagationKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( constBuffer ); - launcher.launch1D( (numBlocks-1)*BLOCK_SIZE, BLOCK_SIZE ); - } - - - if( sum ) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum,1,n-1,true); - } - -} - - -void b3PrefixScanFloat4CL::executeHost(b3AlignedObjectArray<b3Vector3>& src, b3AlignedObjectArray<b3Vector3>& dst, int n, b3Vector3* sum) -{ - b3Vector3 s=b3MakeVector3(0,0,0); - //if( data->m_option == EXCLUSIVE ) - { - for(int i=0; i<n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i<n; i++) - { - s += hSrc[i]; - hDst[i] = s; - } - } - */ - - if( sum ) - { - *sum = dst[n-1]; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h deleted file mode 100644 index 2c8003c1bb..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h +++ /dev/null @@ -1,38 +0,0 @@ - -#ifndef B3_PREFIX_SCAN_CL_H -#define B3_PREFIX_SCAN_CL_H - -#include "b3OpenCLArray.h" -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -class b3PrefixScanFloat4CL -{ - enum - { - BLOCK_SIZE = 128 - }; - -// Option m_option; - - cl_command_queue m_commandQueue; - - cl_kernel m_localScanKernel; - cl_kernel m_blockSumKernel; - cl_kernel m_propagationKernel; - - b3OpenCLArray<b3Vector3>* m_workBuffer; - - - public: - - b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue,int size=0); - - virtual ~b3PrefixScanFloat4CL(); - - void execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<b3Vector3>& dst, int n, b3Vector3* sum = 0); - void executeHost(b3AlignedObjectArray<b3Vector3>& src, b3AlignedObjectArray<b3Vector3>& dst, int n, b3Vector3* sum); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp deleted file mode 100644 index f11ae4bcdb..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp +++ /dev/null @@ -1,710 +0,0 @@ - -#include "b3RadixSort32CL.h" -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" - -#define RADIXSORT32_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl" - -#include "kernels/RadixSort32KernelsCL.h" - -b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity) -:m_commandQueue(queue) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device,&info); - m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU)!=0; - - m_workBuffer1 = new b3OpenCLArray<unsigned int>(ctx,queue); - m_workBuffer2 = new b3OpenCLArray<unsigned int>(ctx,queue); - m_workBuffer3 = new b3OpenCLArray<b3SortData>(ctx,queue); - m_workBuffer3a = new b3OpenCLArray<unsigned int>(ctx,queue); - m_workBuffer4 = new b3OpenCLArray<b3SortData>(ctx,queue); - m_workBuffer4a = new b3OpenCLArray<unsigned int>(ctx,queue); - - - if (initialCapacity>0) - { - m_workBuffer1->resize(initialCapacity); - m_workBuffer3->resize(initialCapacity); - m_workBuffer3a->resize(initialCapacity); - m_workBuffer4->resize(initialCapacity); - m_workBuffer4a->resize(initialCapacity); - } - - m_scan = new b3PrefixScanCL(ctx,device,queue); - m_fill = new b3FillCL(ctx,device,queue); - - const char* additionalMacros = ""; - - cl_int pErrNum; - const char* kernelSource = radixSort32KernelsCL; - - cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, RADIXSORT32_PATH); - b3Assert(sortProg); - - m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_streamCountSortDataKernel ); - - - - m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_streamCountKernel); - - - - if (m_deviceCPU) - { - - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_sortAndScatterKernel); - } else - { - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_sortAndScatterKernel); - } - - m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg,additionalMacros ); - b3Assert(m_prefixScanKernel); - -} - -b3RadixSort32CL::~b3RadixSort32CL() -{ - delete m_scan; - delete m_fill; - delete m_workBuffer1; - delete m_workBuffer2; - delete m_workBuffer3; - delete m_workBuffer3a; - delete m_workBuffer4; - delete m_workBuffer4a; - - clReleaseKernel(m_streamCountSortDataKernel); - clReleaseKernel(m_streamCountKernel); - clReleaseKernel(m_sortAndScatterSortDataKernel); - clReleaseKernel(m_sortAndScatterKernel); - clReleaseKernel(m_prefixScanKernel); -} - -void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */) -{ - int n = inout.size(); - const int BITS_PER_PASS = 8; - const int NUM_TABLES = (1<<BITS_PER_PASS); - - - int tables[NUM_TABLES]; - int counter[NUM_TABLES]; - - b3SortData* src = &inout[0]; - b3AlignedObjectArray<b3SortData> workbuffer; - workbuffer.resize(inout.size()); - b3SortData* dst = &workbuffer[0]; - - int count=0; - for(int startBit=0; startBit<sortBits; startBit+=BITS_PER_PASS) - { - for(int i=0; i<NUM_TABLES; i++) - { - tables[i] = 0; - } - - for(int i=0; i<n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1); - tables[tableIdx]++; - } -//#define TEST -#ifdef TEST - printf("histogram size=%d\n",NUM_TABLES); - for (int i=0;i<NUM_TABLES;i++) - { - if (tables[i]!=0) - { - printf("tables[%d]=%d]\n",i,tables[i]); - } - - } -#endif //TEST - // prefix scan - int sum = 0; - for(int i=0; i<NUM_TABLES; i++) - { - int iData = tables[i]; - tables[i] = sum; - sum += iData; - counter[i] = 0; - } - - // distribute - for(int i=0; i<n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES-1); - - dst[tables[tableIdx] + counter[tableIdx]] = src[i]; - counter[tableIdx] ++; - } - - b3Swap( src, dst ); - count++; - } - - if (count&1) - { - b3Assert(0);//need to copy - - } -} - -void b3RadixSort32CL::executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */) -{ - - b3AlignedObjectArray<b3SortData> inout; - keyValuesInOut.copyToHost(inout); - - executeHost(inout,sortBits); - - keyValuesInOut.copyFromHost(inout); -} - -void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, - b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits) -{ - -} - -//#define DEBUG_RADIXSORT -//#define DEBUG_RADIXSORT2 - - -void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */) -{ - - int originalSize = keyValuesInOut.size(); - int workingSize = originalSize; - - - int dataAlignment = DATA_ALIGNMENT; - -#ifdef DEBUG_RADIXSORT2 - b3AlignedObjectArray<b3SortData> test2; - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n",test2.size()); - for (int i=0;i<test2.size();i++) - { - printf("test2[%d].m_key=%d\n",i,test2[i].m_key); - printf("test2[%d].m_value=%d\n",i,test2[i].m_value); - } -#endif //DEBUG_RADIXSORT2 - - b3OpenCLArray<b3SortData>* src = 0; - - if (workingSize%dataAlignment) - { - workingSize += dataAlignment-(workingSize%dataAlignment); - m_workBuffer4->copyFromOpenCLArray(keyValuesInOut); - m_workBuffer4->resize(workingSize); - b3SortData fillValue; - fillValue.m_key = 0xffffffff; - fillValue.m_value = 0xffffffff; - -#define USE_BTFILL -#ifdef USE_BTFILL - m_fill->execute((b3OpenCLArray<b3Int2>&)*m_workBuffer4,(b3Int2&)fillValue,workingSize-originalSize,originalSize); -#else - //fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side) - - for (int i=originalSize; i<workingSize;i++) - { - m_workBuffer4->copyFromHostPointer(&fillValue,1,i); - } -#endif//USE_BTFILL - - src = m_workBuffer4; - } else - { - src = &keyValuesInOut; - m_workBuffer4->resize(0); - } - - b3Assert( workingSize%DATA_ALIGNMENT == 0 ); - int minCap = NUM_BUCKET*NUM_WGS; - - - int n = workingSize; - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - - -// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert( BITS_PER_PASS == 4 ); - b3Assert( WG_SIZE == 64 ); - b3Assert( (sortBits&0x3) == 0 ); - - - - b3OpenCLArray<b3SortData>* dst = m_workBuffer3; - - b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1; - b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2; - - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256 - int nBlocks = (n+blockSize-1)/(blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; - if( nBlocks < NUM_WGS ) - { - cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count=0; - for(int ib=0; ib<sortBits; ib+=4) - { -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n",test2.size()); - for (int i=0;i<test2.size();i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n",i,test2[i].m_key); - printf("test2[%d].m_value=%d\n",i,test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel,"m_streamCountSortDataKernel"); - - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - - int num = NUM_WGS*WG_SIZE; - launcher.launch1D( num, WG_SIZE ); - } - - - -#ifdef DEBUG_RADIXSORT - b3AlignedObjectArray<unsigned int> testHist; - srcHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); - for (int i=0;i<testHist.size();i++) - { - if (testHist[i]!=0) - printf("testHist[%d]=%d\n",i,testHist[i]); - } -#endif //DEBUG_RADIXSORT - - - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan=false; -#else - bool fastScan=!m_deviceCPU;//only use fast scan on GPU -#endif - - if (fastScan) - {// prefix scan group histogram - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( 128, 128 ); - destHisto = srcHisto; - }else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto,*destHisto,1920,0);//,&sum); - } - - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); - for (int i=0;i<testHist.size();i++) - { - if (testHist[i]!=0) - printf("testHist[%d]=%d\n",i,testHist[i]); - } - - for (int i=0;i<testHist.size();i+=NUM_WGS) - { - printf("testHist[%d]=%d\n",i/NUM_WGS,testHist[i]); - } - -#endif //DEBUG_RADIXSORT - -#define USE_GPU -#ifdef USE_GPU - - if (src->size()) - {// local sort and distribute - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; - b3LauncherCL launcher( m_commandQueue, m_sortAndScatterSortDataKernel,"m_sortAndScatterSortDataKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); - - } -#else - { -#define NUM_TABLES 16 -//#define SEQUENTIAL -#ifdef SEQUENTIAL - int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - int tables[NUM_TABLES]; - int startBit = ib; - - destHisto->copyToHost(testHist); - b3AlignedObjectArray<b3SortData> srcHost; - b3AlignedObjectArray<b3SortData> dstHost; - dstHost.resize(src->size()); - - src->copyToHost(srcHost); - - for (int i=0;i<NUM_TABLES;i++) - { - tables[i] = testHist[i*NUM_WGS]; - } - - // distribute - for(int i=0; i<n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); - - dstHost[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx] ++; - } - - -#else - - int counter2[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - - int tables[NUM_TABLES]; - b3AlignedObjectArray<b3SortData> dstHostOK; - dstHostOK.resize(src->size()); - - destHisto->copyToHost(testHist); - b3AlignedObjectArray<b3SortData> srcHost; - src->copyToHost(srcHost); - - int blockSize = 256; - int nBlocksPerWG = cdata.m_nBlocksPerWG; - int startBit = ib; - - { - for (int i=0;i<NUM_TABLES;i++) - { - tables[i] = testHist[i*NUM_WGS]; - } - - // distribute - for(int i=0; i<n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); - - dstHostOK[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx] ++; - } - - - } - - - b3AlignedObjectArray<b3SortData> dstHost; - dstHost.resize(src->size()); - - - int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - - - - for (int wgIdx=0;wgIdx<NUM_WGS;wgIdx++) - { - int counter[NUM_TABLES]={0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - for(int iblock=0; iblock<b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx = 0;lIdx < 64;lIdx++) - { - int addr = iblock*blockSize + blockSize*cdata.m_nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) - { - if( addr+j < n ) - { - // printf ("addr+j=%d\n", addr+j); - - int i = addr+j; - - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES-1); - - int destIndex = testHist[tableIdx*NUM_WGS+wgIdx] + counter[tableIdx]; - - b3SortData ok = dstHostOK[destIndex]; - - if (ok.m_key != srcHost[i].m_key) - { - printf("ok.m_key = %d, srcHost[i].m_key = %d\n", ok.m_key,srcHost[i].m_key ); - printf("(ok.m_value = %d, srcHost[i].m_value = %d)\n", ok.m_value,srcHost[i].m_value ); - } - if (ok.m_value != srcHost[i].m_value) - { - - printf("ok.m_value = %d, srcHost[i].m_value = %d\n", ok.m_value,srcHost[i].m_value ); - printf("(ok.m_key = %d, srcHost[i].m_key = %d)\n", ok.m_key,srcHost[i].m_key ); - - } - - dstHost[destIndex] = srcHost[i]; - counter[tableIdx] ++; - - } - } - } - } - } - - -#endif //SEQUENTIAL - - dst->copyFromHost(dstHost); - } -#endif//USE_GPU - - - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n",ib, testHist.size()); - for (int i=0;i<testHist.size();i++) - { - if (testHist[i]!=0) - printf("testHist[%d]=%d\n",i,testHist[i]); - } -#endif //DEBUG_RADIXSORT - b3Swap(src, dst ); - b3Swap(srcHisto,destHisto); - -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n",test2.size()); - for (int i=0;i<test2.size();i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n",i,test2[i].m_key); - printf("test2[%d].m_value=%d\n",i,test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - count++; - - - } - - - - if (count&1) - { - b3Assert(0);//need to copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4->size()) - { - m_workBuffer4->resize(originalSize); - keyValuesInOut.copyFromOpenCLArray(*m_workBuffer4); - } - - -#ifdef DEBUG_RADIXSORT - keyValuesInOut.copyToHost(test2); - - printf("numElem = %d\n",test2.size()); - for (int i=0;i<test2.size();i++) - { - printf("test2[%d].m_key=%d\n",i,test2[i].m_key); - printf("test2[%d].m_value=%d\n",i,test2[i].m_value); - } -#endif - -} - - - - - - -void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */) -{ - int originalSize = keysInOut.size(); - int workingSize = originalSize; - - - int dataAlignment = DATA_ALIGNMENT; - - b3OpenCLArray<unsigned int>* src = 0; - - if (workingSize%dataAlignment) - { - workingSize += dataAlignment-(workingSize%dataAlignment); - m_workBuffer4a->copyFromOpenCLArray(keysInOut); - m_workBuffer4a->resize(workingSize); - unsigned int fillValue = 0xffffffff; - - m_fill->execute(*m_workBuffer4a,fillValue,workingSize-originalSize,originalSize); - - src = m_workBuffer4a; - } else - { - src = &keysInOut; - m_workBuffer4a->resize(0); - } - - - - b3Assert( workingSize%DATA_ALIGNMENT == 0 ); - int minCap = NUM_BUCKET*NUM_WGS; - - - int n = workingSize; - - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - m_workBuffer3a->resize(workingSize); - -// ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert( BITS_PER_PASS == 4 ); - b3Assert( WG_SIZE == 64 ); - b3Assert( (sortBits&0x3) == 0 ); - - - - b3OpenCLArray<unsigned int>* dst = m_workBuffer3a; - - b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1; - b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2; - - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;//set at 256 - int nBlocks = (n+blockSize-1)/(blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1)/cdata.m_nWGs; - if( nBlocks < NUM_WGS ) - { - cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count=0; - for(int ib=0; ib<sortBits; ib+=4) - { - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher(m_commandQueue, m_streamCountKernel,"m_streamCountKernel"); - - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - - int num = NUM_WGS*WG_SIZE; - launcher.launch1D( num, WG_SIZE ); - } - - - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan=false; -#else - bool fastScan=!m_deviceCPU; -#endif - - if (fastScan) - {// prefix scan group histogram - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( srcHisto->getBufferCL() ) }; - b3LauncherCL launcher( m_commandQueue, m_prefixScanKernel,"m_prefixScanKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( 128, 128 ); - destHisto = srcHisto; - }else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto,*destHisto,1920,0);//,&sum); - } - - if (src->size()) - {// local sort and distribute - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src->getBufferCL(), true ), b3BufferInfoCL( destHisto->getBufferCL(), true ), b3BufferInfoCL( dst->getBufferCL() )}; - b3LauncherCL launcher( m_commandQueue, m_sortAndScatterKernel ,"m_sortAndScatterKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nWGs*WG_SIZE, WG_SIZE ); - - } - - b3Swap(src, dst ); - b3Swap(srcHisto,destHisto); - - count++; - } - - if (count&1) - { - b3Assert(0);//need to copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4a->size()) - { - m_workBuffer4a->resize(originalSize); - keysInOut.copyFromOpenCLArray(*m_workBuffer4a); - } - -} - - - - - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h deleted file mode 100644 index 975bd80e53..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h +++ /dev/null @@ -1,95 +0,0 @@ - -#ifndef B3_RADIXSORT32_H -#define B3_RADIXSORT32_H - -#include "b3OpenCLArray.h" - -struct b3SortData -{ - union - { - unsigned int m_key; - unsigned int x; - }; - - union - { - unsigned int m_value; - unsigned int y; - - }; -}; -#include "b3BufferInfoCL.h" - -class b3RadixSort32CL -{ - - b3OpenCLArray<unsigned int>* m_workBuffer1; - b3OpenCLArray<unsigned int>* m_workBuffer2; - - b3OpenCLArray<b3SortData>* m_workBuffer3; - b3OpenCLArray<b3SortData>* m_workBuffer4; - - b3OpenCLArray<unsigned int>* m_workBuffer3a; - b3OpenCLArray<unsigned int>* m_workBuffer4a; - - cl_command_queue m_commandQueue; - - cl_kernel m_streamCountSortDataKernel; - cl_kernel m_streamCountKernel; - - cl_kernel m_prefixScanKernel; - cl_kernel m_sortAndScatterSortDataKernel; - cl_kernel m_sortAndScatterKernel; - - - bool m_deviceCPU; - - class b3PrefixScanCL* m_scan; - class b3FillCL* m_fill; - -public: - struct b3ConstData - { - int m_n; - int m_nWGs; - int m_startBit; - int m_nBlocksPerWG; - }; - enum - { - DATA_ALIGNMENT = 256, - WG_SIZE = 64, - BLOCK_SIZE = 256, - ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE), - BITS_PER_PASS = 4, - NUM_BUCKET=(1<<BITS_PER_PASS), - // if you change this, change nPerWI in kernel as well - NUM_WGS = 20*6, // cypress -// NUM_WGS = 24*6, // cayman -// NUM_WGS = 32*4, // nv - }; - - -private: - - -public: - - b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0); - - virtual ~b3RadixSort32CL(); - - void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, - b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32); - - ///keys only - void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 ); - - void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 ); - void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); - void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32); - -}; -#endif //B3_RADIXSORT32_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl deleted file mode 100644 index f3b4a1e8a7..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl +++ /dev/null @@ -1,106 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -typedef struct -{ - u32 m_key; - u32 m_value; -}SortData; - - - -typedef struct -{ - u32 m_nSrc; - u32 m_nDst; - u32 m_padding[2]; -} ConstBuffer; - - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nSrc ) - { - SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1); - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = (gIdx==0)? first: src[gIdx-1]; - SortData jData = (gIdx==nSrc)? end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { -// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++) - u32 k = jData.m_key; - { - dst[k] = gIdx; - } - } - } -} - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX+1; - - if( gIdx < nSrc+1 ) - { - SortData first; first.m_key = 0; first.m_value = 0; - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = src[gIdx-1]; - SortData jData = (gIdx==nSrc)? end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { - u32 k = iData.m_key; - { - dst[k] = gIdx; - } - } - } -} - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - - if( gIdx < nDst ) - { - C[gIdx] = A[gIdx] - B[gIdx]; - } -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h deleted file mode 100644 index 9c9e847138..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h +++ /dev/null @@ -1,87 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* boundSearchKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"typedef struct\n" -"{\n" -" u32 m_key; \n" -" u32 m_value;\n" -"}SortData;\n" -"typedef struct\n" -"{\n" -" u32 m_nSrc;\n" -" u32 m_nDst;\n" -" u32 m_padding[2];\n" -"} ConstBuffer;\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"__kernel\n" -"void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n" -" unsigned int nSrc, unsigned int nDst)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nSrc )\n" -" {\n" -" SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n" -" SortData end; end.m_key = nDst; end.m_value = nDst;\n" -" SortData iData = (gIdx==0)? first: src[gIdx-1];\n" -" SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" -" if( iData.m_key != jData.m_key )\n" -" {\n" -"// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n" -" u32 k = jData.m_key;\n" -" {\n" -" dst[k] = gIdx;\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"__kernel\n" -"void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n" -" unsigned int nSrc, unsigned int nDst)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX+1;\n" -" if( gIdx < nSrc+1 )\n" -" {\n" -" SortData first; first.m_key = 0; first.m_value = 0;\n" -" SortData end; end.m_key = nDst; end.m_value = nDst;\n" -" SortData iData = src[gIdx-1];\n" -" SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" -" if( iData.m_key != jData.m_key )\n" -" {\n" -" u32 k = iData.m_key;\n" -" {\n" -" dst[k] = gIdx;\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"__kernel\n" -"void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n" -" unsigned int nSrc, unsigned int nDst)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nDst )\n" -" {\n" -" C[gIdx] = A[gIdx] - B[gIdx];\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl deleted file mode 100644 index 2eee5752ec..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - int m_n; - int m_padding[3]; -} ConstBuffer; - - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy1F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float4 a0 = src[gIdx]; - - dst[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy2F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 2*gIdx <= cb.m_n ) - { - float4 a0 = src[gIdx*2+0]; - float4 a1 = src[gIdx*2+1]; - - dst[ gIdx*2+0 ] = a0; - dst[ gIdx*2+1 ] = a1; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy4F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 4*gIdx <= cb.m_n ) - { - int idx0 = gIdx*4+0; - int idx1 = gIdx*4+1; - int idx2 = gIdx*4+2; - int idx3 = gIdx*4+3; - - float4 a0 = src[idx0]; - float4 a1 = src[idx1]; - float4 a2 = src[idx2]; - float4 a3 = src[idx3]; - - dst[ idx0 ] = a0; - dst[ idx1 ] = a1; - dst[ idx2 ] = a2; - dst[ idx3 ] = a3; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF1Kernel(__global float* dstF1, __global float* srcF1, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float a0 = srcF1[gIdx]; - - dstF1[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float2 a0 = srcF2[gIdx]; - - dstF2[ gIdx ] = a0; - } -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h deleted file mode 100644 index e5670e3cd3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h +++ /dev/null @@ -1,132 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* copyKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"\n" -"#define make_uint4 (uint4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"\n" -"typedef struct\n" -"{\n" -" int m_n;\n" -" int m_padding[3];\n" -"} ConstBuffer;\n" -"\n" -"\n" -"\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void Copy1F4Kernel(__global float4* dst, __global float4* src, \n" -" ConstBuffer cb)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -"\n" -" if( gIdx < cb.m_n )\n" -" {\n" -" float4 a0 = src[gIdx];\n" -"\n" -" dst[ gIdx ] = a0;\n" -" }\n" -"}\n" -"\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void Copy2F4Kernel(__global float4* dst, __global float4* src, \n" -" ConstBuffer cb)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -"\n" -" if( 2*gIdx <= cb.m_n )\n" -" {\n" -" float4 a0 = src[gIdx*2+0];\n" -" float4 a1 = src[gIdx*2+1];\n" -"\n" -" dst[ gIdx*2+0 ] = a0;\n" -" dst[ gIdx*2+1 ] = a1;\n" -" }\n" -"}\n" -"\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void Copy4F4Kernel(__global float4* dst, __global float4* src, \n" -" ConstBuffer cb)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -"\n" -" if( 4*gIdx <= cb.m_n )\n" -" {\n" -" int idx0 = gIdx*4+0;\n" -" int idx1 = gIdx*4+1;\n" -" int idx2 = gIdx*4+2;\n" -" int idx3 = gIdx*4+3;\n" -"\n" -" float4 a0 = src[idx0];\n" -" float4 a1 = src[idx1];\n" -" float4 a2 = src[idx2];\n" -" float4 a3 = src[idx3];\n" -"\n" -" dst[ idx0 ] = a0;\n" -" dst[ idx1 ] = a1;\n" -" dst[ idx2 ] = a2;\n" -" dst[ idx3 ] = a3;\n" -" }\n" -"}\n" -"\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void CopyF1Kernel(__global float* dstF1, __global float* srcF1, \n" -" ConstBuffer cb)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -"\n" -" if( gIdx < cb.m_n )\n" -" {\n" -" float a0 = srcF1[gIdx];\n" -"\n" -" dstF1[ gIdx ] = a0;\n" -" }\n" -"}\n" -"\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, \n" -" ConstBuffer cb)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -"\n" -" if( gIdx < cb.m_n )\n" -" {\n" -" float2 a0 = srcF2[gIdx];\n" -"\n" -" dstF2[ gIdx ] = a0;\n" -" }\n" -"}\n" -"\n" -"\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl deleted file mode 100644 index 71c31075dd..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - union - { - int4 m_data; - uint4 m_unsignedData; - float m_floatData; - }; - int m_offset; - int m_n; - int m_padding[2]; -} ConstBuffer; - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstFloat[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt2[ gIdx + offset] = make_int2( value.x, value.y ); - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt4[ offset+gIdx ] = value; - } -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h deleted file mode 100644 index 4f8b96e489..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h +++ /dev/null @@ -1,91 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* fillKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define make_uint4 (uint4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"typedef struct\n" -"{\n" -" union\n" -" {\n" -" int4 m_data;\n" -" uint4 m_unsignedData;\n" -" float m_floatData;\n" -" };\n" -" int m_offset;\n" -" int m_n;\n" -" int m_padding[2];\n" -"} ConstBuffer;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < num_elements )\n" -" {\n" -" dstInt[ offset+gIdx ] = value;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < num_elements )\n" -" {\n" -" dstFloat[ offset+gIdx ] = value;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < num )\n" -" {\n" -" dstInt[ offset+gIdx ] = value;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < num )\n" -" {\n" -" dstInt2[ gIdx + offset] = make_int2( value.x, value.y );\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(64,1,1)))\n" -"void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < num )\n" -" {\n" -" dstInt4[ offset+gIdx ] = value;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl deleted file mode 100644 index c9da79854a..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -// takahiro end -#define WG_SIZE 128 -#define m_numElems x -#define m_numBlocks y -#define m_numScanBlocks z - -/*typedef struct -{ - uint m_numElems; - uint m_numBlocks; - uint m_numScanBlocks; - uint m_padding[1]; -} ConstBuffer; -*/ - -float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize) -{ - float4 blocksum; - int offset = 1; - for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - data[bi] += data[ai]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - blocksum = data[ n-1 ]; - data[ n-1 ] = 0; - } - - GROUP_LDS_BARRIER; - - offset >>= 1; - for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize ) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - float4 temp = data[ai]; - data[ai] = data[bi]; - data[bi] += temp; - } - } - GROUP_LDS_BARRIER; - - return blocksum; -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb) -{ - __local float4 ldsData[WG_SIZE*2]; - - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - - ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0; - ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0; - - float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE); - - if( lIdx == 0 ) - sumBuffer[GET_GROUP_IDX] = sum; - - if( (2*gIdx) < cb.m_numElems ) - { - dst[2*gIdx] = ldsData[2*lIdx]; - } - if( (2*gIdx + 1) < cb.m_numElems ) - { - dst[2*gIdx + 1] = ldsData[2*lIdx + 1]; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb) -{ - const u32 blockSize = WG_SIZE*2; - - int myIdx = GET_GROUP_IDX+1; - int lIdx = GET_LOCAL_IDX; - - float4 iBlockSum = blockSum[myIdx]; - - int endValue = min((myIdx+1)*(blockSize), cb.m_numElems); - for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE) - { - dst[i] += iBlockSum; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void TopLevelScanKernel(__global float4* dst, uint4 cb) -{ - __local float4 ldsData[2048]; - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - int lSize = GET_GROUP_SIZE; - - for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize ) - { - ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0; - } - - GROUP_LDS_BARRIER; - - float4 sum = ScanExclusiveFloat4(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE); - - for(int i=lIdx; i<cb.m_numBlocks; i+=lSize ) - { - dst[i] = ldsData[i]; - } - - if( gIdx == 0 ) - { - dst[cb.m_numBlocks] = sum; - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl deleted file mode 100644 index 963cc1e48e..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -// takahiro end -#define WG_SIZE 128 -#define m_numElems x -#define m_numBlocks y -#define m_numScanBlocks z - -/*typedef struct -{ - uint m_numElems; - uint m_numBlocks; - uint m_numScanBlocks; - uint m_padding[1]; -} ConstBuffer; -*/ - -u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize) -{ - u32 blocksum; - int offset = 1; - for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - data[bi] += data[ai]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - blocksum = data[ n-1 ]; - data[ n-1 ] = 0; - } - - GROUP_LDS_BARRIER; - - offset >>= 1; - for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize ) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - u32 temp = data[ai]; - data[ai] = data[bi]; - data[bi] += temp; - } - } - GROUP_LDS_BARRIER; - - return blocksum; -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer, - uint4 cb) -{ - __local u32 ldsData[WG_SIZE*2]; - - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - - ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0; - ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0; - - u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE); - - if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum; - - if( (2*gIdx) < cb.m_numElems ) - { - dst[2*gIdx] = ldsData[2*lIdx]; - } - if( (2*gIdx + 1) < cb.m_numElems ) - { - dst[2*gIdx + 1] = ldsData[2*lIdx + 1]; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb) -{ - const u32 blockSize = WG_SIZE*2; - - int myIdx = GET_GROUP_IDX+1; - int lIdx = GET_LOCAL_IDX; - - u32 iBlockSum = blockSum[myIdx]; - - int endValue = min((myIdx+1)*(blockSize), cb.m_numElems); - for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE) - { - dst[i] += iBlockSum; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void TopLevelScanKernel(__global u32* dst, uint4 cb) -{ - __local u32 ldsData[2048]; - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - int lSize = GET_GROUP_SIZE; - - for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize ) - { - ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0; - } - - GROUP_LDS_BARRIER; - - u32 sum = ScanExclusive(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE); - - for(int i=lIdx; i<cb.m_numBlocks; i+=lSize ) - { - dst[i] = ldsData[i]; - } - - if( gIdx == 0 ) - { - dst[cb.m_numBlocks] = sum; - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h deleted file mode 100644 index 27baab8331..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h +++ /dev/null @@ -1,129 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* prefixScanKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"// takahiro end\n" -"#define WG_SIZE 128 \n" -"#define m_numElems x\n" -"#define m_numBlocks y\n" -"#define m_numScanBlocks z\n" -"/*typedef struct\n" -"{\n" -" uint m_numElems;\n" -" uint m_numBlocks;\n" -" uint m_numScanBlocks;\n" -" uint m_padding[1];\n" -"} ConstBuffer;\n" -"*/\n" -"u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize)\n" -"{\n" -" u32 blocksum;\n" -" int offset = 1;\n" -" for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" -" {\n" -" GROUP_LDS_BARRIER;\n" -" for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize)\n" -" {\n" -" int ai = offset*(2*iIdx+1)-1;\n" -" int bi = offset*(2*iIdx+2)-1;\n" -" data[bi] += data[ai];\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 )\n" -" {\n" -" blocksum = data[ n-1 ];\n" -" data[ n-1 ] = 0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" offset >>= 1;\n" -" for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n" -" {\n" -" GROUP_LDS_BARRIER;\n" -" for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize )\n" -" {\n" -" int ai = offset*(2*iIdx+1)-1;\n" -" int bi = offset*(2*iIdx+2)-1;\n" -" u32 temp = data[ai];\n" -" data[ai] = data[bi];\n" -" data[bi] += temp;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" return blocksum;\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer,\n" -" uint4 cb)\n" -"{\n" -" __local u32 ldsData[WG_SIZE*2];\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" -" ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" -" u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -" if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum;\n" -" if( (2*gIdx) < cb.m_numElems )\n" -" {\n" -" dst[2*gIdx] = ldsData[2*lIdx];\n" -" }\n" -" if( (2*gIdx + 1) < cb.m_numElems )\n" -" {\n" -" dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb)\n" -"{\n" -" const u32 blockSize = WG_SIZE*2;\n" -" int myIdx = GET_GROUP_IDX+1;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" u32 iBlockSum = blockSum[myIdx];\n" -" int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" -" for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n" -" {\n" -" dst[i] += iBlockSum;\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void TopLevelScanKernel(__global u32* dst, uint4 cb)\n" -"{\n" -" __local u32 ldsData[2048];\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int lSize = GET_GROUP_SIZE;\n" -" for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n" -" {\n" -" ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" u32 sum = ScanExclusive(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -" for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n" -" {\n" -" dst[i] = ldsData[i];\n" -" }\n" -" if( gIdx == 0 )\n" -" {\n" -" dst[cb.m_numBlocks] = sum;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h deleted file mode 100644 index 5b13254796..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h +++ /dev/null @@ -1,129 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* prefixScanKernelsFloat4CL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"// takahiro end\n" -"#define WG_SIZE 128 \n" -"#define m_numElems x\n" -"#define m_numBlocks y\n" -"#define m_numScanBlocks z\n" -"/*typedef struct\n" -"{\n" -" uint m_numElems;\n" -" uint m_numBlocks;\n" -" uint m_numScanBlocks;\n" -" uint m_padding[1];\n" -"} ConstBuffer;\n" -"*/\n" -"float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize)\n" -"{\n" -" float4 blocksum;\n" -" int offset = 1;\n" -" for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" -" {\n" -" GROUP_LDS_BARRIER;\n" -" for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize)\n" -" {\n" -" int ai = offset*(2*iIdx+1)-1;\n" -" int bi = offset*(2*iIdx+2)-1;\n" -" data[bi] += data[ai];\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 )\n" -" {\n" -" blocksum = data[ n-1 ];\n" -" data[ n-1 ] = 0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" offset >>= 1;\n" -" for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n" -" {\n" -" GROUP_LDS_BARRIER;\n" -" for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize )\n" -" {\n" -" int ai = offset*(2*iIdx+1)-1;\n" -" int bi = offset*(2*iIdx+2)-1;\n" -" float4 temp = data[ai];\n" -" data[ai] = data[bi];\n" -" data[bi] += temp;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" return blocksum;\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb)\n" -"{\n" -" __local float4 ldsData[WG_SIZE*2];\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" -" ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" -" float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -" if( lIdx == 0 ) \n" -" sumBuffer[GET_GROUP_IDX] = sum;\n" -" if( (2*gIdx) < cb.m_numElems )\n" -" {\n" -" dst[2*gIdx] = ldsData[2*lIdx];\n" -" }\n" -" if( (2*gIdx + 1) < cb.m_numElems )\n" -" {\n" -" dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb)\n" -"{\n" -" const u32 blockSize = WG_SIZE*2;\n" -" int myIdx = GET_GROUP_IDX+1;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" float4 iBlockSum = blockSum[myIdx];\n" -" int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" -" for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n" -" {\n" -" dst[i] += iBlockSum;\n" -" }\n" -"}\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"__kernel\n" -"void TopLevelScanKernel(__global float4* dst, uint4 cb)\n" -"{\n" -" __local float4 ldsData[2048];\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int lSize = GET_GROUP_SIZE;\n" -" for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n" -" {\n" -" ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" float4 sum = ScanExclusiveFloat4(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" -" for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n" -" {\n" -" dst[i] = ldsData[i];\n" -" }\n" -" if( gIdx == 0 )\n" -" {\n" -" dst[cb.m_numBlocks] = sum;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl deleted file mode 100644 index 7402e2f3b3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl +++ /dev/null @@ -1,1071 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Author Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AtomAdd(x, value) atom_add(&(x), value) - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -#define WG_SIZE 64 -#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE) -#define BITS_PER_PASS 4 -#define NUM_BUCKET (1<<BITS_PER_PASS) -typedef uchar u8; - -// this isn't optimization for VLIW. But just reducing writes. -#define USE_2LEVEL_REDUCE 1 - -//#define CHECK_BOUNDARY 1 - -//#define NV_GPU 1 - - -// Cypress -#define nPerWI 16 -// Cayman -//#define nPerWI 20 - -#define m_n x -#define m_nWGs y -#define m_startBit z -#define m_nBlocksPerWG w - -/* -typedef struct -{ - int m_n; - int m_nWGs; - int m_startBit; - int m_nBlocksPerWG; -} ConstBuffer; -*/ - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - - -uint prefixScanVectorEx( uint4* data ) -{ - u32 sum = 0; - u32 tmp = data[0].x; - data[0].x = sum; - sum += tmp; - tmp = data[0].y; - data[0].y = sum; - sum += tmp; - tmp = data[0].z; - data[0].z = sum; - sum += tmp; - tmp = data[0].w; - data[0].w = sum; - sum += tmp; - return sum; -} - -u32 localPrefixSum( u32 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory, int wgSize /*64 or 128*/ ) -{ - { // Set data - sorterSharedMemory[lIdx] = 0; - sorterSharedMemory[lIdx+wgSize] = pData; - } - - GROUP_LDS_BARRIER; - - { // Prefix sum - int idx = 2*lIdx + (wgSize+1); -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < 64 ) - { - u32 u0, u1, u2; - u0 = sorterSharedMemory[idx-3]; - u1 = sorterSharedMemory[idx-2]; - u2 = sorterSharedMemory[idx-1]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - - u0 = sorterSharedMemory[idx-12]; - u1 = sorterSharedMemory[idx-8]; - u2 = sorterSharedMemory[idx-4]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - - u0 = sorterSharedMemory[idx-48]; - u1 = sorterSharedMemory[idx-32]; - u2 = sorterSharedMemory[idx-16]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - if( wgSize > 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#else - if( lIdx < 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-1]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-4]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-8]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-16]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; - GROUP_MEM_FENCE; - if( wgSize > 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#endif - } - - GROUP_LDS_BARRIER; - - *totalSum = sorterSharedMemory[wgSize*2-1]; - u32 addValue = sorterSharedMemory[lIdx+wgSize-1]; - return addValue; -} - -//__attribute__((reqd_work_group_size(128,1,1))) -uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - - -//__attribute__((reqd_work_group_size(64,1,1))) -uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - -u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;} - -u32 bit8Scan(u32 v) -{ - return (v<<8) + (v<<16) + (v<<24); -} - -//=== - - - - -#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx] - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb ) -{ - __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - for(int i=0; i<NUM_BUCKET; i++) - { - MY_HISTOGRAM(i) = 0; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - u32 localKey; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - { -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - { - localKey = (gSrc[addr+i]>>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<GET_GROUP_SIZE; i++) - { - sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE]; - } - histogramOut[lIdx*nWGs+wgIdx] = sum; - } -} - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void StreamCountSortDataKernel( __global SortDataCL* gSrc, __global u32* histogramOut, int4 cb ) -{ - __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - for(int i=0; i<NUM_BUCKET; i++) - { - MY_HISTOGRAM(i) = 0; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - u32 localKey; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - { -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - { - localKey = (gSrc[addr+i].m_key>>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<GET_GROUP_SIZE; i++) - { - sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE]; - } - histogramOut[lIdx*nWGs+wgIdx] = sum; - } -} - -#define nPerLane (nPerWI/4) - -// NUM_BUCKET*nWGs < 128*nPerWI -__kernel -__attribute__((reqd_work_group_size(128,1,1))) -void PrefixScanKernel( __global u32* wHistogram1, int4 cb ) -{ - __local u32 ldsTopScanData[128*2]; - - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - const int nWGs = cb.m_nWGs; - - u32 data[nPerWI]; - for(int i=0; i<nPerWI; i++) - { - data[i] = 0; - if( (nPerWI*lIdx+i) < NUM_BUCKET*nWGs ) - data[i] = wHistogram1[nPerWI*lIdx+i]; - } - - uint4 myData = make_uint4(0,0,0,0); - - for(int i=0; i<nPerLane; i++) - { - myData.x += data[nPerLane*0+i]; - myData.y += data[nPerLane*1+i]; - myData.z += data[nPerLane*2+i]; - myData.w += data[nPerLane*3+i]; - } - - uint totalSum; - uint4 scanned = localPrefixSum128V( myData, lIdx, &totalSum, ldsTopScanData ); - -// for(int j=0; j<4; j++) // somehow it introduces a lot of branches - { int j = 0; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 1; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 2; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 3; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - - for(int i=0; i<nPerLane; i++) - { - data[nPerLane*0+i] += scanned.x; - data[nPerLane*1+i] += scanned.y; - data[nPerLane*2+i] += scanned.z; - data[nPerLane*3+i] += scanned.w; - } - - for(int i=0; i<nPerWI; i++) - { - int index = nPerWI*lIdx+i; - if (index < NUM_BUCKET*nWGs) - wHistogram1[nPerWI*lIdx+i] = data[i]; - } -} - -// 4 scan, 4 exchange -void sort4Bits(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) -{ - for(int bitIdx=0; bitIdx<BITS_PER_PASS; bitIdx++) - { - u32 mask = (1<<bitIdx); - uint4 cmpResult = make_uint4( (sortData[0]>>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask ); - uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) ); - u32 total; - prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData ); - { - uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3); - uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total ); - dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) ); - - GROUP_LDS_BARRIER; - - ldsSortData[dstAddr.x] = sortData[0]; - ldsSortData[dstAddr.y] = sortData[1]; - ldsSortData[dstAddr.z] = sortData[2]; - ldsSortData[dstAddr.w] = sortData[3]; - - GROUP_LDS_BARRIER; - - sortData[0] = ldsSortData[localAddr.x]; - sortData[1] = ldsSortData[localAddr.y]; - sortData[2] = ldsSortData[localAddr.z]; - sortData[3] = ldsSortData[localAddr.w]; - - GROUP_LDS_BARRIER; - } - } -} - -// 2 scan, 2 exchange -void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) -{ - for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2) - { - uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] = sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - -#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key] - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - u32 myHistogram = 0; - - u32 sortData[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - sortData[i] = ( addr+i < n )? gSrc[ addr+i ] : 0xffffffff; -#else - sortData[i] = gSrc[ addr+i ]; -#endif - - sort4Bits(sortData, startBit, lIdx, ldsSortData); - - u32 keys[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - keys[i] = (sortData[i]>>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - -#if defined(NV_GPU) - SET_HISTOGRAM( setIdx, keys[i] )++; -#else - AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) ); -#endif - - GROUP_LDS_BARRIER; - - uint hIdx = NUM_BUCKET+lIdx; - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<WG_SIZE/16; i++) - { - sum += SET_HISTOGRAM( i, lIdx ); - } - myHistogram = sum; - localHistogram[hIdx] = sum; - } - GROUP_LDS_BARRIER; - -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - - u32 u0, u1, u2; - u0 = localHistogram[hIdx-3]; - u1 = localHistogram[hIdx-2]; - u2 = localHistogram[hIdx-1]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - u0 = localHistogram[hIdx-12]; - u1 = localHistogram[hIdx-8]; - u2 = localHistogram[hIdx-4]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - } -#else - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-2]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-4]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-8]; - GROUP_MEM_FENCE; - } -#endif - GROUP_LDS_BARRIER; - } - - { - for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++) - { - int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie; - int binIdx = keys[ie]; - int groupOffset = localHistogramToCarry[binIdx]; - int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx]; -#if defined(CHECK_BOUNDARY) - if( addr+ie < n ) -#endif - gDst[ groupOffset + myIdx ] = sortData[ie]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - localHistogramToCarry[lIdx] += myHistogram; - } - GROUP_LDS_BARRIER; - } -} - -// 2 scan, 2 exchange -void sort4Bits1KeyValue(u32 sortData[4], int sortVal[4], int startBit, int lIdx, __local u32* ldsSortData, __local int *ldsSortVal) -{ - for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2) - { - uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] = sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - ldsSortVal[newOffset[0]] = sortVal[0]; - ldsSortVal[newOffset[1]] = sortVal[1]; - ldsSortVal[newOffset[2]] = sortVal[2]; - ldsSortVal[newOffset[3]] = sortVal[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - sortVal[0] = ldsSortVal[dstAddr+0]; - sortVal[1] = ldsSortVal[dstAddr+1]; - sortVal[2] = ldsSortVal[dstAddr+2]; - sortVal[3] = ldsSortVal[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb) -{ - __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - - u32 myHistogram = 0; - - int sortData[ELEMENTS_PER_WORK_ITEM]; - int sortVal[ELEMENTS_PER_WORK_ITEM]; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - { - sortData[i] = ( addr+i < n )? gSrc[ addr+i ].m_key : 0xffffffff; - sortVal[i] = ( addr+i < n )? gSrc[ addr+i ].m_value : 0xffffffff; - } -#else - { - sortData[i] = gSrc[ addr+i ].m_key; - sortVal[i] = gSrc[ addr+i ].m_value; - } -#endif - - sort4Bits1KeyValue(sortData, sortVal, startBit, lIdx, ldsSortData, ldsSortVal); - - u32 keys[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - keys[i] = (sortData[i]>>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - -#if defined(NV_GPU) - SET_HISTOGRAM( setIdx, keys[i] )++; -#else - AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) ); -#endif - - GROUP_LDS_BARRIER; - - uint hIdx = NUM_BUCKET+lIdx; - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<WG_SIZE/16; i++) - { - sum += SET_HISTOGRAM( i, lIdx ); - } - myHistogram = sum; - localHistogram[hIdx] = sum; - } - GROUP_LDS_BARRIER; - -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - - u32 u0, u1, u2; - u0 = localHistogram[hIdx-3]; - u1 = localHistogram[hIdx-2]; - u2 = localHistogram[hIdx-1]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - u0 = localHistogram[hIdx-12]; - u1 = localHistogram[hIdx-8]; - u2 = localHistogram[hIdx-4]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - } -#else - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-2]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-4]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-8]; - GROUP_MEM_FENCE; - } -#endif - GROUP_LDS_BARRIER; - } - - { - for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++) - { - int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie; - int binIdx = keys[ie]; - int groupOffset = localHistogramToCarry[binIdx]; - int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx]; -#if defined(CHECK_BOUNDARY) - if( addr+ie < n ) - { - if ((groupOffset + myIdx)<n) - { - if (sortData[ie]==sortVal[ie]) - { - - SortDataCL tmp; - tmp.m_key = sortData[ie]; - tmp.m_value = sortVal[ie]; - if (tmp.m_key == tmp.m_value) - gDst[groupOffset + myIdx ] = tmp; - } - - } - } -#else - if ((groupOffset + myIdx)<n) - { - gDst[ groupOffset + myIdx ].m_key = sortData[ie]; - gDst[ groupOffset + myIdx ].m_value = sortVal[ie]; - } -#endif - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - localHistogramToCarry[lIdx] += myHistogram; - } - GROUP_LDS_BARRIER; - } -} - - - - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterSortDataKernelSerial( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb) -{ - - u32 gIdx = GET_GLOBAL_IDX; - u32 realLocalIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - int counter[NUM_BUCKET]; - - if (realLocalIdx>0) - return; - - for (int c=0;c<NUM_BUCKET;c++) - counter[c]=0; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx=0;lIdx<WG_SIZE;lIdx++) - { - int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) - { - int i = addr2+j; - if( i < n ) - { - int tableIdx; - tableIdx = (gSrc[i].m_key>>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - counter[tableIdx] ++; - } - } - } - } - -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - - u32 gIdx = GET_GLOBAL_IDX; - u32 realLocalIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - int counter[NUM_BUCKET]; - - if (realLocalIdx>0) - return; - - for (int c=0;c<NUM_BUCKET;c++) - counter[c]=0; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx=0;lIdx<WG_SIZE;lIdx++) - { - int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) - { - int i = addr2+j; - if( i < n ) - { - int tableIdx; - tableIdx = (gSrc[i]>>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - counter[tableIdx] ++; - } - } - } - } - -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h b/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h deleted file mode 100644 index 8876c16aa6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h +++ /dev/null @@ -1,910 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* radixSort32KernelsCL= \ -"/*\n" -"Bullet Continuous Collision Detection and Physics Library\n" -"Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Author Takahiro Harada\n" -"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"typedef unsigned int u32;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_uint4 (uint4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define WG_SIZE 64\n" -"#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE)\n" -"#define BITS_PER_PASS 4\n" -"#define NUM_BUCKET (1<<BITS_PER_PASS)\n" -"typedef uchar u8;\n" -"// this isn't optimization for VLIW. But just reducing writes. \n" -"#define USE_2LEVEL_REDUCE 1\n" -"//#define CHECK_BOUNDARY 1\n" -"//#define NV_GPU 1\n" -"// Cypress\n" -"#define nPerWI 16\n" -"// Cayman\n" -"//#define nPerWI 20\n" -"#define m_n x\n" -"#define m_nWGs y\n" -"#define m_startBit z\n" -"#define m_nBlocksPerWG w\n" -"/*\n" -"typedef struct\n" -"{\n" -" int m_n;\n" -" int m_nWGs;\n" -" int m_startBit;\n" -" int m_nBlocksPerWG;\n" -"} ConstBuffer;\n" -"*/\n" -"typedef struct\n" -"{\n" -" unsigned int m_key;\n" -" unsigned int m_value;\n" -"} SortDataCL;\n" -"uint prefixScanVectorEx( uint4* data )\n" -"{\n" -" u32 sum = 0;\n" -" u32 tmp = data[0].x;\n" -" data[0].x = sum;\n" -" sum += tmp;\n" -" tmp = data[0].y;\n" -" data[0].y = sum;\n" -" sum += tmp;\n" -" tmp = data[0].z;\n" -" data[0].z = sum;\n" -" sum += tmp;\n" -" tmp = data[0].w;\n" -" data[0].w = sum;\n" -" sum += tmp;\n" -" return sum;\n" -"}\n" -"u32 localPrefixSum( u32 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory, int wgSize /*64 or 128*/ )\n" -"{\n" -" { // Set data\n" -" sorterSharedMemory[lIdx] = 0;\n" -" sorterSharedMemory[lIdx+wgSize] = pData;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" { // Prefix sum\n" -" int idx = 2*lIdx + (wgSize+1);\n" -"#if defined(USE_2LEVEL_REDUCE)\n" -" if( lIdx < 64 )\n" -" {\n" -" u32 u0, u1, u2;\n" -" u0 = sorterSharedMemory[idx-3];\n" -" u1 = sorterSharedMemory[idx-2];\n" -" u2 = sorterSharedMemory[idx-1];\n" -" AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" -" GROUP_MEM_FENCE;\n" -" u0 = sorterSharedMemory[idx-12];\n" -" u1 = sorterSharedMemory[idx-8];\n" -" u2 = sorterSharedMemory[idx-4];\n" -" AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" -" GROUP_MEM_FENCE;\n" -" u0 = sorterSharedMemory[idx-48];\n" -" u1 = sorterSharedMemory[idx-32];\n" -" u2 = sorterSharedMemory[idx-16];\n" -" AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" -" GROUP_MEM_FENCE;\n" -" if( wgSize > 64 )\n" -" {\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#else\n" -" if( lIdx < 64 )\n" -" {\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-1];\n" -" GROUP_MEM_FENCE;\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; \n" -" GROUP_MEM_FENCE;\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-4];\n" -" GROUP_MEM_FENCE;\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-8];\n" -" GROUP_MEM_FENCE;\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-16];\n" -" GROUP_MEM_FENCE;\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-32];\n" -" GROUP_MEM_FENCE;\n" -" if( wgSize > 64 )\n" -" {\n" -" sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -" sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#endif\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" *totalSum = sorterSharedMemory[wgSize*2-1];\n" -" u32 addValue = sorterSharedMemory[lIdx+wgSize-1];\n" -" return addValue;\n" -"}\n" -"//__attribute__((reqd_work_group_size(128,1,1)))\n" -"uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" -"{\n" -" u32 s4 = prefixScanVectorEx( &pData );\n" -" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 );\n" -" return pData + make_uint4( rank, rank, rank, rank );\n" -"}\n" -"//__attribute__((reqd_work_group_size(64,1,1)))\n" -"uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" -"{\n" -" u32 s4 = prefixScanVectorEx( &pData );\n" -" u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 );\n" -" return pData + make_uint4( rank, rank, rank, rank );\n" -"}\n" -"u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;}\n" -"u32 bit8Scan(u32 v)\n" -"{\n" -" return (v<<8) + (v<<16) + (v<<24);\n" -"}\n" -"//===\n" -"#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb )\n" -"{\n" -" __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 lIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int startBit = cb.m_startBit;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" for(int i=0; i<NUM_BUCKET; i++)\n" -" {\n" -" MY_HISTOGRAM(i) = 0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" u32 localKey;\n" -" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" -" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" -" {\n" -" // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n" -" // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops\n" -" // AMD: AtomInc performs better while NV prefers ++\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -" {\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+i < n )\n" -"#endif\n" -" {\n" -" localKey = (gSrc[addr+i]>>startBit) & 0xf;\n" -"#if defined(NV_GPU)\n" -" MY_HISTOGRAM( localKey )++;\n" -"#else\n" -" AtomInc( MY_HISTOGRAM( localKey ) );\n" -"#endif\n" -" }\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" u32 sum = 0;\n" -" for(int i=0; i<GET_GROUP_SIZE; i++)\n" -" {\n" -" sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE];\n" -" }\n" -" histogramOut[lIdx*nWGs+wgIdx] = sum;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void StreamCountSortDataKernel( __global SortDataCL* gSrc, __global u32* histogramOut, int4 cb )\n" -"{\n" -" __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 lIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int startBit = cb.m_startBit;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" for(int i=0; i<NUM_BUCKET; i++)\n" -" {\n" -" MY_HISTOGRAM(i) = 0;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" u32 localKey;\n" -" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" -" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" -" {\n" -" // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n" -" // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops\n" -" // AMD: AtomInc performs better while NV prefers ++\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -" {\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+i < n )\n" -"#endif\n" -" {\n" -" localKey = (gSrc[addr+i].m_key>>startBit) & 0xf;\n" -"#if defined(NV_GPU)\n" -" MY_HISTOGRAM( localKey )++;\n" -"#else\n" -" AtomInc( MY_HISTOGRAM( localKey ) );\n" -"#endif\n" -" }\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" u32 sum = 0;\n" -" for(int i=0; i<GET_GROUP_SIZE; i++)\n" -" {\n" -" sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE];\n" -" }\n" -" histogramOut[lIdx*nWGs+wgIdx] = sum;\n" -" }\n" -"}\n" -"#define nPerLane (nPerWI/4)\n" -"// NUM_BUCKET*nWGs < 128*nPerWI\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(128,1,1)))\n" -"void PrefixScanKernel( __global u32* wHistogram1, int4 cb )\n" -"{\n" -" __local u32 ldsTopScanData[128*2];\n" -" u32 lIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" const int nWGs = cb.m_nWGs;\n" -" u32 data[nPerWI];\n" -" for(int i=0; i<nPerWI; i++)\n" -" {\n" -" data[i] = 0;\n" -" if( (nPerWI*lIdx+i) < NUM_BUCKET*nWGs )\n" -" data[i] = wHistogram1[nPerWI*lIdx+i];\n" -" }\n" -" uint4 myData = make_uint4(0,0,0,0);\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" myData.x += data[nPerLane*0+i];\n" -" myData.y += data[nPerLane*1+i];\n" -" myData.z += data[nPerLane*2+i];\n" -" myData.w += data[nPerLane*3+i];\n" -" }\n" -" uint totalSum;\n" -" uint4 scanned = localPrefixSum128V( myData, lIdx, &totalSum, ldsTopScanData );\n" -"// for(int j=0; j<4; j++) // somehow it introduces a lot of branches\n" -" { int j = 0;\n" -" u32 sum = 0;\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" u32 tmp = data[nPerLane*j+i];\n" -" data[nPerLane*j+i] = sum;\n" -" sum += tmp;\n" -" }\n" -" }\n" -" { int j = 1;\n" -" u32 sum = 0;\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" u32 tmp = data[nPerLane*j+i];\n" -" data[nPerLane*j+i] = sum;\n" -" sum += tmp;\n" -" }\n" -" }\n" -" { int j = 2;\n" -" u32 sum = 0;\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" u32 tmp = data[nPerLane*j+i];\n" -" data[nPerLane*j+i] = sum;\n" -" sum += tmp;\n" -" }\n" -" }\n" -" { int j = 3;\n" -" u32 sum = 0;\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" u32 tmp = data[nPerLane*j+i];\n" -" data[nPerLane*j+i] = sum;\n" -" sum += tmp;\n" -" }\n" -" }\n" -" for(int i=0; i<nPerLane; i++)\n" -" {\n" -" data[nPerLane*0+i] += scanned.x;\n" -" data[nPerLane*1+i] += scanned.y;\n" -" data[nPerLane*2+i] += scanned.z;\n" -" data[nPerLane*3+i] += scanned.w;\n" -" }\n" -" for(int i=0; i<nPerWI; i++)\n" -" {\n" -" int index = nPerWI*lIdx+i;\n" -" if (index < NUM_BUCKET*nWGs)\n" -" wHistogram1[nPerWI*lIdx+i] = data[i];\n" -" }\n" -"}\n" -"// 4 scan, 4 exchange\n" -"void sort4Bits(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" -"{\n" -" for(int bitIdx=0; bitIdx<BITS_PER_PASS; bitIdx++)\n" -" {\n" -" u32 mask = (1<<bitIdx);\n" -" uint4 cmpResult = make_uint4( (sortData[0]>>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask );\n" -" uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) );\n" -" u32 total;\n" -" prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData );\n" -" {\n" -" uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3);\n" -" uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total );\n" -" dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) );\n" -" GROUP_LDS_BARRIER;\n" -" ldsSortData[dstAddr.x] = sortData[0];\n" -" ldsSortData[dstAddr.y] = sortData[1];\n" -" ldsSortData[dstAddr.z] = sortData[2];\n" -" ldsSortData[dstAddr.w] = sortData[3];\n" -" GROUP_LDS_BARRIER;\n" -" sortData[0] = ldsSortData[localAddr.x];\n" -" sortData[1] = ldsSortData[localAddr.y];\n" -" sortData[2] = ldsSortData[localAddr.z];\n" -" sortData[3] = ldsSortData[localAddr.w];\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" }\n" -"}\n" -"// 2 scan, 2 exchange\n" -"void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" -"{\n" -" for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2)\n" -" {\n" -" uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, \n" -" (sortData[1]>>(startBit+ibit)) & 0x3, \n" -" (sortData[2]>>(startBit+ibit)) & 0x3, \n" -" (sortData[3]>>(startBit+ibit)) & 0x3);\n" -" u32 key4;\n" -" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" -" {\n" -" sKeyPacked[0] |= 1<<(8*b.x);\n" -" sKeyPacked[1] |= 1<<(8*b.y);\n" -" sKeyPacked[2] |= 1<<(8*b.z);\n" -" sKeyPacked[3] |= 1<<(8*b.w);\n" -" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" -" }\n" -" u32 rankPacked;\n" -" u32 sumPacked;\n" -" {\n" -" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" u32 newOffset[4] = { 0,0,0,0 };\n" -" {\n" -" u32 sumScanned = bit8Scan( sumPacked );\n" -" u32 scannedKeys[4];\n" -" scannedKeys[0] = 1<<(8*b.x);\n" -" scannedKeys[1] = 1<<(8*b.y);\n" -" scannedKeys[2] = 1<<(8*b.z);\n" -" scannedKeys[3] = 1<<(8*b.w);\n" -" { // 4 scans at once\n" -" u32 sum4 = 0;\n" -" for(int ie=0; ie<4; ie++)\n" -" {\n" -" u32 tmp = scannedKeys[ie];\n" -" scannedKeys[ie] = sum4;\n" -" sum4 += tmp;\n" -" }\n" -" }\n" -" {\n" -" u32 sumPlusRank = sumScanned + rankPacked;\n" -" { u32 ie = b.x;\n" -" scannedKeys[0] += sumPlusRank;\n" -" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" -" }\n" -" { u32 ie = b.y;\n" -" scannedKeys[1] += sumPlusRank;\n" -" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" -" }\n" -" { u32 ie = b.z;\n" -" scannedKeys[2] += sumPlusRank;\n" -" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" -" }\n" -" { u32 ie = b.w;\n" -" scannedKeys[3] += sumPlusRank;\n" -" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" -" }\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" {\n" -" ldsSortData[newOffset[0]] = sortData[0];\n" -" ldsSortData[newOffset[1]] = sortData[1];\n" -" ldsSortData[newOffset[2]] = sortData[2];\n" -" ldsSortData[newOffset[3]] = sortData[3];\n" -" GROUP_LDS_BARRIER;\n" -" u32 dstAddr = 4*lIdx;\n" -" sortData[0] = ldsSortData[dstAddr+0];\n" -" sortData[1] = ldsSortData[dstAddr+1];\n" -" sortData[2] = ldsSortData[dstAddr+2];\n" -" sortData[3] = ldsSortData[dstAddr+3];\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" }\n" -"}\n" -"#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" -"{\n" -" __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" -" __local u32 localHistogramToCarry[NUM_BUCKET];\n" -" __local u32 localHistogram[NUM_BUCKET*2];\n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 lIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int startBit = cb.m_startBit;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" if( lIdx < (NUM_BUCKET) )\n" -" {\n" -" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" -" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" -" {\n" -" u32 myHistogram = 0;\n" -" u32 sortData[ELEMENTS_PER_WORK_ITEM];\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -"#if defined(CHECK_BOUNDARY)\n" -" sortData[i] = ( addr+i < n )? gSrc[ addr+i ] : 0xffffffff;\n" -"#else\n" -" sortData[i] = gSrc[ addr+i ];\n" -"#endif\n" -" sort4Bits(sortData, startBit, lIdx, ldsSortData);\n" -" u32 keys[ELEMENTS_PER_WORK_ITEM];\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -" keys[i] = (sortData[i]>>startBit) & 0xf;\n" -" { // create histogram\n" -" u32 setIdx = lIdx/16;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[lIdx] = 0;\n" -" }\n" -" ldsSortData[lIdx] = 0;\n" -" GROUP_LDS_BARRIER;\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+i < n )\n" -"#endif\n" -"#if defined(NV_GPU)\n" -" SET_HISTOGRAM( setIdx, keys[i] )++;\n" -"#else\n" -" AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) );\n" -"#endif\n" -" \n" -" GROUP_LDS_BARRIER;\n" -" \n" -" uint hIdx = NUM_BUCKET+lIdx;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" u32 sum = 0;\n" -" for(int i=0; i<WG_SIZE/16; i++)\n" -" {\n" -" sum += SET_HISTOGRAM( i, lIdx );\n" -" }\n" -" myHistogram = sum;\n" -" localHistogram[hIdx] = sum;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -"#if defined(USE_2LEVEL_REDUCE)\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[hIdx] = localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" u32 u0, u1, u2;\n" -" u0 = localHistogram[hIdx-3];\n" -" u1 = localHistogram[hIdx-2];\n" -" u2 = localHistogram[hIdx-1];\n" -" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" -" GROUP_MEM_FENCE;\n" -" u0 = localHistogram[hIdx-12];\n" -" u1 = localHistogram[hIdx-8];\n" -" u2 = localHistogram[hIdx-4];\n" -" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#else\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[hIdx] = localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-2];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-4];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-8];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#endif\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" {\n" -" for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n" -" {\n" -" int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie;\n" -" int binIdx = keys[ie];\n" -" int groupOffset = localHistogramToCarry[binIdx];\n" -" int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+ie < n )\n" -"#endif\n" -" gDst[ groupOffset + myIdx ] = sortData[ie];\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogramToCarry[lIdx] += myHistogram;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -"}\n" -"// 2 scan, 2 exchange\n" -"void sort4Bits1KeyValue(u32 sortData[4], int sortVal[4], int startBit, int lIdx, __local u32* ldsSortData, __local int *ldsSortVal)\n" -"{\n" -" for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2)\n" -" {\n" -" uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, \n" -" (sortData[1]>>(startBit+ibit)) & 0x3, \n" -" (sortData[2]>>(startBit+ibit)) & 0x3, \n" -" (sortData[3]>>(startBit+ibit)) & 0x3);\n" -" u32 key4;\n" -" u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" -" {\n" -" sKeyPacked[0] |= 1<<(8*b.x);\n" -" sKeyPacked[1] |= 1<<(8*b.y);\n" -" sKeyPacked[2] |= 1<<(8*b.z);\n" -" sKeyPacked[3] |= 1<<(8*b.w);\n" -" key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" -" }\n" -" u32 rankPacked;\n" -" u32 sumPacked;\n" -" {\n" -" rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" u32 newOffset[4] = { 0,0,0,0 };\n" -" {\n" -" u32 sumScanned = bit8Scan( sumPacked );\n" -" u32 scannedKeys[4];\n" -" scannedKeys[0] = 1<<(8*b.x);\n" -" scannedKeys[1] = 1<<(8*b.y);\n" -" scannedKeys[2] = 1<<(8*b.z);\n" -" scannedKeys[3] = 1<<(8*b.w);\n" -" { // 4 scans at once\n" -" u32 sum4 = 0;\n" -" for(int ie=0; ie<4; ie++)\n" -" {\n" -" u32 tmp = scannedKeys[ie];\n" -" scannedKeys[ie] = sum4;\n" -" sum4 += tmp;\n" -" }\n" -" }\n" -" {\n" -" u32 sumPlusRank = sumScanned + rankPacked;\n" -" { u32 ie = b.x;\n" -" scannedKeys[0] += sumPlusRank;\n" -" newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" -" }\n" -" { u32 ie = b.y;\n" -" scannedKeys[1] += sumPlusRank;\n" -" newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" -" }\n" -" { u32 ie = b.z;\n" -" scannedKeys[2] += sumPlusRank;\n" -" newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" -" }\n" -" { u32 ie = b.w;\n" -" scannedKeys[3] += sumPlusRank;\n" -" newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" -" }\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" {\n" -" ldsSortData[newOffset[0]] = sortData[0];\n" -" ldsSortData[newOffset[1]] = sortData[1];\n" -" ldsSortData[newOffset[2]] = sortData[2];\n" -" ldsSortData[newOffset[3]] = sortData[3];\n" -" ldsSortVal[newOffset[0]] = sortVal[0];\n" -" ldsSortVal[newOffset[1]] = sortVal[1];\n" -" ldsSortVal[newOffset[2]] = sortVal[2];\n" -" ldsSortVal[newOffset[3]] = sortVal[3];\n" -" GROUP_LDS_BARRIER;\n" -" u32 dstAddr = 4*lIdx;\n" -" sortData[0] = ldsSortData[dstAddr+0];\n" -" sortData[1] = ldsSortData[dstAddr+1];\n" -" sortData[2] = ldsSortData[dstAddr+2];\n" -" sortData[3] = ldsSortData[dstAddr+3];\n" -" sortVal[0] = ldsSortVal[dstAddr+0];\n" -" sortVal[1] = ldsSortVal[dstAddr+1];\n" -" sortVal[2] = ldsSortVal[dstAddr+2];\n" -" sortVal[3] = ldsSortVal[dstAddr+3];\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" -"{\n" -" __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" -" __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" -" __local u32 localHistogramToCarry[NUM_BUCKET];\n" -" __local u32 localHistogram[NUM_BUCKET*2];\n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 lIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int startBit = cb.m_startBit;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" if( lIdx < (NUM_BUCKET) )\n" -" {\n" -" localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" -" int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" -" {\n" -" u32 myHistogram = 0;\n" -" int sortData[ELEMENTS_PER_WORK_ITEM];\n" -" int sortVal[ELEMENTS_PER_WORK_ITEM];\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -"#if defined(CHECK_BOUNDARY)\n" -" {\n" -" sortData[i] = ( addr+i < n )? gSrc[ addr+i ].m_key : 0xffffffff;\n" -" sortVal[i] = ( addr+i < n )? gSrc[ addr+i ].m_value : 0xffffffff;\n" -" }\n" -"#else\n" -" {\n" -" sortData[i] = gSrc[ addr+i ].m_key;\n" -" sortVal[i] = gSrc[ addr+i ].m_value;\n" -" }\n" -"#endif\n" -" sort4Bits1KeyValue(sortData, sortVal, startBit, lIdx, ldsSortData, ldsSortVal);\n" -" u32 keys[ELEMENTS_PER_WORK_ITEM];\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -" keys[i] = (sortData[i]>>startBit) & 0xf;\n" -" { // create histogram\n" -" u32 setIdx = lIdx/16;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[lIdx] = 0;\n" -" }\n" -" ldsSortData[lIdx] = 0;\n" -" GROUP_LDS_BARRIER;\n" -" for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+i < n )\n" -"#endif\n" -"#if defined(NV_GPU)\n" -" SET_HISTOGRAM( setIdx, keys[i] )++;\n" -"#else\n" -" AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) );\n" -"#endif\n" -" \n" -" GROUP_LDS_BARRIER;\n" -" \n" -" uint hIdx = NUM_BUCKET+lIdx;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" u32 sum = 0;\n" -" for(int i=0; i<WG_SIZE/16; i++)\n" -" {\n" -" sum += SET_HISTOGRAM( i, lIdx );\n" -" }\n" -" myHistogram = sum;\n" -" localHistogram[hIdx] = sum;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -"#if defined(USE_2LEVEL_REDUCE)\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[hIdx] = localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" u32 u0, u1, u2;\n" -" u0 = localHistogram[hIdx-3];\n" -" u1 = localHistogram[hIdx-2];\n" -" u2 = localHistogram[hIdx-1];\n" -" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" -" GROUP_MEM_FENCE;\n" -" u0 = localHistogram[hIdx-12];\n" -" u1 = localHistogram[hIdx-8];\n" -" u2 = localHistogram[hIdx-4];\n" -" AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#else\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogram[hIdx] = localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-1];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-2];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-4];\n" -" GROUP_MEM_FENCE;\n" -" localHistogram[hIdx] += localHistogram[hIdx-8];\n" -" GROUP_MEM_FENCE;\n" -" }\n" -"#endif\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" {\n" -" for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n" -" {\n" -" int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie;\n" -" int binIdx = keys[ie];\n" -" int groupOffset = localHistogramToCarry[binIdx];\n" -" int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" -"#if defined(CHECK_BOUNDARY)\n" -" if( addr+ie < n )\n" -" {\n" -" if ((groupOffset + myIdx)<n)\n" -" {\n" -" if (sortData[ie]==sortVal[ie])\n" -" {\n" -" \n" -" SortDataCL tmp;\n" -" tmp.m_key = sortData[ie];\n" -" tmp.m_value = sortVal[ie];\n" -" if (tmp.m_key == tmp.m_value)\n" -" gDst[groupOffset + myIdx ] = tmp;\n" -" }\n" -" \n" -" }\n" -" }\n" -"#else\n" -" if ((groupOffset + myIdx)<n)\n" -" {\n" -" gDst[ groupOffset + myIdx ].m_key = sortData[ie];\n" -" gDst[ groupOffset + myIdx ].m_value = sortVal[ie];\n" -" }\n" -"#endif\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx < NUM_BUCKET )\n" -" {\n" -" localHistogramToCarry[lIdx] += myHistogram;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SortAndScatterSortDataKernelSerial( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" -"{\n" -" \n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 realLocalIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int startBit = cb.m_startBit;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" int counter[NUM_BUCKET];\n" -" \n" -" if (realLocalIdx>0)\n" -" return;\n" -" \n" -" for (int c=0;c<NUM_BUCKET;c++)\n" -" counter[c]=0;\n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" \n" -" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n" -" {\n" -" for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n" -" {\n" -" int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" \n" -" for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++)\n" -" {\n" -" int i = addr2+j;\n" -" if( i < n )\n" -" {\n" -" int tableIdx;\n" -" tableIdx = (gSrc[i].m_key>>startBit) & 0xf;//0xf = NUM_TABLES-1\n" -" gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" -" counter[tableIdx] ++;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" \n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" -"{\n" -" \n" -" u32 gIdx = GET_GLOBAL_IDX;\n" -" u32 realLocalIdx = GET_LOCAL_IDX;\n" -" u32 wgIdx = GET_GROUP_IDX;\n" -" u32 wgSize = GET_GROUP_SIZE;\n" -" const int startBit = cb.m_startBit;\n" -" const int n = cb.m_n;\n" -" const int nWGs = cb.m_nWGs;\n" -" const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" -" int counter[NUM_BUCKET];\n" -" \n" -" if (realLocalIdx>0)\n" -" return;\n" -" \n" -" for (int c=0;c<NUM_BUCKET;c++)\n" -" counter[c]=0;\n" -" const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" -" \n" -" int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" -" for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n" -" {\n" -" for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n" -" {\n" -" int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" -" \n" -" for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++)\n" -" {\n" -" int i = addr2+j;\n" -" if( i < n )\n" -" {\n" -" int tableIdx;\n" -" tableIdx = (gSrc[i]>>startBit) & 0xf;//0xf = NUM_TABLES-1\n" -" gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" -" counter[tableIdx] ++;\n" -" }\n" -" }\n" -" }\n" -" }\n" -" \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp deleted file mode 100644 index 161e304f09..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ /dev/null @@ -1,391 +0,0 @@ - -#include "b3GpuRaycast.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h" - -#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" - - -#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" - - - -struct b3GpuRaycastInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_q; - cl_kernel m_raytraceKernel; - cl_kernel m_raytracePairsKernel; - cl_kernel m_findRayRigidPairIndexRanges; - - b3GpuParallelLinearBvh* m_plbvh; - b3RadixSort32CL* m_radixSorter; - b3FillCL* m_fill; - - //1 element per ray - b3OpenCLArray<b3RayInfo>* m_gpuRays; - b3OpenCLArray<b3RayHit>* m_gpuHitResults; - b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay; - b3OpenCLArray<int>* m_numRayRigidPairsPerRay; - - //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB - b3OpenCLArray<int>* m_gpuNumRayRigidPairs; - b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index - - int m_test; -}; - -b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q) -{ - m_data = new b3GpuRaycastInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_q = q; - m_data->m_raytraceKernel = 0; - m_data->m_raytracePairsKernel = 0; - m_data->m_findRayRigidPairIndexRanges = 0; - - m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q); - m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q); - m_data->m_fill = new b3FillCL(ctx, device, q); - - m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q); - m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q); - m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q); - m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q); - m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q); - m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q); - - { - cl_int errNum=0; - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH); - b3Assert(errNum==CL_SUCCESS); - m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - clReleaseProgram(prog); - } - - -} - -b3GpuRaycast::~b3GpuRaycast() -{ - clReleaseKernel(m_data->m_raytraceKernel); - clReleaseKernel(m_data->m_raytracePairsKernel); - clReleaseKernel(m_data->m_findRayRigidPairIndexRanges); - - delete m_data->m_plbvh; - delete m_data->m_radixSorter; - delete m_data->m_fill; - - delete m_data->m_gpuRays; - delete m_data->m_gpuHitResults; - delete m_data->m_firstRayRigidPairIndexPerRay; - delete m_data->m_numRayRigidPairsPerRay; - delete m_data->m_gpuNumRayRigidPairs; - delete m_data->m_gpuRayRigidPairs; - - delete m_data; -} - -bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction) -{ - b3Vector3 rs = rayFrom - spherePos; - b3Vector3 rayDir = rayTo-rayFrom; - - float A = b3Dot(rayDir,rayDir); - float B = b3Dot(rs, rayDir); - float C = b3Dot(rs, rs) - (radius * radius); - - float D = B * B - A*C; - - if (D > 0.0) - { - float t = (-B - sqrt(D))/A; - - if ( (t >= 0.0f) && (t < hitFraction) ) - { - hitFraction = t; - return true; - } - } - return false; -} - -bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly, - const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal) -{ - float exitFraction = hitFraction; - float enterFraction = -0.1f; - b3Vector3 curHitNormal=b3MakeVector3(0,0,0); - for (int i=0;i<poly.m_numFaces;i++) - { - const b3GpuFace& face = faces[poly.m_faceOffset+i]; - float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w; - float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w; - if (fromPlaneDist<0.f) - { - if (toPlaneDist >= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (exitFraction>fraction) - { - exitFraction = fraction; - } - } - } else - { - if (toPlaneDist<0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } else - { - return false; - } - } - if (exitFraction <= enterFraction) - return false; - } - - if (enterFraction < 0.f) - return false; - - hitFraction = enterFraction; - hitNormal = curHitNormal; - return true; -} - -void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) -{ - -// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); - - B3_PROFILE("castRaysHost"); - for (int r=0;r<rays.size();r++) - { - b3Vector3 rayFrom = rays[r].m_from; - b3Vector3 rayTo = rays[r].m_to; - float hitFraction = hitResults[r].m_hitFraction; - - int hitBodyIndex= -1; - b3Vector3 hitNormal; - - for (int b=0;b<numBodies;b++) - { - - const b3Vector3& pos = bodies[b].m_pos; - //const b3Quaternion& orn = bodies[b].m_quat; - - switch (collidables[bodies[b].m_collidableIdx].m_shapeType) - { - case SHAPE_SPHERE: - { - b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; - if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) - { - hitBodyIndex = b; - b3Vector3 hitPoint; - hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); - hitNormal = (hitPoint-bodies[b].m_pos).normalize(); - } - } - case SHAPE_CONVEX_HULL: - { - - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(bodies[b].m_pos); - convexWorldTransform.setRotation(bodies[b].m_quat); - b3Transform convexWorld2Local = convexWorldTransform.inverse(); - - b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); - b3Vector3 rayToLocal = convexWorld2Local(rayTo); - - - int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; - const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; - if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) - { - hitBodyIndex = b; - } - - - break; - } - default: - { - static bool once=true; - if (once) - { - once=false; - b3Warning("Raytest: unsupported shape type\n"); - } - } - } - } - if (hitBodyIndex>=0) - { - - hitResults[r].m_hitFraction = hitFraction; - hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); - hitResults[r].m_hitNormal = hitNormal; - hitResults[r].m_hitBody = hitBodyIndex; - } - - } -} -///todo: add some acceleration structure (AABBs, tree etc) -void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) -{ - //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); - - B3_PROFILE("castRaysGPU"); - - { - B3_PROFILE("raycast copyFromHost"); - m_data->m_gpuRays->copyFromHost(rays); - m_data->m_gpuHitResults->copyFromHost(hitResults); - - } - - int numRays = hitResults.size(); - { - m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); - m_data->m_numRayRigidPairsPerRay->resize(numRays); - - m_data->m_gpuNumRayRigidPairs->resize(1); - m_data->m_gpuRayRigidPairs->resize(numRays * 16); - } - - //run kernel - const bool USE_BRUTE_FORCE_RAYCAST = false; - if(USE_BRUTE_FORCE_RAYCAST) - { - B3_PROFILE("raycast launch1D"); - - b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); - int numRays = rays.size(); - launcher.setConst(numRays); - - launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); - launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); - - launcher.setConst(numBodies); - launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); - - launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - else - { - m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); - - m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); - - int numRayRigidPairs = -1; - m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); - if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) - { - numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); - m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); - } - - m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct - - //Sort ray-rigid pairs by ray index - { - B3_PROFILE("sort ray-rigid pairs"); - m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) ); - } - - //detect start,count of each ray pair - { - B3_PROFILE("detect ray-rigid pair index ranges"); - - { - B3_PROFILE("reset ray-rigid pair index ranges"); - - m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index - m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); - clFinish(m_data->m_q); - } - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), - - b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), - b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) - }; - - b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numRayRigidPairs); - - launcher.launch1D(numRayRigidPairs); - clFinish(m_data->m_q); - } - - { - B3_PROFILE("ray-rigid intersection"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), - b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), - b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), - b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), - - b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), - b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), - b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), - b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), - - b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) - }; - - b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); - launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - } - - - - //copy results - { - B3_PROFILE("raycast copyToHost"); - m_data->m_gpuHitResults->copyToHost(hitResults); - } - -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h deleted file mode 100644 index 3a5cf44b79..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/b3GpuRaycast.h +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef B3_GPU_RAYCAST_H -#define B3_GPU_RAYCAST_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - - - -class b3GpuRaycast -{ -protected: - struct b3GpuRaycastInternalData* m_data; -public: - b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q); - virtual ~b3GpuRaycast(); - - void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData); - - void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase); - - - -}; - -#endif //B3_GPU_RAYCAST_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl deleted file mode 100644 index e72d96876b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ /dev/null @@ -1,439 +0,0 @@ - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_PLANE 4 -#define SHAPE_CONCAVE_TRIMESH 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - - -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; - -typedef struct -{ - float m_hitFraction; - int m_hitResult0; - int m_hitResult1; - int m_hitResult2; - float4 m_hitPoint; - float4 m_hitNormal; -} b3RayHit; - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct Collidable -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - float m_radius; - int m_shapeType; - int m_shapeIndex; -} Collidable; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; - -} ConvexPolyhedronCL; - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} b3GpuFace; - - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline - Quaternion qtMul(Quaternion a, Quaternion b); - -__inline - Quaternion qtNormalize(Quaternion in); - - -__inline - Quaternion qtInvert(Quaternion q); - - -__inline - float dot3F4(float4 a, float4 b) -{ - float4 a1 = (float4)(a.xyz,0.f); - float4 b1 = (float4)(b.xyz,0.f); - return dot(a1, b1); -} - - -__inline - Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross( a, b ); - ans += a.w*b+b.w*a; - // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline - Quaternion qtNormalize(Quaternion in) -{ - return fast_normalize(in); - // in /= length( in ); - // return in; -} -__inline - float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(q,vcpy); - out = qtMul(out,qInv); - return out; -} - -__inline - Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline - float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - - - - - -bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, - __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) -{ - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - bool result = true; - - float exitFraction = hitFraction[0]; - float enterFraction = -0.3f; - float4 curHitNormal = (float4)(0,0,0,0); - for (int i=0;i<numFaces && result;i++) - { - b3GpuFace face = faces[faceOffset+i]; - float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w; - float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w; - if (fromPlaneDist<0.f) - { - if (toPlaneDist >= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (exitFraction>fraction) - { - exitFraction = fraction; - } - } - } else - { - if (toPlaneDist<0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } else - { - result = false; - } - } - if (exitFraction <= enterFraction) - result = false; - } - - if (enterFraction < 0.f) - { - result = false; - } - - if (result) - { - hitFraction[0] = enterFraction; - hitNormal[0] = curHitNormal; - } - return result; -} - - - - - - -bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction) -{ - float4 rs = rayFrom - spherePos; - rs.w = 0.f; - float4 rayDir = rayTo-rayFrom; - rayDir.w = 0.f; - float A = dot(rayDir,rayDir); - float B = dot(rs, rayDir); - float C = dot(rs, rs) - (radius * radius); - - float D = B * B - A*C; - - if (D > 0.0f) - { - float t = (-B - sqrt(D))/A; - - if ( (t >= 0.0f) && (t < (*hitFraction)) ) - { - *hitFraction = t; - return true; - } - } - return false; -} - -float4 setInterpolate3(float4 from, float4 to, float t) -{ - float s = 1.0f - t; - float4 result; - result = s * from + t * to; - result.w = 0.f; - return result; -} - -__kernel void rayCastKernel( - int numRays, - const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - const int numBodies, - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes ) -{ - - int i = get_global_id(0); - if (i>=numRays) - return; - - hitResults[i].m_hitFraction = 1.f; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex= -1; - - int cachedCollidableIndex = -1; - Collidable cachedCollidable; - - for (int b=0;b<numBodies;b++) - { - if (hitResults[i].m_hitResult2==b) - continue; - Body body = bodies[b]; - float4 pos = body.m_pos; - float4 orn = body.m_quat; - if (cachedCollidableIndex != body.m_collidableIdx) - { - cachedCollidableIndex = body.m_collidableIdx; - cachedCollidable = collidables[cachedCollidableIndex]; - } - if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 invPos = (float4)(0,0,0,0); - float4 invOrn = (float4)(0,0,0,0); - float4 rayFromLocal = (float4)(0,0,0,0); - float4 rayToLocal = (float4)(0,0,0,0); - invOrn = qtInvert(orn); - invPos = qtRotate(invOrn, -pos); - rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; - rayToLocal = qtRotate( invOrn, rayTo) + invPos; - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces; - int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset; - if (numFaces) - { - if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) - { - hitBodyIndex = b; - - } - } - } - if (cachedCollidable.m_shapeType == SHAPE_SPHERE) - { - float radius = cachedCollidable.m_radius; - - if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) - { - hitBodyIndex = b; - hitNormal = (float4) (hitPoint-bodies[b].m_pos); - } - } - } - - if (hitBodyIndex>=0) - { - hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction); - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} - - -__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, - __global int* out_firstRayRigidPairIndexPerRay, - __global int* out_numRayRigidPairsPerRay, - int numRayRigidPairs) -{ - int rayRigidPairIndex = get_global_id(0); - if (rayRigidPairIndex >= numRayRigidPairs) return; - - int rayIndex = rayRigidPairs[rayRigidPairIndex].x; - - atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex); - atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]); -} - -__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - __global int* firstRayRigidPairIndexPerRay, - __global int* numRayRigidPairsPerRay, - - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes, - - __global int2* rayRigidPairs, - int numRays) -{ - int i = get_global_id(0); - if (i >= numRays) return; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - - hitResults[i].m_hitFraction = 1.f; - - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex = -1; - - // - for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair) - { - int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i]; - int b = rayRigidPairs[rayRigidPairIndex].y; - - if (hitResults[i].m_hitResult2 == b) continue; - - Body body = bodies[b]; - Collidable rigidCollidable = collidables[body.m_collidableIdx]; - - float4 pos = body.m_pos; - float4 orn = body.m_quat; - - if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL) - { - float4 invPos = (float4)(0,0,0,0); - float4 invOrn = (float4)(0,0,0,0); - float4 rayFromLocal = (float4)(0,0,0,0); - float4 rayToLocal = (float4)(0,0,0,0); - invOrn = qtInvert(orn); - invPos = qtRotate(invOrn, -pos); - rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; - rayToLocal = qtRotate( invOrn, rayTo) + invPos; - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces; - int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset; - - if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - } - } - - if (rigidCollidable.m_shapeType == SHAPE_SPHERE) - { - float radius = rigidCollidable.m_radius; - - if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - hitNormal = (float4) (hitPoint - bodies[b].m_pos); - } - } - } - - if (hitBodyIndex >= 0) - { - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h deleted file mode 100644 index 6257909a4d..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h +++ /dev/null @@ -1,381 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* rayCastKernelCL= \ -"#define SHAPE_CONVEX_HULL 3\n" -"#define SHAPE_PLANE 4\n" -"#define SHAPE_CONCAVE_TRIMESH 5\n" -"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" -"#define SHAPE_SPHERE 7\n" -"typedef struct\n" -"{\n" -" float4 m_from;\n" -" float4 m_to;\n" -"} b3RayInfo;\n" -"typedef struct\n" -"{\n" -" float m_hitFraction;\n" -" int m_hitResult0;\n" -" int m_hitResult1;\n" -" int m_hitResult2;\n" -" float4 m_hitPoint;\n" -" float4 m_hitNormal;\n" -"} b3RayHit;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" float4 m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" unsigned int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct Collidable\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" float m_radius;\n" -" int m_shapeType;\n" -" int m_shapeIndex;\n" -"} Collidable;\n" -"typedef struct \n" -"{\n" -" float4 m_localCenter;\n" -" float4 m_extents;\n" -" float4 mC;\n" -" float4 mE;\n" -" float m_radius;\n" -" int m_faceOffset;\n" -" int m_numFaces;\n" -" int m_numVertices;\n" -" int m_vertexOffset;\n" -" int m_uniqueEdgesOffset;\n" -" int m_numUniqueEdges;\n" -" int m_unused;\n" -"} ConvexPolyhedronCL;\n" -"typedef struct\n" -"{\n" -" float4 m_plane;\n" -" int m_indexOffset;\n" -" int m_numIndices;\n" -"} b3GpuFace;\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -" Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -" Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -" Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -" float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = (float4)(a.xyz,0.f);\n" -" float4 b1 = (float4)(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -" Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross( a, b );\n" -" ans += a.w*b+b.w*a;\n" -" // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -" Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fast_normalize(in);\n" -" // in /= length( in );\n" -" // return in;\n" -"}\n" -"__inline\n" -" float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(q,vcpy);\n" -" out = qtMul(out,qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -" Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -" float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"void trInverse(float4 translationIn, Quaternion orientationIn,\n" -" float4* translationOut, Quaternion* orientationOut)\n" -"{\n" -" *orientationOut = qtInvert(orientationIn);\n" -" *translationOut = qtRotate(*orientationOut, -translationIn);\n" -"}\n" -"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" -" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n" -"{\n" -" rayFromLocal.w = 0.f;\n" -" rayToLocal.w = 0.f;\n" -" bool result = true;\n" -" float exitFraction = hitFraction[0];\n" -" float enterFraction = -0.3f;\n" -" float4 curHitNormal = (float4)(0,0,0,0);\n" -" for (int i=0;i<numFaces && result;i++)\n" -" {\n" -" b3GpuFace face = faces[faceOffset+i];\n" -" float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n" -" float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n" -" if (fromPlaneDist<0.f)\n" -" {\n" -" if (toPlaneDist >= 0.f)\n" -" {\n" -" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" -" if (exitFraction>fraction)\n" -" {\n" -" exitFraction = fraction;\n" -" }\n" -" } \n" -" } else\n" -" {\n" -" if (toPlaneDist<0.f)\n" -" {\n" -" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" -" if (enterFraction <= fraction)\n" -" {\n" -" enterFraction = fraction;\n" -" curHitNormal = face.m_plane;\n" -" curHitNormal.w = 0.f;\n" -" }\n" -" } else\n" -" {\n" -" result = false;\n" -" }\n" -" }\n" -" if (exitFraction <= enterFraction)\n" -" result = false;\n" -" }\n" -" if (enterFraction < 0.f)\n" -" {\n" -" result = false;\n" -" }\n" -" if (result)\n" -" { \n" -" hitFraction[0] = enterFraction;\n" -" hitNormal[0] = curHitNormal;\n" -" }\n" -" return result;\n" -"}\n" -"bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n" -"{\n" -" float4 rs = rayFrom - spherePos;\n" -" rs.w = 0.f;\n" -" float4 rayDir = rayTo-rayFrom;\n" -" rayDir.w = 0.f;\n" -" float A = dot(rayDir,rayDir);\n" -" float B = dot(rs, rayDir);\n" -" float C = dot(rs, rs) - (radius * radius);\n" -" float D = B * B - A*C;\n" -" if (D > 0.0f)\n" -" {\n" -" float t = (-B - sqrt(D))/A;\n" -" if ( (t >= 0.0f) && (t < (*hitFraction)) )\n" -" {\n" -" *hitFraction = t;\n" -" return true;\n" -" }\n" -" }\n" -" return false;\n" -"}\n" -"float4 setInterpolate3(float4 from, float4 to, float t)\n" -"{\n" -" float s = 1.0f - t;\n" -" float4 result;\n" -" result = s * from + t * to;\n" -" result.w = 0.f; \n" -" return result; \n" -"}\n" -"__kernel void rayCastKernel( \n" -" int numRays, \n" -" const __global b3RayInfo* rays, \n" -" __global b3RayHit* hitResults, \n" -" const int numBodies, \n" -" __global Body* bodies,\n" -" __global Collidable* collidables,\n" -" __global const b3GpuFace* faces,\n" -" __global const ConvexPolyhedronCL* convexShapes )\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numRays)\n" -" return;\n" -" hitResults[i].m_hitFraction = 1.f;\n" -" float4 rayFrom = rays[i].m_from;\n" -" float4 rayTo = rays[i].m_to;\n" -" float hitFraction = 1.f;\n" -" float4 hitPoint;\n" -" float4 hitNormal;\n" -" int hitBodyIndex= -1;\n" -" int cachedCollidableIndex = -1;\n" -" Collidable cachedCollidable;\n" -" for (int b=0;b<numBodies;b++)\n" -" {\n" -" if (hitResults[i].m_hitResult2==b)\n" -" continue;\n" -" Body body = bodies[b];\n" -" float4 pos = body.m_pos;\n" -" float4 orn = body.m_quat;\n" -" if (cachedCollidableIndex != body.m_collidableIdx)\n" -" {\n" -" cachedCollidableIndex = body.m_collidableIdx;\n" -" cachedCollidable = collidables[cachedCollidableIndex];\n" -" }\n" -" if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" -" {\n" -" float4 invPos = (float4)(0,0,0,0);\n" -" float4 invOrn = (float4)(0,0,0,0);\n" -" float4 rayFromLocal = (float4)(0,0,0,0);\n" -" float4 rayToLocal = (float4)(0,0,0,0);\n" -" invOrn = qtInvert(orn);\n" -" invPos = qtRotate(invOrn, -pos);\n" -" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" -" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" -" rayFromLocal.w = 0.f;\n" -" rayToLocal.w = 0.f;\n" -" int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n" -" int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n" -" if (numFaces)\n" -" {\n" -" if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" -" {\n" -" hitBodyIndex = b;\n" -" \n" -" }\n" -" }\n" -" }\n" -" if (cachedCollidable.m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" float radius = cachedCollidable.m_radius;\n" -" \n" -" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" -" {\n" -" hitBodyIndex = b;\n" -" hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n" -" }\n" -" }\n" -" }\n" -" if (hitBodyIndex>=0)\n" -" {\n" -" hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" -" hitResults[i].m_hitFraction = hitFraction;\n" -" hitResults[i].m_hitPoint = hitPoint;\n" -" hitResults[i].m_hitNormal = normalize(hitNormal);\n" -" hitResults[i].m_hitResult0 = hitBodyIndex;\n" -" }\n" -"}\n" -"__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n" -" __global int* out_firstRayRigidPairIndexPerRay,\n" -" __global int* out_numRayRigidPairsPerRay,\n" -" int numRayRigidPairs)\n" -"{\n" -" int rayRigidPairIndex = get_global_id(0);\n" -" if (rayRigidPairIndex >= numRayRigidPairs) return;\n" -" \n" -" int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n" -" \n" -" atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n" -" atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n" -"}\n" -"__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n" -" __global b3RayHit* hitResults, \n" -" __global int* firstRayRigidPairIndexPerRay,\n" -" __global int* numRayRigidPairsPerRay,\n" -" \n" -" __global Body* bodies,\n" -" __global Collidable* collidables,\n" -" __global const b3GpuFace* faces,\n" -" __global const ConvexPolyhedronCL* convexShapes,\n" -" \n" -" __global int2* rayRigidPairs,\n" -" int numRays)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i >= numRays) return;\n" -" \n" -" float4 rayFrom = rays[i].m_from;\n" -" float4 rayTo = rays[i].m_to;\n" -" \n" -" hitResults[i].m_hitFraction = 1.f;\n" -" \n" -" float hitFraction = 1.f;\n" -" float4 hitPoint;\n" -" float4 hitNormal;\n" -" int hitBodyIndex = -1;\n" -" \n" -" //\n" -" for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n" -" {\n" -" int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n" -" int b = rayRigidPairs[rayRigidPairIndex].y;\n" -" \n" -" if (hitResults[i].m_hitResult2 == b) continue;\n" -" \n" -" Body body = bodies[b];\n" -" Collidable rigidCollidable = collidables[body.m_collidableIdx];\n" -" \n" -" float4 pos = body.m_pos;\n" -" float4 orn = body.m_quat;\n" -" \n" -" if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" -" {\n" -" float4 invPos = (float4)(0,0,0,0);\n" -" float4 invOrn = (float4)(0,0,0,0);\n" -" float4 rayFromLocal = (float4)(0,0,0,0);\n" -" float4 rayToLocal = (float4)(0,0,0,0);\n" -" invOrn = qtInvert(orn);\n" -" invPos = qtRotate(invOrn, -pos);\n" -" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" -" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" -" rayFromLocal.w = 0.f;\n" -" rayToLocal.w = 0.f;\n" -" int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n" -" int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n" -" \n" -" if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" -" {\n" -" hitBodyIndex = b;\n" -" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" -" }\n" -" }\n" -" \n" -" if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n" -" {\n" -" float radius = rigidCollidable.m_radius;\n" -" \n" -" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" -" {\n" -" hitBodyIndex = b;\n" -" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" -" hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n" -" }\n" -" }\n" -" }\n" -" \n" -" if (hitBodyIndex >= 0)\n" -" {\n" -" hitResults[i].m_hitFraction = hitFraction;\n" -" hitResults[i].m_hitPoint = hitPoint;\n" -" hitResults[i].m_hitNormal = normalize(hitNormal);\n" -" hitResults[i].m_hitResult0 = hitBodyIndex;\n" -" }\n" -" \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h deleted file mode 100644 index c7478f54a1..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h +++ /dev/null @@ -1,18 +0,0 @@ - -#ifndef B3_CONSTRAINT4_h -#define B3_CONSTRAINT4_h -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" - - -B3_ATTRIBUTE_ALIGNED16(struct) b3GpuConstraint4 : public b3ContactConstraint4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - inline void setFrictionCoeff(float value) { m_linear[3] = value; } - inline float getFrictionCoeff() const { return m_linear[3]; } -}; - -#endif //B3_CONSTRAINT4_h - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp deleted file mode 100644 index af687b54e9..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp +++ /dev/null @@ -1,137 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3GpuGenericConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include <new> -#include "Bullet3Common/b3Transform.h" - -void b3GpuGenericConstraint::getInfo1 (unsigned int* info,const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - *info = 3; - break; - }; - default: - { - b3Assert(0); - } - }; -} - -void getInfo2Point2Point(b3GpuGenericConstraint* constraint, b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - b3Transform trA; - trA.setIdentity(); - trA.setOrigin(bodies[constraint->m_rbA].m_pos); - trA.setRotation(bodies[constraint->m_rbA].m_quat); - - b3Transform trB; - trB.setIdentity(); - trB.setOrigin(bodies[constraint->m_rbB].m_pos); - trB.setRotation(bodies[constraint->m_rbB].m_quat); - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip+1] = 1; - info->m_J1linearAxis[2*info->rowskip+2] = 1; - - b3Vector3 a1 = trA.getBasis()*constraint->getPivotInA(); - //b3Vector3 a1a = b3QuatRotate(trA.getRotation(),constraint->getPivotInA()); - - { - b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis+info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis+2*info->rowskip); - b3Vector3 a1neg = -a1; - a1neg.getSkewSymmetricMatrix(angular0,angular1,angular2); - } - - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip+1] = -1; - info->m_J2linearAxis[2*info->rowskip+2] = -1; - } - - b3Vector3 a2 = trB.getBasis()*constraint->getPivotInB(); - - { - // b3Vector3 a2n = -a2; - b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis+info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J2angularAxis+2*info->rowskip); - a2.getSkewSymmetricMatrix(angular0,angular1,angular2); - } - - - - // set right hand side -// b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - b3Scalar currERP = info->erp; - - b3Scalar k = info->fps * currERP; - int j; - for (j=0; j<3; j++) - { - info->m_constraintError[j*info->rowskip] = k * (a2[j] + trB.getOrigin()[j] - a1[j] - trA.getOrigin()[j]); - //printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]); - } -#if 0 - if(m_flags & B3_P2P_FLAGS_CFM) - { - for (j=0; j<3; j++) - { - info->cfm[j*info->rowskip] = m_cfm; - } - } -#endif - -#if 0 - b3Scalar impulseClamp = m_setting.m_impulseClamp;// - for (j=0; j<3; j++) - { - if (m_setting.m_impulseClamp > 0) - { - info->m_lowerLimit[j*info->rowskip] = -impulseClamp; - info->m_upperLimit[j*info->rowskip] = impulseClamp; - } - } - info->m_damping = m_setting.m_damping; -#endif - -} - -void b3GpuGenericConstraint::getInfo2 (b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(this,info,bodies); - break; - }; - default: - { - b3Assert(0); - } - }; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h deleted file mode 100644 index 14b3ba7fec..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h +++ /dev/null @@ -1,132 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_GENERIC_CONSTRAINT_H -#define B3_GPU_GENERIC_CONSTRAINT_H - -#include "Bullet3Common/b3Quaternion.h" -struct b3RigidBodyData; -enum B3_CONSTRAINT_FLAGS -{ - B3_CONSTRAINT_FLAG_ENABLED=1, -}; - -enum b3GpuGenericConstraintType -{ - B3_GPU_POINT2POINT_CONSTRAINT_TYPE=3, - B3_GPU_FIXED_CONSTRAINT_TYPE=4, -// B3_HINGE_CONSTRAINT_TYPE, -// B3_CONETWIST_CONSTRAINT_TYPE, -// B3_D6_CONSTRAINT_TYPE, -// B3_SLIDER_CONSTRAINT_TYPE, -// B3_CONTACT_CONSTRAINT_TYPE, -// B3_D6_SPRING_CONSTRAINT_TYPE, -// B3_GEAR_CONSTRAINT_TYPE, - - B3_GPU_MAX_CONSTRAINT_TYPE -}; - - - -struct b3GpuConstraintInfo2 -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - b3Scalar fps,erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - b3Scalar *m_J1linearAxis,*m_J1angularAxis,*m_J2linearAxis,*m_J2angularAxis; - - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - b3Scalar *m_constraintError,*cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - b3Scalar *m_lowerLimit,*m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - int *findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - b3Scalar m_damping; -}; - - -B3_ATTRIBUTE_ALIGNED16(struct) b3GpuGenericConstraint -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - b3Vector3 m_pivotInA; - b3Vector3 m_pivotInB; - b3Quaternion m_relTargetAB; - - int m_flags; - int m_uid; - int m_padding[2]; - - int getRigidBodyA() const - { - return m_rbA; - } - int getRigidBodyB() const - { - return m_rbB; - } - - const b3Vector3& getPivotInA() const - { - return m_pivotInA; - } - - const b3Vector3& getPivotInB() const - { - return m_pivotInB; - } - - int isEnabled() const - { - return m_flags & B3_CONSTRAINT_FLAG_ENABLED; - } - - float getBreakingImpulseThreshold() const - { - return m_breakingImpulseThreshold; - } - - ///internal method used by the constraint solver, don't use them directly - void getInfo1 (unsigned int* info,const b3RigidBodyData* bodies); - - ///internal method used by the constraint solver, don't use them directly - void getInfo2 (b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies); - - -}; - -#endif //B3_GPU_GENERIC_CONSTRAINT_H
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp deleted file mode 100644 index 179dfc4f26..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp +++ /dev/null @@ -1,1382 +0,0 @@ - -#include "b3GpuJacobiContactSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h" -#include "Bullet3Common/b3Logging.h" -#include "b3GpuConstraint4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" - - -struct b3GpuJacobiSolverInternalData -{ - //btRadixSort32CL* m_sort32; - //btBoundSearchCL* m_search; - b3PrefixScanCL* m_scan; - - b3OpenCLArray<unsigned int>* m_bodyCount; - b3OpenCLArray<b3Int2>* m_contactConstraintOffsets; - b3OpenCLArray<unsigned int>* m_offsetSplitBodies; - - b3OpenCLArray<b3Vector3>* m_deltaLinearVelocities; - b3OpenCLArray<b3Vector3>* m_deltaAngularVelocities; - - b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocitiesCPU; - b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocitiesCPU; - - - - b3OpenCLArray<b3GpuConstraint4>* m_contactConstraints; - - b3FillCL* m_filler; - - - cl_kernel m_countBodiesKernel; - cl_kernel m_contactToConstraintSplitKernel; - cl_kernel m_clearVelocitiesKernel; - cl_kernel m_averageVelocitiesKernel; - cl_kernel m_updateBodyVelocitiesKernel; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; - - - -}; - - -b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - :m_context(ctx), - m_device(device), - m_queue(queue) -{ - m_data = new b3GpuJacobiSolverInternalData; - m_data->m_scan = new b3PrefixScanCL(m_context,m_device,m_queue); - m_data->m_bodyCount = new b3OpenCLArray<unsigned int>(m_context,m_queue); - m_data->m_filler = new b3FillCL(m_context,m_device,m_queue); - m_data->m_contactConstraintOffsets = new b3OpenCLArray<b3Int2>(m_context,m_queue); - m_data->m_offsetSplitBodies = new b3OpenCLArray<unsigned int>(m_context,m_queue); - m_data->m_contactConstraints = new b3OpenCLArray<b3GpuConstraint4>(m_context,m_queue); - m_data->m_deltaLinearVelocities = new b3OpenCLArray<b3Vector3>(m_context,m_queue); - m_data->m_deltaAngularVelocities = new b3OpenCLArray<b3Vector3>(m_context,m_queue); - - cl_int pErrNum; - const char* additionalMacros=""; - const char* solverUtilsSource = solverUtilsCL; - { - cl_program solverUtilsProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverUtilsSource, &pErrNum,additionalMacros, SOLVER_UTILS_KERNEL_PATH); - b3Assert(solverUtilsProg); - m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_countBodiesKernel); - - m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_contactToConstraintSplitKernel); - m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_clearVelocitiesKernel); - - m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "AverageVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_averageVelocitiesKernel); - - m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_updateBodyVelocitiesKernel); - - - m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_solveContactKernel ); - - m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg,additionalMacros ); - b3Assert(m_data->m_solveFrictionKernel); - } - -} - - -b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver() -{ - clReleaseKernel(m_data->m_solveContactKernel); - clReleaseKernel(m_data->m_solveFrictionKernel); - clReleaseKernel(m_data->m_countBodiesKernel); - clReleaseKernel(m_data->m_contactToConstraintSplitKernel); - clReleaseKernel(m_data->m_averageVelocitiesKernel); - clReleaseKernel(m_data->m_updateBodyVelocitiesKernel); - clReleaseKernel(m_data->m_clearVelocitiesKernel ); - - delete m_data->m_deltaLinearVelocities; - delete m_data->m_deltaAngularVelocities; - delete m_data->m_contactConstraints; - delete m_data->m_offsetSplitBodies; - delete m_data->m_contactConstraintOffsets; - delete m_data->m_bodyCount; - delete m_data->m_filler; - delete m_data->m_scan; - delete m_data; -} - - - -b3Vector3 make_float4(float v) -{ - return b3MakeVector3 (v,v,v); -} - -b3Vector4 make_float4(float x,float y, float z, float w) -{ - return b3MakeVector4 (x,y,z,w); -} - - - static - inline - float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, - const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) - { - return b3Dot(l0, linVel0) + b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); - } - - - static - inline - void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, - b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) - { - linear = n; - angular0 = b3Cross(r0, n); - angular1 = -b3Cross(r1, n); - } - - -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - - - for(int ic=0; ic<4; ic++) - { - // dont necessary because this makes change to 0 - if( cs.m_jacCoeffInv[ic] == 0.f ) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, linear, angular0, angular1 ); - - float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, - linVelARO+dLinVelA, angVelARO+dAngVelA, linVelBRO+dLinVelB, angVelBRO+dAngVelB ) + cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[ic] ); - updated = b3Min( updated, maxRambdaDt[ic] ); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - } -} - - - -void solveContact3(b3GpuConstraint4* cs, - b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr; - b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr; - setLinearAndAngular( cs->m_linear, r0, r1, linear, angular0, angular1 ); - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt ); - updated = b3Min( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - - -static inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - - b3Vector3 linVelA = linVelARO+dLinVelA; - b3Vector3 linVelB = linVelBRO+dLinVelB; - b3Vector3 angVelA = angVelARO+dAngVelA; - b3Vector3 angVelB = angVelBRO+dAngVelB; - - if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1 (n, tangent[0],tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0]-center; - tangent[0] = cross3( n, r ); - tangent[1] = cross3( tangent[0], n ); - tangent[0] = normalize3( tangent[0] ); - tangent[1] = normalize3( tangent[1] ); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, linear, angular0, angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[i] ); - updated = b3Min( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - - { // angular damping for point constraint - b3Vector3 ab = ( posB - posA ).normalized(); - b3Vector3 ac = ( center - posA ).normalized(); - if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot( n, angVelA ); - float angNB = b3Dot( n, angVelB ); - - if (invMassA) - dAngVelA -= (angNA*0.1f)*n; - if (invMassB) - dAngVelB -= (angNB*0.1f)*n; - } - } - -} - - - - -float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1, - float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - - float jmj1 = b3Dot(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = b3Dot(mtMul3(angular1,*invInertia1), angular1); - return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); -// return -1.f/((jmj0+jmj1)+(jmj2+jmj3)); - -} - - -void setConstraint4( const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB, - b3GpuConstraint4* dstC ) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f/dt; - for(int ic=0; ic<4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear[3] = 0.7f ;//src->getFrictionCoeff() ); - for(int ic=0; ic<4; ic++) - { - b3Vector3 r0 = src->m_worldPosB[ic] - posA; - b3Vector3 r1 = src->m_worldPosB[ic] - posB; - - if( ic >= src->m_worldNormalOnB[3] )//npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f;//src->getRestituitionCoeff(); - if( relVelN*relVelN < 0.004f ) - { - e = 0.f; - } - - dstC->m_b[ic] = e*relVelN; - //float penetration = src->m_worldPos[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift)*positionConstraintCoeff*dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if( src->m_worldNormalOnB[3] > 0 )//npoints - { // prepare friction - b3Vector3 center = make_float4(0.f); - for(int i=0; i<src->m_worldNormalOnB[3]; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB[3]; - - b3Vector3 tangent[2]; - b3PlaneSpace1(src->m_worldNormalOnB,tangent[0],tangent[1]); - - b3Vector3 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for(int i=0; i<2; i++) - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ,countA,countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for(int i=0; i<4; i++) - { - if( i<src->m_worldNormalOnB[3] ) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - - - -void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff, int gIdx, b3AlignedObjectArray<unsigned int>& bodyCount -) -{ - //int gIdx = 0;//GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Vector3 posA = gBodies[aIdx].m_pos; - b3Vector3 linVelA = gBodies[aIdx].m_linVel; - b3Vector3 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld;//.m_invInertia; - - b3Vector3 posB = gBodies[bIdx].m_pos; - b3Vector3 linVelB = gBodies[bIdx].m_linVel; - b3Vector3 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld;//m_invInertia; - - b3GpuConstraint4 cs; - float countA = invMassA ? (float)(bodyCount[aIdx]) : 1; - float countB = invMassB ? (float)(bodyCount[bIdx]) : 1; - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, - &cs ); - - - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} - - -void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies,b3InertiaData* inertias,int numBodies,b3Contact4* manifoldPtr, int numManifolds,const b3JacobiSolverInfo& solverInfo) -{ - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - b3AlignedObjectArray<unsigned int> bodyCount; - bodyCount.resize(numBodies); - for (int i=0;i<numBodies;i++) - bodyCount[i] = 0; - - b3AlignedObjectArray<b3Int2> contactConstraintOffsets; - contactConstraintOffsets.resize(numManifolds); - - - for (int i=0;i<numManifolds;i++) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - - bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex); - bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex); - - int bodyIndexA = manifoldPtr[i].getBodyA(); - int bodyIndexB = manifoldPtr[i].getBodyB(); - - if (!isFixedA) - { - contactConstraintOffsets[i].x = bodyCount[bodyIndexA]; - bodyCount[bodyIndexA]++; - } - if (!isFixedB) - { - contactConstraintOffsets[i].y = bodyCount[bodyIndexB]; - bodyCount[bodyIndexB]++; - } - } - - b3AlignedObjectArray<unsigned int> offsetSplitBodies; - offsetSplitBodies.resize(numBodies); - unsigned int totalNumSplitBodies; - m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodies,&totalNumSplitBodies); - int numlastBody = bodyCount[numBodies-1]; - totalNumSplitBodies += numlastBody; - printf("totalNumSplitBodies = %d\n",totalNumSplitBodies); - - - - - - b3AlignedObjectArray<b3GpuConstraint4> contactConstraints; - contactConstraints.resize(numManifolds); - - for (int i=0;i<numManifolds;i++) - { - ContactToConstraintKernel(&manifoldPtr[0],bodies,inertias,&contactConstraints[0],numManifolds, - solverInfo.m_deltaTime, - solverInfo.m_positionDrift, - solverInfo.m_positionConstraintCoeff, - i, bodyCount); - } - int maxIter = solverInfo.m_numIterations; - - - b3AlignedObjectArray<b3Vector3> deltaLinearVelocities; - b3AlignedObjectArray<b3Vector3> deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodies); - deltaAngularVelocities.resize(totalNumSplitBodies); - for (unsigned int i=0;i<totalNumSplitBodies;i++) - { - deltaLinearVelocities[i].setZero(); - deltaAngularVelocities[i].setZero(); - } - - - - for (int iter = 0;iter<maxIter;iter++) - { - int i=0; - for( i=0; i<numManifolds; i++) - { - - //float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr ); - - - } - - } - - - //easy - for (int i=0;i<numBodies;i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } - } - for (int iter = 0;iter<maxIter;iter++) - { - //int i=0; - - //solve friction - - for(int i=0; i<numManifolds; i++) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=contactConstraints[i].m_appliedRambdaDt[j]; - } - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr); - - } - - //easy - for (int i=0;i<numBodies;i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } - - - - } - - - //easy - for (int i=0;i<numBodies;i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - bodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; - bodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } -} - - - -void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index) -// -// -//void b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* bodies,b3OpenCLArray<b3InertiaData>* inertias,b3OpenCLArray<b3Contact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo) -{ - b3JacobiSolverInfo solverInfo; - solverInfo.m_fixedBodyIndex = static0Index; - - - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - //int numBodies = bodies->size(); - int numManifolds = numContacts;//manifoldPtr->size(); - - { - B3_PROFILE("resize"); - m_data->m_bodyCount->resize(numBodies); - } - - unsigned int val=0; - b3Int2 val2; - val2.x=0; - val2.y=0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); - - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel,"m_countBodiesKernel"); - launcher.setBuffer(contactBuf);//manifoldPtr->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - unsigned int totalNumSplitBodies=0; - { - B3_PROFILE("m_scan->execute"); - - m_data->m_offsetSplitBodies->resize(numBodies); - m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); - totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); - } - - { - B3_PROFILE("m_data->m_contactConstraints->resize"); - //int numContacts = manifoldPtr->size(); - m_data->m_contactConstraints->resize(numContacts); - } - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel,"m_contactToConstraintSplitKernel"); - launcher.setBuffer(contactBuf); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D( numContacts, 64 ); - - } - - - { - B3_PROFILE("m_data->m_deltaLinearVelocities->resize"); - m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - } - - - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel,"m_clearVelocitiesKernel"); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - clFinish(m_queue); - } - - - int maxIter = solverInfo.m_numIterations; - - for (int iter = 0;iter<maxIter;iter++) - { - { - B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" ); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel"); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel,"m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - - } - - - { - B3_PROFILE("update body velocities"); - b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel,"m_updateBodyVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - -} - -#if 0 - -void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray<b3RigidBodyData>* bodiesGPU,b3OpenCLArray<b3InertiaData>* inertiasGPU,b3OpenCLArray<b3Contact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo) -{ - - b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; - bodiesGPU->copyToHost(bodiesCPU); - b3AlignedObjectArray<b3InertiaData> inertiasCPU; - inertiasGPU->copyToHost(inertiasCPU); - b3AlignedObjectArray<b3Contact4> manifoldPtrCPU; - manifoldPtrGPU->copyToHost(manifoldPtrCPU); - - int numBodiesCPU = bodiesGPU->size(); - int numManifoldsCPU = manifoldPtrGPU->size(); - B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed"); - - b3AlignedObjectArray<unsigned int> bodyCount; - bodyCount.resize(numBodiesCPU); - for (int i=0;i<numBodiesCPU;i++) - bodyCount[i] = 0; - - b3AlignedObjectArray<b3Int2> contactConstraintOffsets; - contactConstraintOffsets.resize(numManifoldsCPU); - - - for (int i=0;i<numManifoldsCPU;i++) - { - int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit; - int pb = manifoldPtrCPU[i].m_bodyBPtrAndSignBit; - - bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex); - bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex); - - int bodyIndexA = manifoldPtrCPU[i].getBodyA(); - int bodyIndexB = manifoldPtrCPU[i].getBodyB(); - - if (!isFixedA) - { - contactConstraintOffsets[i].x = bodyCount[bodyIndexA]; - bodyCount[bodyIndexA]++; - } - if (!isFixedB) - { - contactConstraintOffsets[i].y = bodyCount[bodyIndexB]; - bodyCount[bodyIndexB]++; - } - } - - b3AlignedObjectArray<unsigned int> offsetSplitBodies; - offsetSplitBodies.resize(numBodiesCPU); - unsigned int totalNumSplitBodiesCPU; - m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU); - int numlastBody = bodyCount[numBodiesCPU-1]; - totalNumSplitBodiesCPU += numlastBody; - - int numBodies = bodiesGPU->size(); - int numManifolds = manifoldPtrGPU->size(); - - m_data->m_bodyCount->resize(numBodies); - - unsigned int val=0; - b3Int2 val2; - val2.x=0; - val2.y=0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); - - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - - unsigned int totalNumSplitBodies=0; - m_data->m_offsetSplitBodies->resize(numBodies); - m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); - totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); - - if (totalNumSplitBodies != totalNumSplitBodiesCPU) - { - printf("error in totalNumSplitBodies!\n"); - } - - int numContacts = manifoldPtrGPU->size(); - m_data->m_contactConstraints->resize(numContacts); - - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D( numContacts, 64 ); - clFinish(m_queue); - } - - - - b3AlignedObjectArray<b3GpuConstraint4> contactConstraints; - contactConstraints.resize(numManifoldsCPU); - - for (int i=0;i<numManifoldsCPU;i++) - { - ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU, - solverInfo.m_deltaTime, - solverInfo.m_positionDrift, - solverInfo.m_positionConstraintCoeff, - i, bodyCount); - } - int maxIter = solverInfo.m_numIterations; - - - b3AlignedObjectArray<b3Vector3> deltaLinearVelocities; - b3AlignedObjectArray<b3Vector3> deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodiesCPU); - deltaAngularVelocities.resize(totalNumSplitBodiesCPU); - for (int i=0;i<totalNumSplitBodiesCPU;i++) - { - deltaLinearVelocities[i].setZero(); - deltaAngularVelocities[i].setZero(); - } - - m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - - - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - } - - - ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - m_data->m_contactConstraints->copyToHost(contactConstraints); - m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); - m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - - for (int iter = 0;iter<maxIter;iter++) - { - - { - B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel ); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - - int i=0; - for( i=0; i<numManifoldsCPU; i++) - { - - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodiesCPU[aIdx]; - b3RigidBodyData& bodyB = bodiesCPU[bIdx]; - - b3Vector3 zero(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr ); - - - } - } - - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } -// m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); - //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - -#if 0 - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - //solve friction - - for(int i=0; i<numManifoldsCPU; i++) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=contactConstraints[i].m_appliedRambdaDt[j]; - } - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodiesCPU[aIdx]; - b3RigidBodyData& bodyB = bodiesCPU[bIdx]; - - b3Vector3 zero(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr); - - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } - -#endif - - } - - { - B3_PROFILE("update body velocities"); - b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset]; - bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } - - -// bodiesGPU->copyFromHost(bodiesCPU); - - -} -#endif diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h deleted file mode 100644 index b418f29ec4..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h +++ /dev/null @@ -1,62 +0,0 @@ - -#ifndef B3_GPU_JACOBI_CONTACT_SOLVER_H -#define B3_GPU_JACOBI_CONTACT_SOLVER_H -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -//#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - - -//struct b3InertiaData; -//b3InertiaData - -class b3TypedConstraint; - -struct b3JacobiSolverInfo -{ - int m_fixedBodyIndex; - - float m_deltaTime; - float m_positionDrift; - float m_positionConstraintCoeff; - int m_numIterations; - - b3JacobiSolverInfo() - :m_fixedBodyIndex(0), - m_deltaTime(1./60.f), - m_positionDrift( 0.005f ), - m_positionConstraintCoeff( 0.99f ), - m_numIterations(7) - { - } -}; -class b3GpuJacobiContactSolver -{ -protected: - - struct b3GpuJacobiSolverInternalData* m_data; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - -public: - - b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - virtual ~b3GpuJacobiContactSolver(); - - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); - void solveGroupHost(b3RigidBodyData* bodies,b3InertiaData* inertias,int numBodies,struct b3Contact4* manifoldPtr, int numManifolds,const b3JacobiSolverInfo& solverInfo); - //void solveGroupHost(btRigidBodyCL* bodies,b3InertiaData* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); - - //b3Scalar solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies,b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - - //void solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo); - //void solveGroupMixed(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo); - -}; -#endif //B3_GPU_JACOBI_CONTACT_SOLVER_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp deleted file mode 100644 index 698fa15f96..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ /dev/null @@ -1,1107 +0,0 @@ -#include "b3GpuNarrowPhase.h" - - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include <string.h> -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" - -#include "b3GpuNarrowPhaseInternalData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h" - - - - -b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config) -:m_data(0) ,m_planeBodyIndex(-1),m_static0Index(-1), -m_context(ctx), -m_device(device), -m_queue(queue) -{ - - m_data = new b3GpuNarrowPhaseInternalData(); - m_data->m_currentContactBuffer = 0; - - memset(m_data,0,sizeof(b3GpuNarrowPhaseInternalData)); - - - m_data->m_config = config; - - m_data->m_gpuSatCollision = new GpuSatCollision(ctx,device,queue); - - - m_data->m_triangleConvexPairs = new b3OpenCLArray<b3Int4>(m_context,m_queue, config.m_maxTriConvexPairCapacity); - - - //m_data->m_convexPairsOutGPU = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_planePairs = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false); - - m_data->m_pBufContactOutCPU = new b3AlignedObjectArray<b3Contact4>(); - m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs); - m_data->m_bodyBufferCPU = new b3AlignedObjectArray<b3RigidBodyData>(); - m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_inertiaBufferCPU = new b3AlignedObjectArray<b3InertiaData>(); - m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_pBufContactBuffersGPU[0] = new b3OpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true); - m_data->m_pBufContactBuffersGPU[1] = new b3OpenCLArray<b3Contact4>(ctx,queue, config.m_maxContactCapacity,true); - - m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaData>(ctx,queue,config.m_maxConvexBodies,false); - m_data->m_collidablesGPU = new b3OpenCLArray<b3Collidable>(ctx,queue,config.m_maxConvexShapes); - m_data->m_collidablesCPU.reserve(config.m_maxConvexShapes); - - m_data->m_localShapeAABBCPU = new b3AlignedObjectArray<b3SapAabb>; - m_data->m_localShapeAABBGPU = new b3OpenCLArray<b3SapAabb>(ctx,queue,config.m_maxConvexShapes); - - - //m_data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(ctx,queue, config.m_maxBroadphasePairs,false); - m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyData>(ctx,queue, config.m_maxConvexBodies,false); - - m_data->m_convexFacesGPU = new b3OpenCLArray<b3GpuFace>(ctx,queue,config.m_maxConvexShapes*config.m_maxFacesPerShape,false); - m_data->m_convexFaces.reserve(config.m_maxConvexShapes*config.m_maxFacesPerShape); - - m_data->m_gpuChildShapes = new b3OpenCLArray<b3GpuChildShape>(ctx,queue,config.m_maxCompoundChildShapes,false); - - m_data->m_convexPolyhedraGPU = new b3OpenCLArray<b3ConvexPolyhedronData>(ctx,queue,config.m_maxConvexShapes,false); - m_data->m_convexPolyhedra.reserve(config.m_maxConvexShapes); - - m_data->m_uniqueEdgesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexUniqueEdges,true); - m_data->m_uniqueEdges.reserve(config.m_maxConvexUniqueEdges); - - - - m_data->m_convexVerticesGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexVertices,true); - m_data->m_convexVertices.reserve(config.m_maxConvexVertices); - - m_data->m_convexIndicesGPU = new b3OpenCLArray<int>(ctx,queue,config.m_maxConvexIndices,true); - m_data->m_convexIndices.reserve(config.m_maxConvexIndices); - - m_data->m_worldVertsB1GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); - m_data->m_clippingFacesOutGPU = new b3OpenCLArray<b3Int4>(ctx,queue,config.m_maxConvexBodies); - m_data->m_worldNormalsAGPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies); - m_data->m_worldVertsA1GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); - m_data->m_worldVertsB2GPU = new b3OpenCLArray<b3Vector3>(ctx,queue,config.m_maxConvexBodies*config.m_maxVerticesPerFace); - - - - m_data->m_convexData = new b3AlignedObjectArray<b3ConvexUtility* >(); - - m_data->m_convexData->resize(config.m_maxConvexShapes); - m_data->m_convexPolyhedra.resize(config.m_maxConvexShapes); - - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - - - m_data->m_subTreesGPU = new b3OpenCLArray<b3BvhSubtreeInfo>(this->m_context,this->m_queue); - m_data->m_treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue); - m_data->m_bvhInfoGPU = new b3OpenCLArray<b3BvhInfo>(this->m_context,this->m_queue); - - //m_data->m_contactCGPU = new b3OpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_frictionCGPU = new b3OpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs); - - - -} - - -b3GpuNarrowPhase::~b3GpuNarrowPhase() -{ - delete m_data->m_gpuSatCollision; - - delete m_data->m_triangleConvexPairs; - //delete m_data->m_convexPairsOutGPU; - //delete m_data->m_planePairs; - delete m_data->m_pBufContactOutCPU; - delete m_data->m_bodyBufferCPU; - delete m_data->m_inertiaBufferCPU; - delete m_data->m_pBufContactBuffersGPU[0]; - delete m_data->m_pBufContactBuffersGPU[1]; - - - delete m_data->m_inertiaBufferGPU; - delete m_data->m_collidablesGPU; - delete m_data->m_localShapeAABBCPU; - delete m_data->m_localShapeAABBGPU; - delete m_data->m_bodyBufferGPU; - delete m_data->m_convexFacesGPU; - delete m_data->m_gpuChildShapes; - delete m_data->m_convexPolyhedraGPU; - delete m_data->m_uniqueEdgesGPU; - delete m_data->m_convexVerticesGPU; - delete m_data->m_convexIndicesGPU; - delete m_data->m_worldVertsB1GPU; - delete m_data->m_clippingFacesOutGPU; - delete m_data->m_worldNormalsAGPU; - delete m_data->m_worldVertsA1GPU; - delete m_data->m_worldVertsB2GPU; - - delete m_data->m_bvhInfoGPU; - - for (int i=0;i<m_data->m_bvhData.size();i++) - { - delete m_data->m_bvhData[i]; - } - for (int i=0;i<m_data->m_meshInterfaces.size();i++) - { - delete m_data->m_meshInterfaces[i]; - } - m_data->m_meshInterfaces.clear(); - m_data->m_bvhData.clear(); - delete m_data->m_treeNodesGPU; - delete m_data->m_subTreesGPU; - - - delete m_data->m_convexData; - delete m_data; -} - - -int b3GpuNarrowPhase::allocateCollidable() -{ - int curSize = m_data->m_collidablesCPU.size(); - if (curSize<m_data->m_config.m_maxConvexShapes) - { - m_data->m_collidablesCPU.expand(); - return curSize; - } - else - { - b3Error("allocateCollidable out-of-range %d\n",m_data->m_config.m_maxConvexShapes); - } - return -1; - -} - - - - - -int b3GpuNarrowPhase::registerSphereShape(float radius) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex<0) - return collidableIndex; - - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_SPHERE; - col.m_shapeIndex = 0; - col.m_radius = radius; - - if (col.m_shapeIndex>=0) - { - b3SapAabb aabb; - b3Vector3 myAabbMin=b3MakeVector3(-radius,-radius,-radius); - b3Vector3 myAabbMax=b3MakeVector3(radius,radius,radius); - - aabb.m_min[0] = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x; - aabb.m_min[1] = myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y; - aabb.m_min[2] = myAabbMin[2];//s_convexHeightField->m_aabb.m_min.z; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0];//s_convexHeightField->m_aabb.m_max.x; - aabb.m_max[1] = myAabbMax[1];//s_convexHeightField->m_aabb.m_max.y; - aabb.m_max[2] = myAabbMax[2];//s_convexHeightField->m_aabb.m_max.z; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); -// m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - - -int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConstant) -{ - int faceOffset = m_data->m_convexFaces.size(); - b3GpuFace& face = m_data->m_convexFaces.expand(); - face.m_plane = b3MakeVector3(faceNormal.x,faceNormal.y,faceNormal.z,faceConstant); - return faceOffset; -} - -int b3GpuNarrowPhase::registerPlaneShape(const b3Vector3& planeNormal, float planeConstant) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex<0) - return collidableIndex; - - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_PLANE; - col.m_shapeIndex = registerFace(planeNormal,planeConstant); - col.m_radius = planeConstant; - - if (col.m_shapeIndex>=0) - { - b3SapAabb aabb; - aabb.m_min[0] = -1e30f; - aabb.m_min[1] = -1e30f; - aabb.m_min[2] = -1e30f; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = 1e30f; - aabb.m_max[1] = 1e30f; - aabb.m_max[2] = 1e30f; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); -// m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - - -int b3GpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr,b3Collidable& col) -{ - - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1); - - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1); - convex.mC = convexPtr->mC; - convex.mE = convexPtr->mE; - convex.m_extents= convexPtr->m_extents; - convex.m_localCenter = convexPtr->m_localCenter; - convex.m_radius = convexPtr->m_radius; - - convex.m_numUniqueEdges = convexPtr->m_uniqueEdges.size(); - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - m_data->m_uniqueEdges.resize(edgeOffset+convex.m_numUniqueEdges); - - //convex data here - int i; - for ( i=0;i<convexPtr->m_uniqueEdges.size();i++) - { - m_data->m_uniqueEdges[edgeOffset+i] = convexPtr->m_uniqueEdges[i]; - } - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - convex.m_numFaces = convexPtr->m_faces.size(); - - m_data->m_convexFaces.resize(faceOffset+convex.m_numFaces); - - - for (i=0;i<convexPtr->m_faces.size();i++) - { - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane = b3MakeVector3(convexPtr->m_faces[i].m_plane[0], - convexPtr->m_faces[i].m_plane[1], - convexPtr->m_faces[i].m_plane[2], - convexPtr->m_faces[i].m_plane[3]); - - - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = convexPtr->m_faces[i].m_indices.size(); - m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset+i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset+numIndices); - for (int p=0;p<numIndices;p++) - { - m_data->m_convexIndices[indexOffset+p] = convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = convexPtr->m_vertices.size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset =vertexOffset; - - m_data->m_convexVertices.resize(vertexOffset+convex.m_numVertices); - for (int i=0;i<convexPtr->m_vertices.size();i++) - { - m_data->m_convexVertices[vertexOffset+i] = convexPtr->m_vertices[i]; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr; - - - - return m_data->m_numAcceleratedShapes++; -} - - -int b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) -{ - b3AlignedObjectArray<b3Vector3> verts; - - unsigned char* vts = (unsigned char*) vertices; - for (int i=0;i<numVertices;i++) - { - float* vertex = (float*) &vts[i*strideInBytes]; - verts.push_back(b3MakeVector3(vertex[0]*scaling[0],vertex[1]*scaling[1],vertex[2]*scaling[2])); - } - - b3ConvexUtility* utilPtr = new b3ConvexUtility(); - bool merge = true; - if (numVertices) - { - utilPtr->initializePolyhedralFeatures(&verts[0],verts.size(),merge); - } - - int collidableIndex = registerConvexHullShape(utilPtr); - delete utilPtr; - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex<0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_CONVEX_HULL; - col.m_shapeIndex = -1; - - - { - b3Vector3 localCenter=b3MakeVector3(0,0,0); - for (int i=0;i<utilPtr->m_vertices.size();i++) - localCenter+=utilPtr->m_vertices[i]; - localCenter*= (1.f/utilPtr->m_vertices.size()); - utilPtr->m_localCenter = localCenter; - - col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr,col); - } - - if (col.m_shapeIndex>=0) - { - b3SapAabb aabb; - - b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f); - b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f); - - for (int i=0;i<utilPtr->m_vertices.size();i++) - { - myAabbMin.setMin(utilPtr->m_vertices[i]); - myAabbMax.setMax(utilPtr->m_vertices[i]); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); -// m_data->m_localShapeAABBGPU->push_back(aabb); - } - - return collidableIndex; - -} - -int b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes) -{ - - int collidableIndex = allocateCollidable(); - if (collidableIndex<0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS; - col.m_shapeIndex = m_data->m_cpuChildShapes.size(); - col.m_compoundBvhIndex = m_data->m_bvhInfoCPU.size(); - - { - b3Assert(col.m_shapeIndex+childShapes->size()<m_data->m_config.m_maxCompoundChildShapes); - for (int i=0;i<childShapes->size();i++) - { - m_data->m_cpuChildShapes.push_back(childShapes->at(i)); - } - } - - - - col.m_numChildShapes = childShapes->size(); - - - b3SapAabb aabbLocalSpace; - b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f); - b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f); - - b3AlignedObjectArray<b3Aabb> childLocalAabbs; - childLocalAabbs.resize(childShapes->size()); - - //compute local AABB of the compound of all children - for (int i=0;i<childShapes->size();i++) - { - int childColIndex = childShapes->at(i).m_shapeIndex; - //b3Collidable& childCol = getCollidableCpu(childColIndex); - b3SapAabb aabbLoc =m_data->m_localShapeAABBCPU->at(childColIndex); - - b3Vector3 childLocalAabbMin=b3MakeVector3(aabbLoc.m_min[0],aabbLoc.m_min[1],aabbLoc.m_min[2]); - b3Vector3 childLocalAabbMax=b3MakeVector3(aabbLoc.m_max[0],aabbLoc.m_max[1],aabbLoc.m_max[2]); - b3Vector3 aMin,aMax; - b3Scalar margin(0.f); - b3Transform childTr; - childTr.setIdentity(); - - childTr.setOrigin(childShapes->at(i).m_childPosition); - childTr.setRotation(b3Quaternion(childShapes->at(i).m_childOrientation)); - b3TransformAabb(childLocalAabbMin,childLocalAabbMax,margin,childTr,aMin,aMax); - myAabbMin.setMin(aMin); - myAabbMax.setMax(aMax); - childLocalAabbs[i].m_min[0] = aMin[0]; - childLocalAabbs[i].m_min[1] = aMin[1]; - childLocalAabbs[i].m_min[2] = aMin[2]; - childLocalAabbs[i].m_min[3] = 0; - childLocalAabbs[i].m_max[0] = aMax[0]; - childLocalAabbs[i].m_max[1] = aMax[1]; - childLocalAabbs[i].m_max[2] = aMax[2]; - childLocalAabbs[i].m_max[3] = 0; - } - - aabbLocalSpace.m_min[0] = myAabbMin[0];//s_convexHeightField->m_aabb.m_min.x; - aabbLocalSpace.m_min[1]= myAabbMin[1];//s_convexHeightField->m_aabb.m_min.y; - aabbLocalSpace.m_min[2]= myAabbMin[2];//s_convexHeightField->m_aabb.m_min.z; - aabbLocalSpace.m_minIndices[3] = 0; - - aabbLocalSpace.m_max[0] = myAabbMax[0];//s_convexHeightField->m_aabb.m_max.x; - aabbLocalSpace.m_max[1]= myAabbMax[1];//s_convexHeightField->m_aabb.m_max.y; - aabbLocalSpace.m_max[2]= myAabbMax[2];//s_convexHeightField->m_aabb.m_max.z; - aabbLocalSpace.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabbLocalSpace); - - - b3QuantizedBvh* bvh = new b3QuantizedBvh; - bvh->setQuantizationValues(myAabbMin,myAabbMax); - QuantizedNodeArray& nodes = bvh->getLeafNodeArray(); - int numNodes = childShapes->size(); - - for (int i=0;i<numNodes;i++) - { - b3QuantizedBvhNode node; - b3Vector3 aabbMin,aabbMax; - aabbMin = (b3Vector3&) childLocalAabbs[i].m_min; - aabbMax = (b3Vector3&) childLocalAabbs[i].m_max; - - bvh->quantize(&node.m_quantizedAabbMin[0],aabbMin,0); - bvh->quantize(&node.m_quantizedAabbMax[0],aabbMax,1); - int partId = 0; - node.m_escapeIndexOrTriangleIndex = (partId<<(31-MAX_NUM_PARTS_IN_BITS)) | i; - nodes.push_back(node); - } - bvh->buildInternal(); - - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - //void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); - //QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - //void buildInternal(); - - b3BvhInfo bvhInfo; - - bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - int numNewNodes = bvh->getQuantizedNodeArray().size(); - - for (int i=0;i<numNewNodes-1;i++) - { - - if (bvh->getQuantizedNodeArray()[i].isLeafNode()) - { - int orgIndex = bvh->getQuantizedNodeArray()[i].getTriangleIndex(); - - b3Vector3 nodeMinVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMin); - b3Vector3 nodeMaxVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMax); - - for (int c=0;c<3;c++) - { - if (childLocalAabbs[orgIndex].m_min[c] < nodeMinVec[c]) - { - printf("min org (%f) and new (%f) ? at i:%d,c:%d\n",childLocalAabbs[i].m_min[c],nodeMinVec[c],i,c); - } - if (childLocalAabbs[orgIndex].m_max[c] > nodeMaxVec[c]) - { - printf("max org (%f) and new (%f) ? at i:%d,c:%d\n",childLocalAabbs[i].m_max[c],nodeMaxVec[c],i,c); - } - - } - } - - } - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size()+numNewSubtrees); - for (int i=0;i<numNewSubtrees;i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i=0;i<numNewTreeNodes;i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - -// m_data->m_localShapeAABBGPU->push_back(aabbWS); - clFinish(m_queue); - return collidableIndex; - -} - - -int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,const float* scaling1) -{ - - - b3Vector3 scaling=b3MakeVector3(scaling1[0],scaling1[1],scaling1[2]); - - int collidableIndex = allocateCollidable(); - if (collidableIndex<0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - - col.m_shapeType = SHAPE_CONCAVE_TRIMESH; - col.m_shapeIndex = registerConcaveMeshShape(vertices,indices,col,scaling); - col.m_bvhIndex = m_data->m_bvhInfoCPU.size(); - - - b3SapAabb aabb; - b3Vector3 myAabbMin=b3MakeVector3(1e30f,1e30f,1e30f); - b3Vector3 myAabbMax=b3MakeVector3(-1e30f,-1e30f,-1e30f); - - for (int i=0;i<vertices->size();i++) - { - b3Vector3 vtx(vertices->at(i)*scaling); - myAabbMin.setMin(vtx); - myAabbMax.setMax(vtx); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1]= myAabbMax[1]; - aabb.m_max[2]= myAabbMax[2]; - aabb.m_signedMaxIndices[3]= 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); -// m_data->m_localShapeAABBGPU->push_back(aabb); - - b3OptimizedBvh* bvh = new b3OptimizedBvh(); - //void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) - - bool useQuantizedAabbCompression = true; - b3TriangleIndexVertexArray* meshInterface=new b3TriangleIndexVertexArray(); - m_data->m_meshInterfaces.push_back(meshInterface); - b3IndexedMesh mesh; - mesh.m_numTriangles = indices->size()/3; - mesh.m_numVertices = vertices->size(); - mesh.m_vertexBase = (const unsigned char *)&vertices->at(0).x; - mesh.m_vertexStride = sizeof(b3Vector3); - mesh.m_triangleIndexStride = 3 * sizeof(int);// or sizeof(int) - mesh.m_triangleIndexBase = (const unsigned char *)&indices->at(0); - - meshInterface->addIndexedMesh(mesh); - bvh->build(meshInterface, useQuantizedAabbCompression, (b3Vector3&)aabb.m_min, (b3Vector3&)aabb.m_max); - m_data->m_bvhData.push_back(bvh); - int numNodes = bvh->getQuantizedNodeArray().size(); - //b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue,numNodes); - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - b3BvhInfo bvhInfo; - - bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size()+numNewSubtrees); - for (int i=0;i<numNewSubtrees;i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i=0;i<numNewTreeNodes;i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - - - - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,b3Collidable& col, const float* scaling1) -{ - - - b3Vector3 scaling=b3MakeVector3(scaling1[0],scaling1[1],scaling1[2]); - - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes+1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes+1); - - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size()-1); - convex.mC = b3MakeVector3(0,0,0); - convex.mE = b3MakeVector3(0,0,0); - convex.m_extents= b3MakeVector3(0,0,0); - convex.m_localCenter = b3MakeVector3(0,0,0); - convex.m_radius = 0.f; - - convex.m_numUniqueEdges = 0; - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - - convex.m_numFaces = indices->size()/3; - m_data->m_convexFaces.resize(faceOffset+convex.m_numFaces); - m_data->m_convexIndices.reserve(convex.m_numFaces*3); - for (int i=0;i<convex.m_numFaces;i++) - { - if (i%256==0) - { - //printf("i=%d out of %d", i,convex.m_numFaces); - } - b3Vector3 vert0(vertices->at(indices->at(i*3))*scaling); - b3Vector3 vert1(vertices->at(indices->at(i*3+1))*scaling); - b3Vector3 vert2(vertices->at(indices->at(i*3+2))*scaling); - - b3Vector3 normal = ((vert1-vert0).cross(vert2-vert0)).normalize(); - b3Scalar c = -(normal.dot(vert0)); - - m_data->m_convexFaces[convex.m_faceOffset+i].m_plane = b3MakeVector4(normal.x,normal.y,normal.z,c); - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = 3; - m_data->m_convexFaces[convex.m_faceOffset+i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset+i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset+numIndices); - for (int p=0;p<numIndices;p++) - { - int vi = indices->at(i*3+p); - m_data->m_convexIndices[indexOffset+p] = vi;//convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = vertices->size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset =vertexOffset; - m_data->m_convexVertices.resize(vertexOffset+convex.m_numVertices); - for (int i=0;i<vertices->size();i++) - { - m_data->m_convexVertices[vertexOffset+i] = vertices->at(i)*scaling; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = 0; - - - return m_data->m_numAcceleratedShapes++; -} - - - -cl_mem b3GpuNarrowPhase::getBodiesGpu() -{ - return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL(); -} - -const struct b3RigidBodyData* b3GpuNarrowPhase::getBodiesCpu() const -{ - return &m_data->m_bodyBufferCPU->at(0); -}; - - - - -int b3GpuNarrowPhase::getNumBodiesGpu() const -{ - return m_data->m_bodyBufferGPU->size(); -} - -cl_mem b3GpuNarrowPhase::getBodyInertiasGpu() -{ - return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL(); -} - -int b3GpuNarrowPhase::getNumBodyInertiasGpu() const -{ - return m_data->m_inertiaBufferGPU->size(); -} - - -b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -const b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) const -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -cl_mem b3GpuNarrowPhase::getCollidablesGpu() -{ - return m_data->m_collidablesGPU->getBufferCL(); -} - -const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const -{ - if (m_data->m_collidablesCPU.size()) - return &m_data->m_collidablesCPU[0]; - return 0; -} - -const struct b3SapAabb* b3GpuNarrowPhase::getLocalSpaceAabbsCpu() const -{ - if (m_data->m_localShapeAABBCPU->size()) - { - return &m_data->m_localShapeAABBCPU->at(0); - } - return 0; -} - - -cl_mem b3GpuNarrowPhase::getAabbLocalSpaceBufferGpu() -{ - return m_data->m_localShapeAABBGPU->getBufferCL(); -} -int b3GpuNarrowPhase::getNumCollidablesGpu() const -{ - return m_data->m_collidablesGPU->size(); -} - - - - - -int b3GpuNarrowPhase::getNumContactsGpu() const -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); -} -cl_mem b3GpuNarrowPhase::getContactsGpu() -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->getBufferCL(); -} - -const b3Contact4* b3GpuNarrowPhase::getContactsCPU() const -{ - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->copyToHost(*m_data->m_pBufContactOutCPU); - return &m_data->m_pBufContactOutCPU->at(0); -} - -void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects) -{ - - cl_mem aabbsLocalSpace = m_data->m_localShapeAABBGPU->getBufferCL(); - - int nContactOut = 0; - - //swap buffer - m_data->m_currentContactBuffer=1-m_data->m_currentContactBuffer; - - //int curSize = m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); - - int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity; - int numTriConvexPairsOut=0; - - b3OpenCLArray<b3Int4> broadphasePairsGPU(m_context,m_queue); - broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs,numBroadphasePairs); - - - - - b3OpenCLArray<b3Aabb> clAabbArrayWorldSpace(this->m_context,this->m_queue); - clAabbArrayWorldSpace.setFromOpenCLBuffer(aabbsWorldSpace,numObjects); - - b3OpenCLArray<b3Aabb> clAabbArrayLocalSpace(this->m_context,this->m_queue); - clAabbArrayLocalSpace.setFromOpenCLBuffer(aabbsLocalSpace,numObjects); - - m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT( - &broadphasePairsGPU, numBroadphasePairs, - m_data->m_bodyBufferGPU, - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer], - nContactOut, - m_data->m_pBufContactBuffersGPU[1-m_data->m_currentContactBuffer], - m_data->m_config.m_maxContactCapacity, - m_data->m_config.m_compoundPairCapacity, - *m_data->m_convexPolyhedraGPU, - *m_data->m_convexVerticesGPU, - *m_data->m_uniqueEdgesGPU, - *m_data->m_convexFacesGPU, - *m_data->m_convexIndicesGPU, - *m_data->m_collidablesGPU, - *m_data->m_gpuChildShapes, - clAabbArrayWorldSpace, - clAabbArrayLocalSpace, - *m_data->m_worldVertsB1GPU, - *m_data->m_clippingFacesOutGPU, - *m_data->m_worldNormalsAGPU, - *m_data->m_worldVertsA1GPU, - *m_data->m_worldVertsB2GPU, - m_data->m_bvhData, - m_data->m_treeNodesGPU, - m_data->m_subTreesGPU, - m_data->m_bvhInfoGPU, - numObjects, - maxTriConvexPairCapacity, - *m_data->m_triangleConvexPairs, - numTriConvexPairsOut - ); - - /*b3AlignedObjectArray<b3Int4> broadphasePairsCPU; - broadphasePairsGPU.copyToHost(broadphasePairsCPU); - printf("checking pairs\n"); - */ -} - -const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const -{ - return m_data->m_localShapeAABBCPU->at(collidableIndex); -} - - - - - -int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation , const float* aabbMinPtr, const float* aabbMaxPtr,bool writeToGpu) -{ - b3Vector3 aabbMin=b3MakeVector3(aabbMinPtr[0],aabbMinPtr[1],aabbMinPtr[2]); - b3Vector3 aabbMax=b3MakeVector3(aabbMaxPtr[0],aabbMaxPtr[1],aabbMaxPtr[2]); - - - if (m_data->m_numAcceleratedRigidBodies >= (m_data->m_config.m_maxConvexBodies)) - { - b3Error("registerRigidBody: exceeding the number of rigid bodies, %d > %d \n",m_data->m_numAcceleratedRigidBodies,m_data->m_config.m_maxConvexBodies); - return -1; - } - - m_data->m_bodyBufferCPU->resize(m_data->m_numAcceleratedRigidBodies+1); - - b3RigidBodyData& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - float friction = 1.f; - float restitution = 0.f; - - body.m_frictionCoeff = friction; - body.m_restituitionCoeff = restitution; - body.m_angVel = b3MakeVector3(0,0,0); - body.m_linVel=b3MakeVector3(0,0,0);//.setZero(); - body.m_pos =b3MakeVector3(position[0],position[1],position[2]); - body.m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]); - body.m_collidableIdx = collidableIndex; - if (collidableIndex>=0) - { -// body.m_shapeType = m_data->m_collidablesCPU.at(collidableIndex).m_shapeType; - } else - { - // body.m_shapeType = CollisionShape::SHAPE_PLANE; - m_planeBodyIndex = m_data->m_numAcceleratedRigidBodies; - } - //body.m_shapeType = shapeType; - - - body.m_invMass = mass? 1.f/mass : 0.f; - - if (writeToGpu) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&body,1,m_data->m_numAcceleratedRigidBodies); - } - - b3InertiaData& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - if (mass==0.f) - { - if (m_data->m_numAcceleratedRigidBodies==0) - m_static0Index = 0; - - shapeInfo.m_initInvInertia.setValue(0,0,0,0,0,0,0,0,0); - shapeInfo.m_invInertiaWorld.setValue(0,0,0,0,0,0,0,0,0); - } else - { - - b3Assert(body.m_collidableIdx>=0); - - //approximate using the aabb of the shape - - //Aabb aabb = (*m_data->m_shapePointers)[shapeIndex]->m_aabb; - b3Vector3 halfExtents = (aabbMax-aabbMin);//*0.5f;//fake larger inertia makes demos more stable ;-) - - b3Vector3 localInertia; - - float lx=2.f*halfExtents[0]; - float ly=2.f*halfExtents[1]; - float lz=2.f*halfExtents[2]; - - localInertia.setValue( (mass/12.0f) * (ly*ly + lz*lz), - (mass/12.0f) * (lx*lx + lz*lz), - (mass/12.0f) * (lx*lx + ly*ly)); - - b3Vector3 invLocalInertia; - invLocalInertia[0] = 1.f/localInertia[0]; - invLocalInertia[1] = 1.f/localInertia[1]; - invLocalInertia[2] = 1.f/localInertia[2]; - invLocalInertia[3] = 0.f; - - shapeInfo.m_initInvInertia.setValue( - invLocalInertia[0], 0, 0, - 0, invLocalInertia[1], 0, - 0, 0, invLocalInertia[2]); - - b3Matrix3x3 m (body.m_quat); - - shapeInfo.m_invInertiaWorld = m.scaled(invLocalInertia) * m.transpose(); - - } - - if (writeToGpu) - m_data->m_inertiaBufferGPU->copyFromHostPointer(&shapeInfo,1,m_data->m_numAcceleratedRigidBodies); - - - - return m_data->m_numAcceleratedRigidBodies++; -} - -int b3GpuNarrowPhase::getNumRigidBodies() const -{ - return m_data->m_numAcceleratedRigidBodies; -} - -void b3GpuNarrowPhase::writeAllBodiesToGpu() -{ - - if (m_data->m_localShapeAABBCPU->size()) - { - m_data->m_localShapeAABBGPU->copyFromHost(*m_data->m_localShapeAABBCPU); - } - - - m_data->m_gpuChildShapes->copyFromHost(m_data->m_cpuChildShapes); - m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces); - m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra); - m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges); - m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices); - m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices); - m_data->m_bvhInfoGPU->copyFromHost(m_data->m_bvhInfoCPU); - m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU); - m_data->m_subTreesGPU->copyFromHost(m_data->m_subTreesCPU); - - - m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - - if (m_data->m_numAcceleratedRigidBodies) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&m_data->m_bodyBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->copyFromHostPointer(&m_data->m_inertiaBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies); - } - if (m_data->m_collidablesCPU.size()) - { - m_data->m_collidablesGPU->copyFromHost(m_data->m_collidablesCPU); - } - - -} - - -void b3GpuNarrowPhase::reset() -{ - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - this->m_static0Index = -1; - m_data->m_uniqueEdges.resize(0); - m_data->m_convexVertices.resize(0); - m_data->m_convexPolyhedra.resize(0); - m_data->m_convexIndices.resize(0); - m_data->m_cpuChildShapes.resize(0); - m_data->m_convexFaces.resize(0); - m_data->m_collidablesCPU.resize(0); - m_data->m_localShapeAABBCPU->resize(0); - m_data->m_bvhData.resize(0); - m_data->m_treeNodesCPU.resize(0); - m_data->m_subTreesCPU.resize(0); - m_data->m_bvhInfoCPU.resize(0); - -} - - -void b3GpuNarrowPhase::readbackAllBodiesToCpu() -{ - m_data->m_bodyBufferGPU->copyToHostPointer(&m_data->m_bodyBufferCPU->at(0),m_data->m_numAcceleratedRigidBodies); -} - -void b3GpuNarrowPhase::setObjectTransformCpu(float* position, float* orientation , int bodyIndex) -{ - if (bodyIndex>=0 && bodyIndex<m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_pos=b3MakeVector3(position[0],position[1],position[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.setValue(orientation[0],orientation[1],orientation[2],orientation[3]); - } - else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} -void b3GpuNarrowPhase::setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex) -{ - if (bodyIndex>=0 && bodyIndex<m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_linVel=b3MakeVector3(linVel[0],linVel[1],linVel[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_angVel=b3MakeVector3(angVel[0],angVel[1],angVel[2]); - } else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} - -bool b3GpuNarrowPhase::getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const -{ - if (bodyIndex>=0 && bodyIndex<m_data->m_bodyBufferCPU->size()) - { - position[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.x; - position[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.y; - position[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.z; - position[3] = 1.f;//or 1 - - orientation[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.x; - orientation[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.y; - orientation[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.z; - orientation[3] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.w; - return true; - } - - b3Warning("getObjectTransformFromCpu out of range.\n"); - return false; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h deleted file mode 100644 index 05ff3fd09e..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ /dev/null @@ -1,109 +0,0 @@ -#ifndef B3_GPU_NARROWPHASE_H -#define B3_GPU_NARROWPHASE_H - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -class b3GpuNarrowPhase -{ -protected: - - struct b3GpuNarrowPhaseInternalData* m_data; - int m_acceleratedCompanionShapeIndex; - int m_planeBodyIndex; - int m_static0Index; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr, b3Collidable& col); - int registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling); - -public: - - - - - b3GpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct b3Config& config); - - virtual ~b3GpuNarrowPhase(void); - - int registerSphereShape(float radius); - int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes); - int registerFace(const b3Vector3& faceNormal, float faceConstant); - - int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices,const float* scaling); - - //do they need to be merged? - - int registerConvexHullShape(b3ConvexUtility* utilPtr); - int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - - int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax,bool writeToGpu); - void setObjectTransform(const float* position, const float* orientation , int bodyIndex); - - void writeAllBodiesToGpu(); - void reset(); - void readbackAllBodiesToCpu(); - bool getObjectTransformFromCpu(float* position, float* orientation , int bodyIndex) const; - - void setObjectTransformCpu(float* position, float* orientation , int bodyIndex); - void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex); - - - virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects); - - - cl_mem getBodiesGpu(); - const struct b3RigidBodyData* getBodiesCpu() const; - //struct b3RigidBodyData* getBodiesCpu(); - - int getNumBodiesGpu() const; - - cl_mem getBodyInertiasGpu(); - int getNumBodyInertiasGpu() const; - - cl_mem getCollidablesGpu(); - const struct b3Collidable* getCollidablesCpu() const; - int getNumCollidablesGpu() const; - - const struct b3SapAabb* getLocalSpaceAabbsCpu() const; - - const struct b3Contact4* getContactsCPU() const; - - cl_mem getContactsGpu(); - int getNumContactsGpu() const; - - cl_mem getAabbLocalSpaceBufferGpu(); - - int getNumRigidBodies() const; - - int allocateCollidable(); - - int getStatic0Index() const - { - return m_static0Index; - } - b3Collidable& getCollidableCpu(int collidableIndex); - const b3Collidable& getCollidableCpu(int collidableIndex) const; - - const b3GpuNarrowPhaseInternalData* getInternalData() const - { - return m_data; - } - - b3GpuNarrowPhaseInternalData* getInternalData() - { - return m_data; - } - - const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; -}; - -#endif //B3_GPU_NARROWPHASE_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h deleted file mode 100644 index 8a7f1ea859..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h +++ /dev/null @@ -1,95 +0,0 @@ - -#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H -#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Common/shared/b3Int2.h" - - -class b3ConvexUtility; - -struct b3GpuNarrowPhaseInternalData -{ - b3AlignedObjectArray<b3ConvexUtility*>* m_convexData; - - b3AlignedObjectArray<b3ConvexPolyhedronData> m_convexPolyhedra; - b3AlignedObjectArray<b3Vector3> m_uniqueEdges; - b3AlignedObjectArray<b3Vector3> m_convexVertices; - b3AlignedObjectArray<int> m_convexIndices; - - b3OpenCLArray<b3ConvexPolyhedronData>* m_convexPolyhedraGPU; - b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU; - b3OpenCLArray<b3Vector3>* m_convexVerticesGPU; - b3OpenCLArray<int>* m_convexIndicesGPU; - - b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU; - b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU; - b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU; - b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU; - b3OpenCLArray<b3Vector3>* m_worldVertsB2GPU; - - b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes; - b3OpenCLArray<b3GpuChildShape>* m_gpuChildShapes; - - b3AlignedObjectArray<b3GpuFace> m_convexFaces; - b3OpenCLArray<b3GpuFace>* m_convexFacesGPU; - - struct GpuSatCollision* m_gpuSatCollision; - - - b3OpenCLArray<b3Int4>* m_triangleConvexPairs; - - - b3OpenCLArray<b3Contact4>* m_pBufContactBuffersGPU[2]; - int m_currentContactBuffer; - b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU; - - - b3AlignedObjectArray<b3RigidBodyData>* m_bodyBufferCPU; - b3OpenCLArray<b3RigidBodyData>* m_bodyBufferGPU; - - b3AlignedObjectArray<b3InertiaData>* m_inertiaBufferCPU; - b3OpenCLArray<b3InertiaData>* m_inertiaBufferGPU; - - int m_numAcceleratedShapes; - int m_numAcceleratedRigidBodies; - - b3AlignedObjectArray<b3Collidable> m_collidablesCPU; - b3OpenCLArray<b3Collidable>* m_collidablesGPU; - - b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU; - b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU; - - b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData; - b3AlignedObjectArray<class b3TriangleIndexVertexArray*> m_meshInterfaces; - - b3AlignedObjectArray<b3QuantizedBvhNode> m_treeNodesCPU; - b3AlignedObjectArray<b3BvhSubtreeInfo> m_subTreesCPU; - - b3AlignedObjectArray<b3BvhInfo> m_bvhInfoCPU; - b3OpenCLArray<b3BvhInfo>* m_bvhInfoGPU; - - b3OpenCLArray<b3QuantizedBvhNode>* m_treeNodesGPU; - b3OpenCLArray<b3BvhSubtreeInfo>* m_subTreesGPU; - - - b3Config m_config; - -}; - -#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp deleted file mode 100644 index 0d3d50c548..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp +++ /dev/null @@ -1,1158 +0,0 @@ - -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - - -bool useGpuInitSolverBodies = true; -bool useGpuInfo1 = true; -bool useGpuInfo2= true; -bool useGpuSolveJointConstraintRows=true; -bool useGpuWriteBackVelocities = true; -bool gpuBreakConstraints = true; - -#include "b3GpuPgsConstraintSolver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include <new> -#include "Bullet3Common/b3AlignedObjectArray.h" -#include <string.h> //for memset -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/RigidBody/kernels/jointSolver.h" //solveConstraintRowsCL -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - -#define B3_JOINT_SOLVER_PATH "src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" - - -struct b3GpuPgsJacobiSolverInternalData -{ - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3PrefixScanCL* m_prefixScan; - - cl_kernel m_solveJointConstraintRowsKernels; - cl_kernel m_initSolverBodiesKernel; - cl_kernel m_getInfo1Kernel; - cl_kernel m_initBatchConstraintsKernel; - cl_kernel m_getInfo2Kernel; - cl_kernel m_writeBackVelocitiesKernel; - cl_kernel m_breakViolatedConstraintsKernel; - - b3OpenCLArray<unsigned int>* m_gpuConstraintRowOffsets; - - b3OpenCLArray<b3GpuSolverBody>* m_gpuSolverBodies; - b3OpenCLArray<b3BatchConstraint>* m_gpuBatchConstraints; - b3OpenCLArray<b3GpuSolverConstraint>* m_gpuConstraintRows; - b3OpenCLArray<unsigned int>* m_gpuConstraintInfo1; - -// b3AlignedObjectArray<b3GpuSolverBody> m_cpuSolverBodies; - b3AlignedObjectArray<b3BatchConstraint> m_cpuBatchConstraints; - b3AlignedObjectArray<b3GpuSolverConstraint> m_cpuConstraintRows; - b3AlignedObjectArray<unsigned int> m_cpuConstraintInfo1; - b3AlignedObjectArray<unsigned int> m_cpuConstraintRowOffsets; - - b3AlignedObjectArray<b3RigidBodyData> m_cpuBodies; - b3AlignedObjectArray<b3InertiaData> m_cpuInertias; - - - b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints; - - b3AlignedObjectArray<int> m_batchSizes; - - -}; - - -/* -static b3Transform getWorldTransform(b3RigidBodyData* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -} - -static const b3Matrix3x3& getInvInertiaTensorWorld(b3InertiaData* inertia) -{ - return inertia->m_invInertiaWorld; -} - -*/ - -static const b3Vector3& getLinearVelocity(b3RigidBodyData* rb) -{ - return rb->m_linVel; -} - -static const b3Vector3& getAngularVelocity(b3RigidBodyData* rb) -{ - return rb->m_angVel; -} - -b3Vector3 getVelocityInLocalPoint(b3RigidBodyData* rb, const b3Vector3& rel_pos) -{ - //we also calculate lin/ang velocity for kinematic objects - return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos); - -} - - - -b3GpuPgsConstraintSolver::b3GpuPgsConstraintSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs) -{ - m_usePgs = usePgs; - m_gpuData = new b3GpuPgsJacobiSolverInternalData(); - m_gpuData->m_context = ctx; - m_gpuData->m_device = device; - m_gpuData->m_queue = queue; - - m_gpuData->m_prefixScan = new b3PrefixScanCL(ctx,device,queue); - - m_gpuData->m_gpuConstraintRowOffsets = new b3OpenCLArray<unsigned int>(m_gpuData->m_context,m_gpuData->m_queue); - - m_gpuData->m_gpuSolverBodies = new b3OpenCLArray<b3GpuSolverBody>(m_gpuData->m_context,m_gpuData->m_queue); - m_gpuData->m_gpuBatchConstraints = new b3OpenCLArray<b3BatchConstraint>(m_gpuData->m_context,m_gpuData->m_queue); - m_gpuData->m_gpuConstraintRows = new b3OpenCLArray<b3GpuSolverConstraint>(m_gpuData->m_context,m_gpuData->m_queue); - m_gpuData->m_gpuConstraintInfo1 = new b3OpenCLArray<unsigned int>(m_gpuData->m_context,m_gpuData->m_queue); - cl_int errNum=0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,&errNum,"",B3_JOINT_SOLVER_PATH); - //cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,0,&errNum,"",B3_JOINT_SOLVER_PATH,true); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device,solveConstraintRowsCL, "solveJointConstraintRows",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_initSolverBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"initSolverBodies",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_getInfo1Kernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"getInfo1Kernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_initBatchConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"initBatchConstraintsKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_getInfo2Kernel= b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"getInfo2Kernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_writeBackVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"writeBackVelocitiesKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - m_gpuData->m_breakViolatedConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context,m_gpuData->m_device,solveConstraintRowsCL,"breakViolatedConstraintsKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - - - - - clReleaseProgram(prog); - } - - -} - -b3GpuPgsConstraintSolver::~b3GpuPgsConstraintSolver () -{ - clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels); - clReleaseKernel(m_gpuData->m_initSolverBodiesKernel); - clReleaseKernel(m_gpuData->m_getInfo1Kernel); - clReleaseKernel(m_gpuData->m_initBatchConstraintsKernel); - clReleaseKernel(m_gpuData->m_getInfo2Kernel); - clReleaseKernel(m_gpuData->m_writeBackVelocitiesKernel); - clReleaseKernel(m_gpuData->m_breakViolatedConstraintsKernel); - - delete m_gpuData->m_prefixScan; - delete m_gpuData->m_gpuConstraintRowOffsets; - delete m_gpuData->m_gpuSolverBodies; - delete m_gpuData->m_gpuBatchConstraints; - delete m_gpuData->m_gpuConstraintRows; - delete m_gpuData->m_gpuConstraintInfo1; - - delete m_gpuData; -} - -struct b3BatchConstraint -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -}; - -static b3AlignedObjectArray<b3BatchConstraint> batchConstraints; - - -void b3GpuPgsConstraintSolver::recomputeBatches() -{ - m_gpuData->m_batchSizes.clear(); -} - - - - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("GPU solveGroupCacheFriendlySetup"); - batchConstraints.resize(numConstraints); - m_gpuData->m_gpuBatchConstraints->resize(numConstraints); - m_staticIdx = -1; - m_maxOverrideNumSolverIterations = 0; - - - /* m_gpuData->m_gpuBodies->resize(numBodies); - m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); - - b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); - gpuInertias.resize(numBodies); - gpuInertias.copyFromHostPointer(inertias,numBodies); - */ - - m_gpuData->m_gpuSolverBodies->resize(numBodies); - - - m_tmpSolverBodyPool.resize(numBodies); - { - - if (useGpuInitSolverBodies) - { - B3_PROFILE("m_initSolverBodiesKernel"); - - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initSolverBodiesKernel,"m_initSolverBodiesKernel"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); - - // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } else - { - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for (int i=0;i<numBodies;i++) - { - - b3RigidBodyData& body = m_gpuData->m_cpuBodies[i]; - b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; - initSolverBody(i,&solverBody,&body); - solverBody.m_originalBodyIndex = i; - } - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - } - } - -// int totalBodies = 0; - int totalNumRows = 0; - //b3RigidBody* rb0=0,*rb1=0; - //if (1) - { - { - - - // int i; - - m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); - - // b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); - - - if (useGpuInfo1) - { - B3_PROFILE("info1 and init batchConstraint"); - - m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); - - - if (1) - { - B3_PROFILE("getInfo1Kernel"); - - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo1Kernel,"m_getInfo1Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - - if (m_gpuData->m_batchSizes.size()==0) - { - B3_PROFILE("initBatchConstraintsKernel"); - - m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); - unsigned int total=0; - m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1,*m_gpuData->m_gpuConstraintRowOffsets,numConstraints,&total); - unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints-1); - totalNumRows = total+lastElem; - - { - B3_PROFILE("init batch constraints"); - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_initBatchConstraintsKernel,"m_initBatchConstraintsKernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - //assume the batching happens on CPU, so copy the data - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - } - } - else - { - totalNumRows = 0; - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - //calculate the total number of contraint rows - for (int i=0;i<numConstraints;i++) - { - unsigned int& info1= m_tmpConstraintSizesPool[i]; - // unsigned int info1; - if (m_gpuData->m_cpuConstraints[i].isEnabled()) - { - - m_gpuData->m_cpuConstraints[i].getInfo1(&info1,&m_gpuData->m_cpuBodies[0]); - } else - { - info1 = 0; - } - - totalNumRows += info1; - } - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - - } - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); - m_gpuData->m_gpuConstraintRows->resize(totalNumRows); - - // b3GpuConstraintArray verify; - - if (useGpuInfo2) - { - { - B3_PROFILE("getInfo2Kernel"); - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_getInfo2Kernel,"m_getInfo2Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(gpuInertias->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(infoGlobal.m_timeStep); - launcher.setConst(infoGlobal.m_erp); - launcher.setConst(infoGlobal.m_globalCfm); - launcher.setConst(infoGlobal.m_damping); - launcher.setConst(infoGlobal.m_numIterations); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - - if (m_gpuData->m_batchSizes.size()==0) - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - //m_gpuData->m_gpuConstraintRows->copyToHost(verify); - //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - - - - } - } - else - { - - gpuInertias->copyToHost(m_gpuData->m_cpuInertias); - - ///setup the b3SolverConstraints - - for (int i=0;i<numConstraints;i++) - { - const int& info1 = m_tmpConstraintSizesPool[i]; - - if (info1) - { - int constraintIndex = batchConstraints[i].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex]; - - b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; - b3GpuGenericConstraint& constraint = m_gpuData->m_cpuConstraints[i]; - - b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[ constraint.getRigidBodyA()]; - //b3RigidBody& rbA = constraint.getRigidBodyA(); - // b3RigidBody& rbB = constraint.getRigidBodyB(); - b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[ constraint.getRigidBodyB()]; - - - - int solverBodyIdA = constraint.getRigidBodyA();//getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias); - int solverBodyIdB = constraint.getRigidBodyB();//getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias); - - b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; - b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; - - if (rbA.m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } else - { - if (!solverBodyIdA) - m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB.m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } else - { - if (!solverBodyIdB) - m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - - int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; - if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) - m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - - int j; - for ( j=0;j<info1;j++) - { - memset(¤tConstraintRow[j],0,sizeof(b3GpuSolverConstraint)); - currentConstraintRow[j].m_angularComponentA.setValue(0,0,0); - currentConstraintRow[j].m_angularComponentB.setValue(0,0,0); - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_cfm = 0.f; - currentConstraintRow[j].m_contactNormal.setValue(0,0,0); - currentConstraintRow[j].m_friction = 0.f; - currentConstraintRow[j].m_frictionIndex = 0; - currentConstraintRow[j].m_jacDiagABInv = 0.f; - currentConstraintRow[j].m_lowerLimit = 0.f; - currentConstraintRow[j].m_upperLimit = 0.f; - - currentConstraintRow[j].m_originalContactPoint = 0; - currentConstraintRow[j].m_overrideNumSolverIterations = 0; - currentConstraintRow[j].m_relpos1CrossNormal.setValue(0,0,0); - currentConstraintRow[j].m_relpos2CrossNormal.setValue(0,0,0); - currentConstraintRow[j].m_rhs = 0.f; - currentConstraintRow[j].m_rhsPenetration = 0.f; - currentConstraintRow[j].m_solverBodyIdA = 0; - currentConstraintRow[j].m_solverBodyIdB = 0; - - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f); - bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f); - bodyAPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f); - bodyAPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f); - bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f,0.f,0.f); - bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f,0.f,0.f); - bodyBPtr->internalGetPushVelocity().setValue(0.f,0.f,0.f); - bodyBPtr->internalGetTurnVelocity().setValue(0.f,0.f,0.f); - - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f/infoGlobal.m_timeStep; - info2.erp = infoGlobal.m_erp; - info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; - info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxis = 0; - info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3GpuSolverConstraint)/sizeof(b3Scalar);//check this - ///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar - b3Assert(info2.rowskip*sizeof(b3Scalar)== sizeof(b3GpuSolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; - info2.m_damping = infoGlobal.m_damping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = infoGlobal.m_numIterations; - m_gpuData->m_cpuConstraints[i].getInfo2(&info2,&m_gpuData->m_cpuBodies[0]); - - ///finalize the constraint setup - for ( j=0;j<info1;j++) - { - b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j]; - - if (solverConstraint.m_upperLimit>=m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - if (solverConstraint.m_lowerLimit<=-m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - // solverConstraint.m_originalContactPoint = constraint; - - b3Matrix3x3& invInertiaWorldA= m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; - { - - //b3Vector3 angularFactorA(1,1,1); - const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; - solverConstraint.m_angularComponentA = invInertiaWorldA*ftorqueAxis1;//*angularFactorA; - } - - b3Matrix3x3& invInertiaWorldB= m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; - { - - const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; - solverConstraint.m_angularComponentB = invInertiaWorldB*ftorqueAxis2;//*constraint.getRigidBodyB().getAngularFactor(); - } - - { - //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal - //because it gets multiplied iMJlB - b3Vector3 iMJlA = solverConstraint.m_contactNormal*rbA.m_invMass; - b3Vector3 iMJaA = invInertiaWorldA*solverConstraint.m_relpos1CrossNormal; - b3Vector3 iMJlB = solverConstraint.m_contactNormal*rbB.m_invMass;//sign of normal? - b3Vector3 iMJaB = invInertiaWorldB*solverConstraint.m_relpos2CrossNormal; - - b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); - sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); - sum += iMJlB.dot(solverConstraint.m_contactNormal); - sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); - b3Scalar fsum = b3Fabs(sum); - b3Assert(fsum > B3_EPSILON); - solverConstraint.m_jacDiagABInv = fsum>B3_EPSILON?b3Scalar(1.)/sum : 0.f; - } - - - ///fix rhs - ///todo: add force/torque accelerators - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); - - rel_vel = vel1Dotn+vel2Dotn; - - b3Scalar restitution = 0.f; - b3Scalar positionalError = solverConstraint.m_rhs;//already filled in by getConstraintInfo2 - b3Scalar velocityError = restitution - rel_vel * info2.m_damping; - b3Scalar penetrationImpulse = positionalError*solverConstraint.m_jacDiagABInv; - b3Scalar velocityImpulse = velocityError *solverConstraint.m_jacDiagABInv; - solverConstraint.m_rhs = penetrationImpulse+velocityImpulse; - solverConstraint.m_appliedImpulse = 0.f; - - } - } - - } - } - - - - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - - if (m_gpuData->m_batchSizes.size()==0) - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - else - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - - - - }//end useGpuInfo2 - - - } - -#ifdef B3_SUPPORT_CONTACT_CONSTRAINTS - { - int i; - - for (i=0;i<numManifolds;i++) - { - b3Contact4& manifold = manifoldPtr[i]; - convertContact(bodies,inertias,&manifold,infoGlobal); - } - } -#endif //B3_SUPPORT_CONTACT_CONSTRAINTS - } - -// b3ContactSolverInfo info = infoGlobal; - - -// int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); -// int numConstraintPool = m_tmpSolverContactConstraintPool.size(); -// int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); - - - return 0.f; - -} - - - -///a straight copy from GPU/OpenCL kernel, for debugging -__inline void internalApplyImpulse( b3GpuSolverBody* body, const b3Vector3& linearComponent, const b3Vector3& angularComponent,float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor); -} - - -void resolveSingleConstraintRowGeneric2( b3GpuSolverBody* body1, b3GpuSolverBody* body2, b3GpuSolverConstraint* c) -{ - float deltaImpulse = c->m_rhs-b3Scalar(c->m_appliedImpulse)*c->m_cfm; - float deltaVel1Dotn = b3Dot(c->m_contactNormal,body1->m_deltaLinearVelocity) + b3Dot(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -b3Dot(c->m_contactNormal,body2->m_deltaLinearVelocity) + b3Dot(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv; - - float sum = b3Scalar(c->m_appliedImpulse) + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit-b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit-b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse); - internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse); - -} - - - -void b3GpuPgsConstraintSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb) -{ - - solverBody->m_deltaLinearVelocity.setValue(0.f,0.f,0.f); - solverBody->m_deltaAngularVelocity.setValue(0.f,0.f,0.f); - solverBody->internalGetPushVelocity().setValue(0.f,0.f,0.f); - solverBody->internalGetTurnVelocity().setValue(0.f,0.f,0.f); - - b3Assert(rb); -// solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3MakeVector3(rb->m_invMass,rb->m_invMass,rb->m_invMass)); - solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3MakeVector3(1,1,1); - solverBody->m_linearFactor = b3MakeVector3(1,1,1); - solverBody->m_linearVelocity = getLinearVelocity(rb); - solverBody->m_angularVelocity = getAngularVelocity(rb); -} - - -void b3GpuPgsConstraintSolver::averageVelocities() -{ -} - - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal) -{ - //only create the batches once. - //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated - B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); - - bool createBatches = m_gpuData->m_batchSizes.size()==0; - { - - if (createBatches) - { - - m_gpuData->m_batchSizes.resize(0); - - { - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - B3_PROFILE("batch joints"); - b3Assert(batchConstraints.size()==numConstraints); - int simdWidth =numConstraints+1; - int numBodies = m_tmpSolverBodyPool.size(); - sortConstraintByBatch3( &batchConstraints[0], numConstraints, simdWidth , m_staticIdx, numBodies); - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - - } - } else - { - /*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches; - m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); - b3Assert(cpuCheckBatches.size()==batchConstraints.size()); - printf(".\n"); - */ - //>copyFromHost(batchConstraints); - } - int maxIterations = infoGlobal.m_numIterations; - - bool useBatching = true; - - if (useBatching ) - { - - if (!useGpuSolveJointConstraintRows) - { - B3_PROFILE("copy to host"); - m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); - - } - - for ( int iteration = 0 ; iteration< maxIterations ; iteration++) - { - - int batchOffset = 0; - int constraintOffset=0; - int numBatches = m_gpuData->m_batchSizes.size(); - for (int bb=0;bb<numBatches;bb++) - { - int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; - - - if (useGpuSolveJointConstraintRows) - { - B3_PROFILE("solveJointConstraintRowsKernels"); - - /* - __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch*/ - - - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_solveJointConstraintRowsKernels,"m_solveJointConstraintRowsKernels"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints1->getBufferCL());//to detect disabled constraints - launcher.setConst(batchOffset); - launcher.setConst(numConstraintsInBatch); - - launcher.launch1D(numConstraintsInBatch); - - - } else//useGpu - { - - - - for (int b=0;b<numConstraintsInBatch;b++) - { - const b3BatchConstraint& c = batchConstraints[batchOffset+b]; - /*printf("-----------\n"); - printf("bb=%d\n",bb); - printf("c.batchId = %d\n", c.m_batchId); - */ - b3Assert(c.m_batchId==bb); - b3GpuGenericConstraint* constraint = &m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; - if (constraint->m_flags&B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; - int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; - - for (int jj=0;jj<numConstraintRows;jj++) - { - // - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset+jj]; - //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA],&m_tmpSolverBodyPool[constraint.m_solverBodyIdB],&constraint); - } - } - } - }//useGpu - batchOffset+=numConstraintsInBatch; - constraintOffset+=numConstraintsInBatch; - } - }//for (int iteration... - - if (!useGpuSolveJointConstraintRows) - { - { - B3_PROFILE("copy from host"); - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - } - - //B3_PROFILE("copy to host"); - //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } - //int sz = sizeof(b3GpuSolverBody); - //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); - - - - - - } else - { - for ( int iteration = 0 ; iteration< maxIterations ; iteration++) - { - int numJoints = m_tmpSolverNonContactConstraintPool.size(); - for (int j=0;j<numJoints;j++) - { - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA],&m_tmpSolverBodyPool[constraint.m_solverBodyIdB],&constraint); - } - - if (!m_usePgs) - { - averageVelocities(); - } - } - } - - } - clFinish(m_gpuData->m_queue); - return 0.f; -} - - - - -static b3AlignedObjectArray<int> bodyUsed; -static b3AlignedObjectArray<int> curUsed; - - - -inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3( b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) -{ - //int sz = sizeof(b3BatchConstraint); - - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2*simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints<numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies/32+1; - bodyUsed.resize(numUsedArray); - - for (int q=0;q<numUsedArray;q++) - bodyUsed[q]=0; - - - int curBodyUsed = 0; - - int numIter = 0; - - -#if defined(_DEBUG) - for(int i=0; i<numConstraints; i++) - cs[i].m_batchId = -1; -#endif - - int numValidConstraints = 0; -// int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - - { - B3_PROFILE("cpu batch innerloop"); - - while( numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // clear flag - for(int i=0; i<curBodyUsed; i++) - bodyUsed[curUsed[i]/32] = 0; - - curBodyUsed = 0; - - for(int i=numValidConstraints; i<numConstraints; i++) - { - int idx = i; - b3Assert( idx < numConstraints ); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[ bodyA/32 ] & (1<<(bodyA&31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[ bodyB/32 ] & (1<<(bodyB&31)); - } - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - { - bodyUsed[ bodyA/32 ] |= (1<<(bodyA&31)); - curUsed[curBodyUsed++]=bodyA; - } - if (!bIsStatic) - { - bodyUsed[ bodyB/32 ] |= (1<<(bodyB&31)); - curUsed[curBodyUsed++]=bodyB; - } - - cs[idx].m_batchId = batchIdx; - - if (i!=numValidConstraints) - { - b3Swap(cs[i],cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if( nCurrentBatch == simdWidth ) - { - nCurrentBatch = 0; - for(int i=0; i<curBodyUsed; i++) - bodyUsed[curUsed[i]/32] = 0; - curBodyUsed = 0; - } - } - } - } - m_gpuData->m_batchSizes.push_back(nCurrentBatch); - batchIdx ++; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for(int i=0; i<numConstraints; i++) - { - b3Assert( cs[i].m_batchId != -1 ); - } -#endif - - if (maxSwaps<numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} - - -/// b3PgsJacobiSolver Sequentially applies impulses -b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - - B3_PROFILE("solveJoints"); - //you need to provide at least some bodies - - solveGroupCacheFriendlySetup( gpuBodies, gpuInertias,numBodies,gpuConstraints, numConstraints,infoGlobal); - - solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints,infoGlobal); - - solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias,numBodies, gpuConstraints, numConstraints, infoGlobal); - - return 0.f; -} - -void b3GpuPgsConstraintSolver::solveJoints(int numBodies, b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numConstraints, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints) -{ - b3ContactSolverInfo infoGlobal; - infoGlobal.m_splitImpulse = false; - infoGlobal.m_timeStep = 1.f/60.f; - infoGlobal.m_numIterations = 4;//4; -// infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS|B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION; - //infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS; - infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS; - - //if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS) - //if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION)) - - - solveGroup(gpuBodies,gpuInertias,numBodies,gpuConstraints,numConstraints,infoGlobal); - -} - -//b3AlignedObjectArray<b3RigidBodyData> testBodies; - - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies,b3OpenCLArray<b3InertiaData>* gpuInertias,int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlyFinish"); -// int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); -// int i,j; - - - { - if (gpuBreakConstraints) - { - B3_PROFILE("breakViolatedConstraintsKernel"); - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_breakViolatedConstraintsKernel,"m_breakViolatedConstraintsKernel"); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - } else - { - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows); - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - - for (int cid=0;cid<numConstraints;cid++) - { - int originalConstraintIndex = batchConstraints[cid].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[originalConstraintIndex]; - int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex]; - if (numRows) - { - - // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); - for (int i=0;i<numRows;i++) - { - int rowIndex =constraintRowOffset+i; - int orgConstraintIndex = m_gpuData->m_cpuConstraintRows[rowIndex].m_originalConstraintIndex; - float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold; - // printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse); - if (b3Fabs(m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold) - { - - m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } - } - - - gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints); - } - } - - { - if (useGpuWriteBackVelocities) - { - B3_PROFILE("GPU write back velocities and transforms"); - - b3LauncherCL launcher(m_gpuData->m_queue,m_gpuData->m_writeBackVelocitiesKernel,"m_writeBackVelocitiesKernel"); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); -// m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); -// m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies); - //m_gpuData->m_gpuBodies->copyToHost(testBodies); - - } - else - { - B3_PROFILE("CPU write back velocities and transforms"); - - m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for ( int i=0;i<m_tmpSolverBodyPool.size();i++) - { - int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - //printf("bodyIndex=%d\n",bodyIndex); - b3Assert(i==bodyIndex); - - b3RigidBodyData* body = &m_gpuData->m_cpuBodies[bodyIndex]; - if (body->m_invMass) - { - if (infoGlobal.m_splitImpulse) - m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp); - else - m_tmpSolverBodyPool[i].writebackVelocity(); - - if (m_usePgs) - { - body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity; - body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity; - } else - { - b3Assert(0); - } - /* - if (infoGlobal.m_splitImpulse) - { - body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin(); - b3Quaternion orn; - orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation(); - body->m_quat = orn; - } - */ - } - }//for - - gpuBodies->copyFromHost(m_gpuData->m_cpuBodies); - - } - } - - clFinish(m_gpuData->m_queue); - - m_tmpSolverContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0); - - m_tmpSolverBodyPool.resizeNoInitialize(0); - return 0.f; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h deleted file mode 100644 index ec0e3f73d6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h +++ /dev/null @@ -1,78 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_PGS_CONSTRAINT_SOLVER_H -#define B3_GPU_PGS_CONSTRAINT_SOLVER_H - -struct b3Contact4; -struct b3ContactPoint; - - -class b3Dispatcher; - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h" -#include "b3GpuSolverBody.h" -#include "b3GpuSolverConstraint.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -struct b3RigidBodyData; -struct b3InertiaData; - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "b3GpuGenericConstraint.h" - -class b3GpuPgsConstraintSolver -{ -protected: - int m_staticIdx; - struct b3GpuPgsJacobiSolverInternalData* m_gpuData; - protected: - b3AlignedObjectArray<b3GpuSolverBody> m_tmpSolverBodyPool; - b3GpuConstraintArray m_tmpSolverContactConstraintPool; - b3GpuConstraintArray m_tmpSolverNonContactConstraintPool; - b3GpuConstraintArray m_tmpSolverContactFrictionConstraintPool; - b3GpuConstraintArray m_tmpSolverContactRollingFrictionConstraintPool; - - b3AlignedObjectArray<unsigned int> m_tmpConstraintSizesPool; - - - bool m_usePgs; - void averageVelocities(); - - int m_maxOverrideNumSolverIterations; - - int m_numSplitImpulseRecoveries; - -// int getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies,b3InertiaData* inertias); - void initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb); - -public: - b3GpuPgsConstraintSolver (cl_context ctx, cl_device_id device, cl_command_queue queue,bool usePgs); - virtual~b3GpuPgsConstraintSolver (); - - virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1,int numConstraints,const b3ContactSolverInfo& infoGlobal); - virtual b3Scalar solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - b3Scalar solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies,b3OpenCLArray<b3InertiaData>* gpuInertias,int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - - - b3Scalar solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies,b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - void solveJoints(int numBodies, b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numConstraints, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints); - - int sortConstraintByBatch3( struct b3BatchConstraint* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies); - void recomputeBatches(); -}; - -#endif //B3_GPU_PGS_CONSTRAINT_SOLVER_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp deleted file mode 100644 index f0b0abd5e0..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp +++ /dev/null @@ -1,1708 +0,0 @@ - -bool gUseLargeBatches = false; -bool gCpuBatchContacts = false; -bool gCpuSolveConstraint = false; -bool gCpuRadixSort=false; -bool gCpuSetSortData = false; -bool gCpuSortContactsDeterminism = false; -bool gUseCpuCopyConstraints = false; -bool gUseScanHost = false; -bool gReorderContactsOnCpu = false; - -bool optionalSortContactsDeterminism = true; - - -#include "b3GpuPgsContactSolver.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include <string.h> -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "b3Solver.h" - - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - - - - - -struct b3GpuBatchingPgsSolverInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - int m_pairCapacity; - int m_nIterations; - - b3OpenCLArray<b3GpuConstraint4>* m_contactCGPU; - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - - b3Solver* m_solverGPU; - - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveSingleContactKernel; - cl_kernel m_solveSingleFrictionKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - cl_kernel m_setDeterminismSortDataBodyAKernel; - cl_kernel m_setDeterminismSortDataBodyBKernel; - cl_kernel m_setDeterminismSortDataChildShapeAKernel; - cl_kernel m_setDeterminismSortDataChildShapeBKernel; - - - - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray<b3SortData>* m_sortDataBuffer; - b3OpenCLArray<b3Contact4>* m_contactBuffer; - - b3OpenCLArray<b3RigidBodyData>* m_bodyBufferGPU; - b3OpenCLArray<b3InertiaData>* m_inertiaBufferGPU; - b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU; - - b3OpenCLArray<b3Contact4>* m_pBufContactOutGPUCopy; - b3OpenCLArray<b3SortData>* m_contactKeyValues; - - - b3AlignedObjectArray<unsigned int> m_idxBuffer; - b3AlignedObjectArray<b3SortData> m_sortData; - b3AlignedObjectArray<b3Contact4> m_old; - - b3AlignedObjectArray<int> m_batchSizes; - b3OpenCLArray<int>* m_batchSizesGpu; - -}; - - - -b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity) -{ - m_debugOutput=0; - m_data = new b3GpuBatchingPgsSolverInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - m_data->m_pairCapacity = pairCapacity; - m_data->m_nIterations = 4; - m_data->m_batchSizesGpu = new b3OpenCLArray<int>(ctx,q); - m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyData>(ctx,q); - m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaData>(ctx,q); - m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx,q); - - m_data->m_pBufContactOutGPUCopy = new b3OpenCLArray<b3Contact4>(ctx,q); - m_data->m_contactKeyValues = new b3OpenCLArray<b3SortData>(ctx,q); - - - m_data->m_solverGPU = new b3Solver(ctx,device,q,512*1024); - - m_data->m_sort32 = new b3RadixSort32CL(ctx,device,m_data->m_queue); - m_data->m_scan = new b3PrefixScanCL(ctx,device,m_data->m_queue,B3_SOLVER_N_CELLS); - m_data->m_search = new b3BoundSearchCL(ctx,device,m_data->m_queue,B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 ); - - m_data->m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,m_data->m_queue,sortSize); - m_data->m_contactBuffer = new b3OpenCLArray<b3Contact4>(ctx,m_data->m_queue); - - m_data->m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,m_data->m_queue,B3_SOLVER_N_CELLS); - m_data->m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_data->m_contactCGPU = new b3OpenCLArray<b3GpuConstraint4>(ctx,q,pairCapacity); - - m_data->m_offsets = new b3OpenCLArray<unsigned int>( ctx,m_data->m_queue,B3_SOLVER_N_CELLS); - m_data->m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - //const char* srcFileNameForCaching=""; - - - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - - { - - cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - - - b3Assert(solverSetup2Prog); - - - cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - - m_data->m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros ); - b3Assert(m_data->m_solveFrictionKernel); - - m_data->m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros ); - b3Assert(m_data->m_solveContactKernel); - - m_data->m_solveSingleContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "solveSingleContactKernel", &pErrNum, solveContactProg,additionalMacros ); - b3Assert(m_data->m_solveSingleContactKernel); - - m_data->m_solveSingleFrictionKernel =b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "solveSingleFrictionKernel", &pErrNum, solveFrictionProg,additionalMacros ); - b3Assert(m_data->m_solveSingleFrictionKernel); - - m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros ); - b3Assert(m_data->m_contactToConstraintKernel); - - m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_setSortDataKernel); - - m_data->m_setDeterminismSortDataBodyAKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyA", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_setDeterminismSortDataBodyAKernel); - - m_data->m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyB", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_setDeterminismSortDataBodyBKernel); - - m_data->m_setDeterminismSortDataChildShapeAKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeA", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_setDeterminismSortDataChildShapeAKernel); - - m_data->m_setDeterminismSortDataChildShapeBKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeB", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_setDeterminismSortDataChildShapeBKernel); - - - m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_reorderContactKernel); - - - m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_data->m_copyConstraintKernel); - - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); - b3Assert(batchingProg); - - m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); - b3Assert(m_data->m_batchingKernel); - } - - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros ); - b3Assert(m_data->m_batchingKernelNew); - } - - - - - - - -} - -b3GpuPgsContactSolver::~b3GpuPgsContactSolver() -{ - delete m_data->m_batchSizesGpu; - delete m_data->m_bodyBufferGPU; - delete m_data->m_inertiaBufferGPU; - delete m_data->m_pBufContactOutGPU; - delete m_data->m_pBufContactOutGPUCopy; - delete m_data->m_contactKeyValues; - - - - delete m_data->m_contactCGPU; - delete m_data->m_numConstraints; - delete m_data->m_offsets; - delete m_data->m_sortDataBuffer; - delete m_data->m_contactBuffer; - - delete m_data->m_sort32; - delete m_data->m_scan; - delete m_data->m_search; - delete m_data->m_solverGPU; - - clReleaseKernel(m_data->m_batchingKernel); - clReleaseKernel(m_data->m_batchingKernelNew); - clReleaseKernel(m_data->m_solveSingleContactKernel); - clReleaseKernel(m_data->m_solveSingleFrictionKernel); - clReleaseKernel( m_data->m_solveContactKernel); - clReleaseKernel( m_data->m_solveFrictionKernel); - - clReleaseKernel( m_data->m_contactToConstraintKernel); - clReleaseKernel( m_data->m_setSortDataKernel); - clReleaseKernel( m_data->m_reorderContactKernel); - clReleaseKernel( m_data->m_copyConstraintKernel); - - clReleaseKernel(m_data->m_setDeterminismSortDataBodyAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataBodyBKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeBKernel); - - - - delete m_data; -} - - - -struct b3ConstraintCfg -{ - b3ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(0) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; -}; - - - -void b3GpuPgsContactSolver::solveContactConstraintBatchSizes( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//const b3OpenCLArray<int>* gpuBatchSizes) -{ - B3_PROFILE("solveContactConstraintBatchSizes"); - int numBatches = batchSizes->size()/B3_MAX_NUM_BATCHES; - for(int iter=0; iter<numIterations; iter++) - { - - for (int cellId=0;cellId<numBatches;cellId++) - { - int offset = 0; - for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) - { - int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleContactKernel,"m_solveSingleContactKernel" ); - launcher.setBuffer(bodyBuf->getBufferCL() ); - launcher.setBuffer(shapeBuf->getBufferCL() ); - launcher.setBuffer( constraint->getBufferCL() ); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset+=numInBatch; - } - } - } - } - - - for(int iter=0; iter<numIterations; iter++) - { - for (int cellId=0;cellId<numBatches;cellId++) - { - int offset = 0; - for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) - { - int numInBatch = batchSizes->at(cellId*B3_MAX_NUM_BATCHES+ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher( m_data->m_queue, m_data->m_solveSingleFrictionKernel,"m_solveSingleFrictionKernel" ); - launcher.setBuffer(bodyBuf->getBufferCL() ); - launcher.setBuffer(shapeBuf->getBufferCL() ); - launcher.setBuffer( constraint->getBufferCL() ); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset+=numInBatch; - } - } - } - } -} - -void b3GpuPgsContactSolver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,int numIterations, const b3AlignedObjectArray<int>* batchSizes)//,const b3OpenCLArray<int>* gpuBatchSizes) -{ - - //sort the contacts - - - b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 ); - { - - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches;//250; - - - int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); -#endif - - - - { - - B3_PROFILE("m_batchSolveKernel iterations"); - for(int iter=0; iter<numIterations; iter++) - { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) - { -#ifdef DEBUG_ME - memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); - gpuDebugInfo.write(debugInfo,numWorkItems); -#endif - - - cdata.z = ib; - - - b3LauncherCL launcher( m_data->m_queue, m_data->m_solveContactKernel,"m_solveContactKernel" ); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_data->m_solverGPU->m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_data->m_solverGPU->m_offsets->getBufferCL() ) -#ifdef DEBUG_ME - , b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - - - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setBuffer( m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst( nSplit ); - launcher.launch1D( numWorkItems, 64 ); - - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName,"rb"); - if (f) - { - int sizeInBytes=0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*) malloc(sizeInBytes); - fread(buf,sizeInBytes,1,f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D( num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo,numWorkItems); - clFinish(m_queue); - for (int i=0;i<numWorkItems;i++) - { - if (debugInfo[i].m_valInt2>0) - { - printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3>0) - { - printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - - - } - } - - clFinish(m_data->m_queue); - - - } - - cdata.x = 1; - bool applyFriction=true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for(int iter=0; iter<numIterations; iter++) - { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_data->m_solverGPU->m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_data->m_solverGPU->m_offsets->getBufferCL() ) -#ifdef DEBUG_ME - ,b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher( m_data->m_queue, m_data->m_solveFrictionKernel,"m_solveFrictionKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setBuffer( m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst( nSplit ); - - launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 ); - } - } - clFinish(m_data->m_queue); - - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } - - -} - - - - - - - - - - - -static bool sortfnc(const b3SortData& a,const b3SortData& b) -{ - return (a.m_key<b.m_key); -} - -static bool b3ContactCmp(const b3Contact4& p, const b3Contact4& q) -{ - return ((p.m_bodyAPtrAndSignBit<q.m_bodyAPtrAndSignBit) || - ((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit<q.m_bodyBPtrAndSignBit)) || - ((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA<q.m_childIndexA ) || - ((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA<q.m_childIndexA ) || - ((p.m_bodyAPtrAndSignBit==q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit==q.m_bodyBPtrAndSignBit) && p.m_childIndexA==q.m_childIndexA && p.m_childIndexB<q.m_childIndexB) - ); -} - - - - - - - - - - - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -#ifndef USE_SPATIAL_BATCHING -static const int gridTable4x4[] = -{ - 0,1,17,16, - 1,2,18,19, - 17,18,32,3, - 16,19,3,34 -}; -static const int gridTable8x8[] = -{ - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131,144,128,130,147,129,145,146, - 208,195,194,192,193,211,210,209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151,133,149,150,135,148,132,134, - 197,27,214,213,212,199,198,196 - -}; - - -#endif - - -void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyData* gBodies, b3SortData* gSortDataOut, int nContacts,float scale,const b3Int4& nSplit,int staticIdx) -{ - for (int gIdx=0;gIdx<nContacts;gIdx++) - { - if( gIdx < nContacts ) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit ); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx); - - #if USE_SPATIAL_BATCHING - int idx = (aStatic)? bIdx: aIdx; - b3Vector3 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1); - int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1); - int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1); - - int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y); - - #else//USE_SPATIAL_BATCHING - bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx); - - #if USE_4x4_GRID - int aa = aIdx&3; - int bb = bIdx&3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*4; - int newIndex = gridTable4x4[gridIndex]; - #else//USE_4x4_GRID - int aa = aIdx&7; - int bb = bIdx&7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*8; - int newIndex = gridTable8x8[gridIndex]; - #endif//USE_4x4_GRID - #endif//USE_SPATIAL_BATCHING - - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } - } -} - - - - - - -void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) -{ - B3_PROFILE("solveContacts"); - m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf,numBodies); - m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf,numBodies); - m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf,numContacts); - - if (optionalSortContactsDeterminism) - { - if (!gCpuSortContactsDeterminism) - { - B3_PROFILE("GPU Sort contact constraints (determinism)"); - - m_data->m_pBufContactOutGPUCopy->resize(numContacts); - m_data->m_contactKeyValues->resize(numContacts); - - m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(),numContacts,0,0); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel,"m_setDeterminismSortDataChildShapeBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D( numContacts, 64 ); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel,"m_setDeterminismSortDataChildShapeAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D( numContacts, 64 ); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel,"m_setDeterminismSortDataBodyBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D( numContacts, 64 ); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel,"m_setDeterminismSortDataBodyAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D( numContacts, 64 ); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - B3_PROFILE("gpu reorderContactKernel (determinism)"); - - b3Int4 cdata; - cdata.x = numContacts; - - //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) - // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst( cdata ); - launcher.launch1D( numContacts, 64 ); - } - - } else - { - B3_PROFILE("CPU Sort contact constraints (determinism)"); - b3AlignedObjectArray<b3Contact4> cpuConstraints; - m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); - bool sort = true; - if (sort) - { - cpuConstraints.quickSort(b3ContactCmp); - - for (int i=0;i<cpuConstraints.size();i++) - { - cpuConstraints[i].m_batchIdx = i; - } - } - m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); - if (m_debugOutput==100) - { - for (int i=0;i<cpuConstraints.size();i++) - { - printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n",i,cpuConstraints[i].m_bodyAPtrAndSignBit,cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); - } - } - - m_debugOutput++; - } - } - - - - - int nContactOut = m_data->m_pBufContactOutGPU->size(); - - bool useSolver = true; - - - if (useSolver) - { - float dt=1./60.; - b3ConstraintCfg csCfg( dt ); - csCfg.m_enableParallelSolve = true; - csCfg.m_batchCellSize = 6; - csCfg.m_staticIdx = static0Index; - - - b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; - - void* additionalData = 0;//m_data->m_frictionCGPU; - const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; - b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; - int nContacts = nContactOut; - - - int maxNumBatches = 0; - - if (!gUseLargeBatches) - { - - if( m_data->m_solverGPU->m_contactBuffer2) - { - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - if( m_data->m_solverGPU->m_contactBuffer2 == 0 ) - { - m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context,m_data->m_queue, nContacts ); - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - //clFinish(m_data->m_queue); - - - - { - B3_PROFILE("batching"); - //@todo: just reserve it, without copy of original contact (unless we use warmstarting) - - - - //const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; - - - { - - //b3OpenCLArray<b3RigidBodyData>* bodyNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); - //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); - - const int sortAlignment = 512; // todo. get this out of sort - if( csCfg.m_enableParallelSolve ) - { - - - int sortSize = B3NEXTMULTIPLEOF( nContacts, sortAlignment ); - - b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - - if (!gCpuSetSortData) - { // 2. set cell idx - B3_PROFILE("GPU set cell idx"); - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - b3Int4 m_nSplit; - }; - - b3Assert( sortSize%64 == 0 ); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = csCfg.m_staticIdx; - cdata.m_scale = 1.f/csCfg.m_batchCellSize; - cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; - cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; - cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; - - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL()), b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel,"m_setSortDataKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata.m_nContacts ); - launcher.setConst( cdata.m_scale ); - launcher.setConst(cdata.m_nSplit); - launcher.setConst(cdata.m_staticIdx); - - - launcher.launch1D( sortSize, 64 ); - } else - { - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - b3AlignedObjectArray<b3SortData> sortDataCPU; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); - - b3AlignedObjectArray<b3Contact4> contactCPU; - m_data->m_pBufContactOutGPU->copyToHost(contactCPU); - b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; - bodyBuf->copyToHost(bodiesCPU); - float scale = 1.f/csCfg.m_batchCellSize; - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts,scale,nSplit,csCfg.m_staticIdx); - - - m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); - } - - - - if (!gCpuRadixSort) - { // 3. sort by cell idx - B3_PROFILE("gpuRadixSort"); - //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - //int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); - //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); - b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); - - - - } else - { - b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - b3AlignedObjectArray<b3SortData> hostValues; - keyValuesInOut.copyToHost(hostValues); - hostValues.quickSort(sortfnc); - keyValuesInOut.copyFromHost(hostValues); - } - - - if (gUseScanHost) - { - // 4. find entries - B3_PROFILE("cpuBoundSearch"); - b3AlignedObjectArray<unsigned int> countsHost; - countsNative->copyToHost(countsHost); - - b3AlignedObjectArray<b3SortData> sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - - - //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_search->executeHost(sortDataHost,nContacts,countsHost,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - - countsNative->copyFromHost(countsHost); - - - //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, - // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); - - //unsigned int sum; - //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); - b3AlignedObjectArray<unsigned int> offsetsHost; - offsetsHost.resize(offsetsNative->size()); - - - m_data->m_solverGPU->m_scan->executeHost(countsHost,offsetsHost, B3_SOLVER_N_CELLS);//,&sum ); - offsetsNative->copyFromHost(offsetsHost); - - //printf("sum = %d\n",sum); - } else - { - // 4. find entries - B3_PROFILE("gpuBoundSearch"); - m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); - } - - - - - if (nContacts) - { // 5. sort constraints by cellIdx - if (gReorderContactsOnCpu) - { - B3_PROFILE("cpu m_reorderContactKernel"); - b3AlignedObjectArray<b3SortData> sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - b3AlignedObjectArray<b3Contact4> inContacts; - b3AlignedObjectArray<b3Contact4> outContacts; - m_data->m_pBufContactOutGPU->copyToHost(inContacts); - outContacts.resize(inContacts.size()); - for (int i=0;i<nContacts;i++) - { - int srcIdx = sortDataHost[i].y; - outContacts[i] = inContacts[srcIdx]; - } - m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); - - /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" - "{\n" - " int nContacts = cb.x;\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int srcIdx = sortData[gIdx].y;\n" - " out[gIdx] = in[srcIdx];\n" - " }\n" - "}\n" - */ - } else - { - B3_PROFILE("gpu m_reorderContactKernel"); - - b3Int4 cdata; - cdata.x = nContacts; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), - b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) - , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - - b3LauncherCL launcher(m_data->m_queue,m_data->m_solverGPU->m_reorderContactKernel,"m_reorderContactKernel"); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - } - } - - - - - } - - } - - //clFinish(m_data->m_queue); - - // { - // b3AlignedObjectArray<unsigned int> histogram; - // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); - // printf(",,,\n"); - // } - - - if (nContacts) - { - - if (gUseCpuCopyConstraints) - { - for (int i=0;i<nContacts;i++) - { - m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); - // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); - // m_data->m_pBufContactOutGPU->getBufferCL() - } - - } else - { - B3_PROFILE("gpu m_copyConstraintKernel"); - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL() ), - b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ) - }; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel,"m_copyConstraintKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - //we use the clFinish for proper benchmark/profile - clFinish(m_data->m_queue); - } - } - - -// bool compareGPU = false; - if (nContacts) - { - if (!gCpuBatchContacts) - { - B3_PROFILE("gpu batchContacts"); - maxNumBatches = 250;//250; - m_data->m_solverGPU->batchContacts( m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx ); - clFinish(m_data->m_queue); - } else - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray<b3Contact4> cpuContacts; - b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - contactsIn->copyToHost(cpuContacts); - } - b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - b3AlignedObjectArray<unsigned int> nNativeHost; - b3AlignedObjectArray<unsigned int> offsetsNativeHost; - - { - B3_PROFILE("countsNative/offsetsNative copyToHost"); - countsNative->copyToHost(nNativeHost); - offsetsNative->copyToHost(offsetsNativeHost); - } - - - int numNonzeroGrid=0; - - if (gUseLargeBatches) - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - //int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches,maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches>globalMaxBatch ) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n",maxNumBatches); - } - - } else - { - m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS*B3_MAX_NUM_BATCHES); - B3_PROFILE("cpu batch grid"); - for(int i=0; i<B3_SOLVER_N_CELLS; i++) - { - int n = (nNativeHost)[i]; - int offset = (offsetsNativeHost)[i]; - if( n ) - { - numNonzeroGrid++; - int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3( &cpuContacts[0]+offset, n, simdWidth,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[i*B3_MAX_NUM_BATCHES]); // on GPU - maxNumBatches = b3Max(numBatches,maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches>globalMaxBatch ) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n",maxNumBatches); - } - //we use the clFinish for proper benchmark/profile - - } - } - //clFinish(m_data->m_queue); - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); - } - - } - - } - - - - - - } - - - } - - - //printf("maxNumBatches = %d\n", maxNumBatches); - - if (gUseLargeBatches) - { - if (nContacts) - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray<b3Contact4> cpuContacts; -// b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); - } -// b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; -// b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - - -// int numNonzeroGrid=0; - - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - // int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3( &cpuContacts[0], totalNumConstraints, totalNumConstraints+1,csCfg.m_staticIdx ,numBodies,&m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches,maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches>globalMaxBatch ) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n",maxNumBatches); - } - - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); - } - - } - - } - - if (nContacts) - { - B3_PROFILE("gpu convertToConstraints"); - m_data->m_solverGPU->convertToConstraints( bodyBuf, - shapeBuf, m_data->m_solverGPU->m_contactBuffer2, - contactConstraintOut, - additionalData, nContacts, - (b3SolverBase::ConstraintCfg&) csCfg ); - clFinish(m_data->m_queue); - } - - - if (1) - { - int numIter = 4; - - m_data->m_solverGPU->m_nIterations = numIter;//10 - if (!gCpuSolveConstraint) - { - B3_PROFILE("GPU solveContactConstraint"); - - /*m_data->m_solverGPU->solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches); - */ - - //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); - - if (gUseLargeBatches) - { - solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches,numIter,&m_data->m_batchSizes); - } else - { - solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches,numIter,&m_data->m_batchSizes);//m_data->m_batchSizesGpu); - } - } - else - { - B3_PROFILE("Host solveContactConstraint"); - - m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU,0, nContactOut ,maxNumBatches,&m_data->m_batchSizes); - } - - - } - - -#if 0 - if (0) - { - B3_PROFILE("read body velocities back to CPU"); - //read body updated linear/angular velocities back to CPU - m_data->m_bodyBufferGPU->read( - m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); - adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); - } -#endif - - } - -} - - -void b3GpuPgsContactSolver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx ) -{ -} - - - - - - - - - - - -b3AlignedObjectArray<unsigned int> idxBuffer; -b3AlignedObjectArray<b3SortData> sortData; -b3AlignedObjectArray<b3Contact4> old; - - -inline int b3GpuPgsContactSolver::sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies) -{ - - B3_PROFILE("sortConstraintByBatch"); - int numIter = 0; - - sortData.resize(n); - idxBuffer.resize(n); - old.resize(n); - - unsigned int* idxSrc = &idxBuffer[0]; - unsigned int* idxDst = &idxBuffer[0]; - int nIdxSrc, nIdxDst; - - const int N_FLG = 256; - const int FLG_MASK = N_FLG-1; - unsigned int flg[N_FLG/32]; -#if defined(_DEBUG) - for(int i=0; i<n; i++) - cs[i].getBatchIdx() = -1; -#endif - for(int i=0; i<n; i++) - idxSrc[i] = i; - nIdxSrc = n; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - while( nIdxSrc ) - { - numIter++; - nIdxDst = 0; - int nCurrentBatch = 0; - - // clear flag - for(int i=0; i<N_FLG/32; i++) flg[i] = 0; - - for(int i=0; i<nIdxSrc; i++) - { - int idx = idxSrc[i]; - - - b3Assert( idx < n ); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - - - - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - - int aIdx = bodyA & FLG_MASK; - int bIdx = bodyB & FLG_MASK; - - unsigned int aUnavailable = flg[ aIdx/32 ] & (1<<(aIdx&31)); - unsigned int bUnavailable = flg[ bIdx/32 ] & (1<<(bIdx&31)); - - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; - - //use inv_mass! - aUnavailable = !aIsStatic? aUnavailable:0;// - bUnavailable = !bIsStatic? bUnavailable:0; - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - flg[ aIdx/32 ] |= (1<<(aIdx&31)); - if (!bIsStatic) - flg[ bIdx/32 ] |= (1<<(bIdx&31)); - - cs[idx].getBatchIdx() = batchIdx; - sortData[idx].m_key = batchIdx; - sortData[idx].m_value = idx; - - { - nCurrentBatch++; - if( nCurrentBatch == simdWidth ) - { - nCurrentBatch = 0; - for(int i=0; i<N_FLG/32; i++) flg[i] = 0; - } - } - } - else - { - idxDst[nIdxDst++] = idx; - } - } - b3Swap( idxSrc, idxDst ); - b3Swap( nIdxSrc, nIdxDst ); - batchIdx ++; - } - } - { - B3_PROFILE("quickSort"); - sortData.quickSort(sortfnc); - } - - - { - B3_PROFILE("reorder"); - // reorder - - memcpy( &old[0], cs, sizeof(b3Contact4)*n); - for(int i=0; i<n; i++) - { - int idx = sortData[i].m_value; - cs[i] = old[idx]; - } - } - - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for(int i=0; i<n; i++) - { - b3Assert( cs[i].getBatchIdx() != -1 ); - } -#endif - return batchIdx; -} - - -b3AlignedObjectArray<int> bodyUsed2; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch2( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies) -{ - - B3_PROFILE("sortConstraintByBatch2"); - - - - bodyUsed2.resize(2*simdWidth); - - for (int q=0;q<2*simdWidth;q++) - bodyUsed2[q]=0; - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(numConstraints); - m_data->m_idxBuffer.resize(numConstraints); - m_data->m_old.resize(numConstraints); - - unsigned int* idxSrc = &m_data->m_idxBuffer[0]; - -#if defined(_DEBUG) - for(int i=0; i<numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - for(int i=0; i<numConstraints; i++) - idxSrc[i] = i; - - int numValidConstraints = 0; -// int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - - { - B3_PROFILE("cpu batch innerloop"); - - while( numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // clear flag - for(int i=0; i<curBodyUsed; i++) - bodyUsed2[i] = 0; - curBodyUsed = 0; - - for(int i=numValidConstraints; i<numConstraints; i++) - { - int idx = idxSrc[i]; - b3Assert( idx < numConstraints ); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - for (int j=0;j<curBodyUsed;j++) - { - if (bodyA == bodyUsed2[j]) - { - aUnavailable=1; - break; - } - } - } - if (!aUnavailable) - if (!bIsStatic) - { - for (int j=0;j<curBodyUsed;j++) - { - if (bodyB == bodyUsed2[j]) - { - bUnavailable=1; - break; - } - } - } - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - m_data->m_sortData[idx].m_key = batchIdx; - m_data->m_sortData[idx].m_value = idx; - - if (i!=numValidConstraints) - { - b3Swap(idxSrc[i], idxSrc[numValidConstraints]); - } - - numValidConstraints++; - { - nCurrentBatch++; - if( nCurrentBatch == simdWidth ) - { - nCurrentBatch = 0; - for(int i=0; i<curBodyUsed; i++) - bodyUsed2[i] = 0; - - - curBodyUsed = 0; - } - } - } - } - - batchIdx ++; - } - } - { - B3_PROFILE("quickSort"); - //m_data->m_sortData.quickSort(sortfnc); - } - - { - B3_PROFILE("reorder"); - // reorder - - memcpy( &m_data->m_old[0], cs, sizeof(b3Contact4)*numConstraints); - - for(int i=0; i<numConstraints; i++) - { - b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]); - int idx = m_data->m_sortData[idxSrc[i]].m_value; - cs[i] = m_data->m_old[idx]; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for(int i=0; i<numConstraints; i++) - { - b3Assert( cs[i].getBatchIdx() != -1 ); - } -#endif - - - return batchIdx; -} - - -b3AlignedObjectArray<int> bodyUsed; -b3AlignedObjectArray<int> curUsed; - - -inline int b3GpuPgsContactSolver::sortConstraintByBatch3( b3Contact4* cs, int numConstraints, int simdWidth , int staticIdx, int numBodies, int* batchSizes) -{ - - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2*simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints<numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies/32+1; - bodyUsed.resize(numUsedArray); - - for (int q=0;q<numUsedArray;q++) - bodyUsed[q]=0; - - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(0); - m_data->m_idxBuffer.resize(0); - m_data->m_old.resize(0); - - -#if defined(_DEBUG) - for(int i=0; i<numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - - int numValidConstraints = 0; -// int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - - { - B3_PROFILE("cpu batch innerloop"); - - while( numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - batchSizes[batchIdx] = 0; - - // clear flag - for(int i=0; i<curBodyUsed; i++) - bodyUsed[curUsed[i]/32] = 0; - - curBodyUsed = 0; - - for(int i=numValidConstraints; i<numConstraints; i++) - { - int idx = i; - b3Assert( idx < numConstraints ); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[ bodyA/32 ] & (1<<(bodyA&31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[ bodyB/32 ] & (1<<(bodyB&31)); - } - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - { - bodyUsed[ bodyA/32 ] |= (1<<(bodyA&31)); - curUsed[curBodyUsed++]=bodyA; - } - if (!bIsStatic) - { - bodyUsed[ bodyB/32 ] |= (1<<(bodyB&31)); - curUsed[curBodyUsed++]=bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - - if (i!=numValidConstraints) - { - b3Swap(cs[i],cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if( nCurrentBatch == simdWidth ) - { - batchSizes[batchIdx] += simdWidth; - nCurrentBatch = 0; - for(int i=0; i<curBodyUsed; i++) - bodyUsed[curUsed[i]/32] = 0; - curBodyUsed = 0; - } - } - } - } - - if (batchIdx>=B3_MAX_NUM_BATCHES) - { - b3Error("batchIdx>=B3_MAX_NUM_BATCHES"); - b3Assert(0); - break; - } - - batchSizes[batchIdx] += nCurrentBatch; - - batchIdx ++; - - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for(int i=0; i<numConstraints; i++) - { - b3Assert( cs[i].getBatchIdx() != -1 ); - } -#endif - - batchSizes[batchIdx] =0; - - if (maxSwaps<numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h deleted file mode 100644 index 98e2a5b8c4..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h +++ /dev/null @@ -1,43 +0,0 @@ - -#ifndef B3_GPU_BATCHING_PGS_SOLVER_H -#define B3_GPU_BATCHING_PGS_SOLVER_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "b3GpuConstraint4.h" - -class b3GpuPgsContactSolver -{ -protected: - - int m_debugOutput; - - struct b3GpuBatchingPgsSolverInternalData* m_data; - - void batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx ); - - inline int sortConstraintByBatch( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies); - inline int sortConstraintByBatch2( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies); - inline int sortConstraintByBatch3( b3Contact4* cs, int n, int simdWidth , int staticIdx, int numBodies, int* batchSizes); - - - - void solveContactConstraintBatchSizes( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes);//const b3OpenCLArray<int>* gpuBatchSizes); - - void solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes);//const b3OpenCLArray<int>* gpuBatchSizes); - -public: - - b3GpuPgsContactSolver(cl_context ctx,cl_device_id device, cl_command_queue q,int pairCapacity); - virtual ~b3GpuPgsContactSolver(); - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); - -}; - -#endif //B3_GPU_BATCHING_PGS_SOLVER_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp deleted file mode 100644 index 783e443060..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ /dev/null @@ -1,708 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3GpuRigidBodyPipeline.h" -#include "b3GpuRigidBodyPipelineInternalData.h" -#include "kernels/integrateKernel.h" -#include "kernels/updateAabbsKernel.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3GpuNarrowPhase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" -#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" - -//#define TEST_OTHER_GPU_SOLVER - -#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" -#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" - -bool useBullet2CpuSolver = true; - -//choice of contact solver -bool gUseJacobi = false; -bool gUseDbvt = false; -bool gDumpContactStats = false; -bool gCalcWorldSpaceAabbOnCpu = false; -bool gUseCalculateOverlappingPairsHost = false; -bool gIntegrateOnCpu = false; -bool gClearPairsOnGpu = true; - -#define TEST_OTHER_GPU_SOLVER 1 -#ifdef TEST_OTHER_GPU_SOLVER -#include "b3GpuJacobiContactSolver.h" -#endif //TEST_OTHER_GPU_SOLVER - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h" - -#include "b3GpuPgsContactSolver.h" -#include "b3Solver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" - - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - -b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q,class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap , struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) -{ - m_data = new b3GpuRigidBodyPipelineInternalData; - m_data->m_constraintUid=0; - m_data->m_config = config; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - - m_data->m_solver = new b3PgsJacobiSolver(true);//new b3PgsJacobiSolver(true); - m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx,device,q,true);//new b3PgsJacobiSolver(true); - - m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx,q,config.m_maxConvexBodies); - m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx,q,config.m_maxBroadphasePairs); - - m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx,q); -#ifdef TEST_OTHER_GPU_SOLVER - m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx,device,q,config.m_maxBroadphasePairs); -#endif // TEST_OTHER_GPU_SOLVER - - m_data->m_solver2 = new b3GpuPgsContactSolver(ctx,device,q,config.m_maxBroadphasePairs); - - m_data->m_raycaster = new b3GpuRaycast(ctx,device,q); - - - m_data->m_broadphaseDbvt = broadphaseDbvt; - m_data->m_broadphaseSap = broadphaseSap; - m_data->m_narrowphase = narrowphase; - m_data->m_gravity.setValue(0.f,-9.8f,0.f); - - cl_int errNum=0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,integrateKernelCL,&errNum,"",B3_RIGIDBODY_INTEGRATE_PATH); - b3Assert(errNum==CL_SUCCESS); - m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,integrateKernelCL, "integrateTransformsKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - clReleaseProgram(prog); - } - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,updateAabbsKernelCL,&errNum,"",B3_RIGIDBODY_UPDATEAABB_PATH); - b3Assert(errNum==CL_SUCCESS); - m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "initializeGpuAabbsFull",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - - - m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,updateAabbsKernelCL, "clearOverlappingPairsKernel",&errNum,prog); - b3Assert(errNum==CL_SUCCESS); - - clReleaseProgram(prog); - } - - -} - -b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline() -{ - if (m_data->m_integrateTransformsKernel) - clReleaseKernel(m_data->m_integrateTransformsKernel); - - if (m_data->m_updateAabbsKernel) - clReleaseKernel(m_data->m_updateAabbsKernel); - - if (m_data->m_clearOverlappingPairsKernel) - clReleaseKernel(m_data->m_clearOverlappingPairsKernel); - delete m_data->m_raycaster; - delete m_data->m_solver; - delete m_data->m_allAabbsGPU; - delete m_data->m_gpuConstraints; - delete m_data->m_overlappingPairsGPU; - -#ifdef TEST_OTHER_GPU_SOLVER - delete m_data->m_solver3; -#endif //TEST_OTHER_GPU_SOLVER - - delete m_data->m_solver2; - - - delete m_data; -} - -void b3GpuRigidBodyPipeline::reset() -{ - m_data->m_gpuConstraints->resize(0); - m_data->m_cpuConstraints.resize(0); - m_data->m_allAabbsGPU->resize(0); - m_data->m_allAabbsCPU.resize(0); -} - -void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.push_back(constraint); -} - -void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.remove(constraint); -} - - - -void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid) -{ - m_data->m_gpuSolver->recomputeBatches(); - //slow linear search - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); - //remove - for (int i=0;i<m_data->m_cpuConstraints.size();i++) - { - if (m_data->m_cpuConstraints[i].m_uid == uid) - { - //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]); - m_data->m_cpuConstraints.swap(i,m_data->m_cpuConstraints.size()-1); - m_data->m_cpuConstraints.pop_back(); - - break; - } - } - - if (m_data->m_cpuConstraints.size()) - { - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); - } else - { - m_data->m_gpuConstraints->resize(0); - } - -} -int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; - c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0],pivotInA[1],pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0],pivotInB[1],pivotInB[2]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE; - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} -int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB,float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; - c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0],pivotInA[1],pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0],pivotInB[1],pivotInB[2]); - c.m_relTargetAB.setValue(relTargetAB[0],relTargetAB[1],relTargetAB[2],relTargetAB[3]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE; - - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} - - -void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) -{ - - //update worldspace AABBs from local AABB/worldtransform - { - B3_PROFILE("setupGpuAabbs"); - setupGpuAabbsFull(); - } - - int numPairs =0; - - //compute overlapping pairs - { - - if (gUseDbvt) - { - { - B3_PROFILE("setAabb"); - m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU); - for (int i=0;i<m_data->m_allAabbsCPU.size();i++) - { - b3Vector3 aabbMin=b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0],m_data->m_allAabbsCPU[i].m_min[1],m_data->m_allAabbsCPU[i].m_min[2]); - b3Vector3 aabbMax=b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0],m_data->m_allAabbsCPU[i].m_max[1],m_data->m_allAabbsCPU[i].m_max[2]); - m_data->m_broadphaseDbvt->setAabb(i,aabbMin,aabbMax,0); - } - } - - { - B3_PROFILE("calculateOverlappingPairs"); - m_data->m_broadphaseDbvt->calculateOverlappingPairs(); - } - numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); - - } else - { - if (gUseCalculateOverlappingPairsHost) - { - m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); - } else - { - m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); - } - numPairs = m_data->m_broadphaseSap->getNumOverlap(); - } - } - - //compute contact points -// printf("numPairs=%d\n",numPairs); - - int numContacts = 0; - - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - - if (numPairs) - { - cl_mem pairs =0; - cl_mem aabbsWS =0; - if (gUseDbvt) - { - B3_PROFILE("m_overlappingPairsGPU->copyFromHost"); - m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - pairs = m_data->m_overlappingPairsGPU->getBufferCL(); - aabbsWS = m_data->m_allAabbsGPU->getBufferCL(); - } else - { - pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer(); - aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS(); - } - - m_data->m_overlappingPairsGPU->resize(numPairs); - - //mark the contacts for each pair as 'unused' - if (numPairs) - { - b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context,m_data->m_queue); - gpuPairs.setFromOpenCLBuffer(pairs,numPairs); - - if (gClearPairsOnGpu) - { - - - //b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging - //gpuPairs.copyToHost(hostPairs); - - b3LauncherCL launcher(m_data->m_queue,m_data->m_clearOverlappingPairsKernel,"clearOverlappingPairsKernel"); - launcher.setBuffer(pairs); - launcher.setConst(numPairs); - launcher.launch1D(numPairs); - - - //gpuPairs.copyToHost(hostPairs); - - - } else - { - b3AlignedObjectArray<b3BroadphasePair> hostPairs; - gpuPairs.copyToHost(hostPairs); - - for (int i=0;i<hostPairs.size();i++) - { - hostPairs[i].z = 0xffffffff; - } - - gpuPairs.copyFromHost(hostPairs); - } - } - - m_data->m_narrowphase->computeContacts(pairs,numPairs,aabbsWS,numBodies); - numContacts = m_data->m_narrowphase->getNumContactsGpu(); - - if (gUseDbvt) - { - ///store the cached information (contact locations in the 'z' component) - B3_PROFILE("m_overlappingPairsGPU->copyToHost"); - m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - } - if (gDumpContactStats && numContacts) - { - m_data->m_narrowphase->getContactsGpu(); - - printf("numContacts = %d\n", numContacts); - - int totalPoints = 0; - const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU(); - - for (int i=0;i<numContacts;i++) - { - totalPoints += contacts->getNPoints(); - } - printf("totalPoints=%d\n",totalPoints); - - } - } - - - //convert contact points to contact constraints - - //solve constraints - - b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context,m_data->m_queue,0,true); - gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(),m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context,m_data->m_queue,0,true); - gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(),m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context,m_data->m_queue,0,true); - gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(),m_data->m_narrowphase->getNumContactsGpu()); - - int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size(); - if (useBullet2CpuSolver && numJoints) - { - - // b3AlignedObjectArray<b3Contact4> hostContacts; - //gpuContacts.copyToHost(hostContacts); - { - bool useGpu = m_data->m_joints.size()==0; - -// b3Contact4* contacts = numContacts? &hostContacts[0]: 0; - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints); - if (useGpu) - { - m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(),&gpuBodies,&gpuInertias,numJoints, m_data->m_gpuConstraints); - } else - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray<b3InertiaData> hostInertias; - gpuInertias.copyToHost(hostInertias); - - b3TypedConstraint** joints = numJoints? &m_data->m_joints[0] : 0; - m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(),&hostBodies[0],&hostInertias[0],0,0,numJoints, joints); - gpuBodies.copyFromHost(hostBodies); - } - } - } - - if (numContacts) - { - -#ifdef TEST_OTHER_GPU_SOLVER - - if (gUseJacobi) - { - bool useGpu = true; - if (useGpu) - { - - bool forceHost = false; - if (forceHost) - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - b3AlignedObjectArray<b3InertiaData> hostInertias; - b3AlignedObjectArray<b3Contact4> hostContacts; - - { - B3_PROFILE("copyToHost"); - gpuBodies.copyToHost(hostBodies); - gpuInertias.copyToHost(hostInertias); - gpuContacts.copyToHost(hostContacts); - } - - { - b3JacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(),&hostContacts[0],hostContacts.size(),solverInfo); - - - } - { - B3_PROFILE("copyFromHost"); - gpuBodies.copyFromHost(hostBodies); - } - } else - - - { - int static0Index = m_data->m_narrowphase->getStatic0Index(); - b3JacobiSolverInfo solverInfo; - //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo); - //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),m_data->m_config, static0Index); - } - } else - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray<b3InertiaData> hostInertias; - gpuInertias.copyToHost(hostInertias); - b3AlignedObjectArray<b3Contact4> hostContacts; - gpuContacts.copyToHost(hostContacts); - { - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - } - gpuBodies.copyFromHost(hostBodies); - } - - } else -#endif //TEST_OTHER_GPU_SOLVER - { - - int static0Index = m_data->m_narrowphase->getStatic0Index(); - m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(),gpuInertias.getBufferCL(),numContacts, gpuContacts.getBufferCL(),m_data->m_config, static0Index); - - //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL()); - - - /*m_data->m_solver3->solveContactConstraintHost( - (b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies, - (b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias, - (b3OpenCLArray<Constraint4>*) &gpuContacts, - 0,numContacts,256); - */ - } - } - - integrate(deltaTime); - -} - - -void b3GpuRigidBodyPipeline::integrate(float timeStep) -{ - //integrate - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - float angularDamp = 0.99f; - - if (gIntegrateOnCpu) - { - if(numBodies) - { - b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData(); - npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU); - - b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0); - - for (int nodeID=0;nodeID<numBodies;nodeID++) - { - integrateSingleTransform( bodies,nodeID, timeStep, angularDamp, m_data->m_gravity); - } - npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU); - } - } else - { - b3LauncherCL launcher(m_data->m_queue,m_data->m_integrateTransformsKernel,"m_integrateTransformsKernel"); - launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); - - launcher.setConst(numBodies); - launcher.setConst(timeStep); - launcher.setConst(angularDamp); - launcher.setConst(m_data->m_gravity); - launcher.launch1D(numBodies); - } -} - - - - -void b3GpuRigidBodyPipeline::setupGpuAabbsFull() -{ - cl_int ciErrNum=0; - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - if (!numBodies) - return; - - if (gCalcWorldSpaceAabbOnCpu) - { - - if (numBodies) - { - if (gUseDbvt) - { - m_data->m_allAabbsCPU.resize(numBodies); - m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i=0;i<numBodies;i++) - { - b3ComputeWorldAabb( i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(),&m_data->m_allAabbsCPU[0]); - } - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } else - { - m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies); - m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i=0;i<numBodies;i++) - { - b3ComputeWorldAabb( i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(),&m_data->m_broadphaseSap->getAllAabbsCPU()[0]); - } - m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU()); - //m_data->m_broadphaseSap->writeAabbsToGpu(); - } - } - } else - { - //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) - b3LauncherCL launcher(m_data->m_queue,m_data->m_updateAabbsKernel,"m_updateAabbsKernel"); - launcher.setConst(numBodies); - cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); - launcher.setBuffer(bodies); - cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu(); - launcher.setBuffer(collidables); - cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu(); - launcher.setBuffer(localAabbs); - - cl_mem worldAabbs =0; - if (gUseDbvt) - { - worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); - } else - { - worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); - } - launcher.setBuffer(worldAabbs); - launcher.launch1D(numBodies); - - oclCHECKERROR(ciErrNum, CL_SUCCESS); - } - - /* - b3AlignedObjectArray<b3SapAabb> aabbs; - m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs); - - printf("numAabbs = %d\n", aabbs.size()); - - for (int i=0;i<aabbs.size();i++) - { - printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]); - printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]); - - }; - */ - - - - - -} - - - -cl_mem b3GpuRigidBodyPipeline::getBodyBuffer() -{ - return m_data->m_narrowphase->getBodiesGpu(); -} - -int b3GpuRigidBodyPipeline::getNumBodies() const -{ - return m_data->m_narrowphase->getNumRigidBodies(); -} - -void b3GpuRigidBodyPipeline::setGravity(const float* grav) -{ - m_data->m_gravity.setValue(grav[0],grav[1],grav[2]); -} - -void b3GpuRigidBodyPipeline::copyConstraintsToHost() -{ - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); -} - -void b3GpuRigidBodyPipeline::writeAllInstancesToGpu() -{ - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); -} - - -int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu) -{ - - b3Vector3 aabbMin=b3MakeVector3(0,0,0),aabbMax=b3MakeVector3(0,0,0); - - - if (collidableIndex>=0) - { - b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex); - b3Vector3 localAabbMin=b3MakeVector3(localAabb.m_min[0],localAabb.m_min[1],localAabb.m_min[2]); - b3Vector3 localAabbMax=b3MakeVector3(localAabb.m_max[0],localAabb.m_max[1],localAabb.m_max[2]); - - b3Scalar margin = 0.01f; - b3Transform t; - t.setIdentity(); - t.setOrigin(b3MakeVector3(position[0],position[1],position[2])); - t.setRotation(b3Quaternion(orientation[0],orientation[1],orientation[2],orientation[3])); - b3TransformAabb(localAabbMin,localAabbMax, margin,t,aabbMin,aabbMax); - } else - { - b3Error("registerPhysicsInstance using invalid collidableIndex\n"); - return -1; - } - - - bool writeToGpu = false; - int bodyIndex = m_data->m_narrowphase->getNumRigidBodies(); - bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex,mass,position,orientation,&aabbMin.getX(),&aabbMax.getX(),writeToGpu); - - if (bodyIndex>=0) - { - if (gUseDbvt) - { - m_data->m_broadphaseDbvt->createProxy(aabbMin,aabbMax,bodyIndex,0,1,1); - b3SapAabb aabb; - for (int i=0;i<3;i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - aabb.m_minIndices[3] = bodyIndex; - } - m_data->m_allAabbsCPU.push_back(aabb); - if (writeInstanceToGpu) - { - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } - } else - { - if (mass) - { - m_data->m_broadphaseSap->createProxy(aabbMin,aabbMax,bodyIndex,1,1);//m_dispatcher); - } else - { - m_data->m_broadphaseSap->createLargeProxy(aabbMin,aabbMax,bodyIndex,1,1);//m_dispatcher); - } - } - } - - /* - if (mass>0.f) - m_numDynamicPhysicsInstances++; - - m_numPhysicsInstances++; - */ - - return bodyIndex; -} - -void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults) -{ - this->m_data->m_raycaster->castRays(rays,hitResults, - getNumBodies(),this->m_data->m_narrowphase->getBodiesCpu(), - m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), - m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap); -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h deleted file mode 100644 index b4eac6841a..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h +++ /dev/null @@ -1,74 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_H -#define B3_GPU_RIGIDBODY_PIPELINE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3GpuRigidBodyPipeline -{ -protected: - struct b3GpuRigidBodyPipelineInternalData* m_data; - - int allocateCollidable(); - -public: - - - b3GpuRigidBodyPipeline(cl_context ctx,cl_device_id device, cl_command_queue q , class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config); - virtual ~b3GpuRigidBodyPipeline(); - - void stepSimulation(float deltaTime); - void integrate(float timeStep); - void setupGpuAabbsFull(); - - int registerConvexPolyhedron(class b3ConvexUtility* convex); - - //int registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - //int registerSphereShape(float radius); - //int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - //int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling); - //int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes); - - - int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData, bool writeInstanceToGpu); - //if you passed "writeInstanceToGpu" false in the registerPhysicsInstance method (for performance) you need to call writeAllInstancesToGpu after all instances are registered - void writeAllInstancesToGpu(); - void copyConstraintsToHost(); - void setGravity(const float* grav); - void reset(); - - int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB,float breakingThreshold); - int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold); - void removeConstraintByUid(int uid); - - void addConstraint(class b3TypedConstraint* constraint); - void removeConstraint(b3TypedConstraint* constraint); - - void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults); - - cl_mem getBodyBuffer(); - - int getNumBodies() const; - -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_H
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h deleted file mode 100644 index 5ac92f97d6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H -#define B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - - -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - - - -#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h" -#include "Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h" - -struct b3GpuRigidBodyPipelineInternalData -{ - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - cl_kernel m_integrateTransformsKernel; - cl_kernel m_updateAabbsKernel; - cl_kernel m_clearOverlappingPairsKernel; - - class b3PgsJacobiSolver* m_solver; - - class b3GpuPgsConstraintSolver* m_gpuSolver; - - class b3GpuPgsContactSolver* m_solver2; - class b3GpuJacobiContactSolver* m_solver3; - class b3GpuRaycast* m_raycaster; - - class b3GpuBroadphaseInterface* m_broadphaseSap; - - struct b3DynamicBvhBroadphase* m_broadphaseDbvt; - b3OpenCLArray<b3SapAabb>* m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - b3OpenCLArray<b3BroadphasePair>* m_overlappingPairsGPU; - - b3OpenCLArray<b3GpuGenericConstraint>* m_gpuConstraints; - b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints; - - b3AlignedObjectArray<b3TypedConstraint*> m_joints; - int m_constraintUid; - class b3GpuNarrowPhase* m_narrowphase; - b3Vector3 m_gravity; - - b3Config m_config; -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h deleted file mode 100644 index f2a61801ac..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h +++ /dev/null @@ -1,228 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - - -#ifndef B3_GPU_SOLVER_BODY_H -#define B3_GPU_SOLVER_BODY_H - - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3TransformUtil.h" - -///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision -#ifdef B3_USE_SSE -#define USE_SIMD 1 -#endif // - - - -///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance. -B3_ATTRIBUTE_ALIGNED16 (struct) b3GpuSolverBody -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); -// b3Transform m_worldTransformUnused; - b3Vector3 m_deltaLinearVelocity; - b3Vector3 m_deltaAngularVelocity; - b3Vector3 m_angularFactor; - b3Vector3 m_linearFactor; - b3Vector3 m_invMass; - b3Vector3 m_pushVelocity; - b3Vector3 m_turnVelocity; - b3Vector3 m_linearVelocity; - b3Vector3 m_angularVelocity; - - union - { - void* m_originalBody; - int m_originalBodyIndex; - }; - - int padding[3]; - - /* - void setWorldTransform(const b3Transform& worldTransform) - { - m_worldTransform = worldTransform; - } - - const b3Transform& getWorldTransform() const - { - return m_worldTransform; - } - */ - B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const - { - if (m_originalBody) - velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos); - else - velocity.setValue(0,0,0); - } - - B3_FORCE_INLINE void getAngularVelocity(b3Vector3& angVel) const - { - if (m_originalBody) - angVel =m_angularVelocity+m_deltaAngularVelocity; - else - angVel.setValue(0,0,0); - } - - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor; - m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor); - } - } - - B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_pushVelocity += linearComponent*impulseMagnitude*m_linearFactor; - m_turnVelocity += angularComponent*(impulseMagnitude*m_angularFactor); - } - } - - - - const b3Vector3& getDeltaLinearVelocity() const - { - return m_deltaLinearVelocity; - } - - const b3Vector3& getDeltaAngularVelocity() const - { - return m_deltaAngularVelocity; - } - - const b3Vector3& getPushVelocity() const - { - return m_pushVelocity; - } - - const b3Vector3& getTurnVelocity() const - { - return m_turnVelocity; - } - - - //////////////////////////////////////////////// - ///some internal methods, don't use them - - b3Vector3& internalGetDeltaLinearVelocity() - { - return m_deltaLinearVelocity; - } - - b3Vector3& internalGetDeltaAngularVelocity() - { - return m_deltaAngularVelocity; - } - - const b3Vector3& internalGetAngularFactor() const - { - return m_angularFactor; - } - - const b3Vector3& internalGetInvMass() const - { - return m_invMass; - } - - void internalSetInvMass(const b3Vector3& invMass) - { - m_invMass = invMass; - } - - b3Vector3& internalGetPushVelocity() - { - return m_pushVelocity; - } - - b3Vector3& internalGetTurnVelocity() - { - return m_turnVelocity; - } - - B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity ) const - { - velocity = m_linearVelocity+m_deltaLinearVelocity + (m_angularVelocity+m_deltaAngularVelocity).cross(rel_pos); - } - - B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3& angVel) const - { - angVel = m_angularVelocity+m_deltaAngularVelocity; - } - - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent,const b3Scalar impulseMagnitude) - { - //if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent*impulseMagnitude*m_linearFactor; - m_deltaAngularVelocity += angularComponent*(impulseMagnitude*m_angularFactor); - } - } - - - - - void writebackVelocity() - { - //if (m_originalBody>=0) - { - m_linearVelocity +=m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //m_originalBody->setCompanionId(-1); - } - } - - - void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp) - { - (void) timeStep; - if (m_originalBody) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //correct the position/orientation based on push/turn recovery - b3Transform newTransform; - if (m_pushVelocity[0]!=0.f || m_pushVelocity[1]!=0 || m_pushVelocity[2]!=0 || m_turnVelocity[0]!=0.f || m_turnVelocity[1]!=0 || m_turnVelocity[2]!=0) - { - // b3Quaternion orn = m_worldTransform.getRotation(); -// b3TransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform); -// m_worldTransform = newTransform; - } - //m_worldTransform.setRotation(orn); - //m_originalBody->setCompanionId(-1); - } - } - - - -}; - -#endif //B3_SOLVER_BODY_H - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h deleted file mode 100644 index 60d235baab..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h +++ /dev/null @@ -1,82 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2013 Erwin Coumans http://github.com/erwincoumans/bullet3 - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - - -#ifndef B3_GPU_SOLVER_CONSTRAINT_H -#define B3_GPU_SOLVER_CONSTRAINT_H - - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" -//#include "b3JacobianEntry.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -//#define NO_FRICTION_TANGENTIALS 1 - - - -///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints. -B3_ATTRIBUTE_ALIGNED16 (struct) b3GpuSolverConstraint -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Vector3 m_relpos1CrossNormal; - b3Vector3 m_contactNormal; - - b3Vector3 m_relpos2CrossNormal; - //b3Vector3 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - b3Vector3 m_angularComponentA; - b3Vector3 m_angularComponentB; - - mutable b3Scalar m_appliedPushImpulse; - mutable b3Scalar m_appliedImpulse; - int m_padding1; - int m_padding2; - b3Scalar m_friction; - b3Scalar m_jacDiagABInv; - b3Scalar m_rhs; - b3Scalar m_cfm; - - b3Scalar m_lowerLimit; - b3Scalar m_upperLimit; - b3Scalar m_rhsPenetration; - union - { - void* m_originalContactPoint; - int m_originalConstraintIndex; - b3Scalar m_unusedPadding4; - }; - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - - - enum b3SolverConstraintType - { - B3_SOLVER_CONTACT_1D = 0, - B3_SOLVER_FRICTION_1D - }; -}; - -typedef b3AlignedObjectArray<b3GpuSolverConstraint> b3GpuConstraintArray; - - -#endif //B3_GPU_SOLVER_CONSTRAINT_H - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.cpp deleted file mode 100644 index 20bf6d47c5..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ /dev/null @@ -1,1225 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#include "b3Solver.h" - -///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments -bool useNewBatchingKernel = true; -bool gConvertConstraintOnCpu = false; - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" - -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" - -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Common/b3Vector3.h" - -struct SolverDebugInfo -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - int m_valInt4; - int m_valInt5; - int m_valInt6; - int m_valInt7; - - int m_valInt8; - int m_valInt9; - int m_valInt10; - int m_valInt11; - - int m_valInt12; - int m_valInt13; - int m_valInt14; - int m_valInt15; - - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -}; - - - - -class SolverDeviceInl -{ -public: - struct ParallelSolveData - { - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - }; -}; - - - -b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : - m_context(ctx), - m_device(device), - m_queue(queue), - m_batchSizes(ctx,queue), - m_nIterations(4) -{ - m_sort32 = new b3RadixSort32CL(ctx,device,queue); - m_scan = new b3PrefixScanCL(ctx,device,queue,B3_SOLVER_N_CELLS); - m_search = new b3BoundSearchCL(ctx,device,queue,B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 ); - - m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize); - m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue); - - m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,B3_SOLVER_N_CELLS ); - m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue,B3_SOLVER_N_CELLS); - m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; -// const char* srcFileNameForCaching=""; - - - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - - - { - - cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - b3Assert(solverSetup2Prog); - - - cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - - m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros ); - b3Assert(m_solveFrictionKernel); - - m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros ); - b3Assert(m_solveContactKernel); - - m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros ); - b3Assert(m_contactToConstraintKernel); - - m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_setSortDataKernel); - - m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_reorderContactKernel); - - - m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros ); - b3Assert(m_copyConstraintKernel); - - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); - //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); - b3Assert(batchingProg); - - m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); - b3Assert(m_batchingKernel); - } - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros ); - //m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros ); - b3Assert(m_batchingKernelNew); - } -} - -b3Solver::~b3Solver() -{ - delete m_offsets; - delete m_numConstraints; - delete m_sortDataBuffer; - delete m_contactBuffer2; - - delete m_sort32; - delete m_scan; - delete m_search; - - - clReleaseKernel(m_batchingKernel); - clReleaseKernel(m_batchingKernelNew); - - clReleaseKernel( m_solveContactKernel); - clReleaseKernel( m_solveFrictionKernel); - - clReleaseKernel( m_contactToConstraintKernel); - clReleaseKernel( m_setSortDataKernel); - clReleaseKernel( m_reorderContactKernel); - clReleaseKernel( m_copyConstraintKernel); - -} - - - - -template<bool JACOBI> -static -__inline -void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - - b3Vector3 dLinVelA; dLinVelA.setZero(); - b3Vector3 dAngVelA; dAngVelA.setZero(); - b3Vector3 dLinVelB; dLinVelB.setZero(); - b3Vector3 dAngVelB; dAngVelB.setZero(); - - for(int ic=0; ic<4; ic++) - { - // dont necessary because this makes change to 0 - if( cs.m_jacCoeffInv[ic] == 0.f ) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, &linear, &angular0, &angular1 ); - - float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[ic] ); - updated = b3Min( updated, maxRambdaDt[ic] ); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if( JACOBI ) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - else - { - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - } - } - - if( JACOBI ) - { - linVelA += dLinVelA; - angVelA += dAngVelA; - linVelB += dLinVelB; - angVelB += dAngVelB; - } - -} - - - - - - static - __inline - void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) - { - - if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1 (n, tangent[0],tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0]-center; - tangent[0] = cross3( n, r ); - tangent[1] = cross3( tangent[0], n ); - tangent[0] = normalize3( tangent[0] ); - tangent[1] = normalize3( tangent[1] ); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[i] ); - updated = b3Min( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - - { // angular damping for point constraint - b3Vector3 ab = ( posB - posA ).normalized(); - b3Vector3 ac = ( center - posA ).normalized(); - if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot( n, angVelA ); - float angNB = b3Dot( n, angVelB ); - - angVelA -= (angNA*0.1f)*n; - angVelB -= (angNB*0.1f)*n; - } - } - - } -/* - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; - */ - -struct SolveTask// : public ThreadPool::Task -{ - SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints, - int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex) - : m_bodies( bodies ), m_shapes( shapes ), - m_constraints( constraints ), - m_batchSizes(batchSizes), - m_cellIndex(cellIndex), - m_curWgidx(curWgidx), - m_start( start ), - m_nConstraints( nConstraints ), - m_solveFriction( true ), - m_maxNumBatches(maxNumBatches) - {} - - unsigned short int getType(){ return 0; } - - void run(int tIdx) - { - int offset = 0; - for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) - { - int numInBatch = m_batchSizes->at(m_cellIndex*B3_MAX_NUM_BATCHES+ii); - if (!numInBatch) - break; - - for (int jj=0;jj<numInBatch;jj++) - { - int i = m_start + offset+jj; - int batchId = m_constraints[i].m_batchIdx; - b3Assert(batchId==ii); - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; -// int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if( !m_solveFriction ) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - } - else - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - - } - } - offset+=numInBatch; - - - } -/* for (int bb=0;bb<m_maxNumBatches;bb++) - { - //for(int ic=m_nConstraints-1; ic>=0; ic--) - for(int ic=0; ic<m_nConstraints; ic++) - { - - int i = m_start + ic; - if (m_constraints[i].m_batchIdx != bb) - continue; - - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if( !m_solveFriction ) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - } - else - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - - } - } - } - */ - - - - } - - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; -}; - - -void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,b3AlignedObjectArray<int>* batchSizes) -{ - -#if 0 - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - for (int z=0;z<4;z++) - { - for (int y=0;y<4;y++) - { - for (int x=0;x<4;x++) - { - int newIndex = (x+y*nSplitX+z*nSplitX*nSplitY); - // printf("newIndex=%d\n",newIndex); - - int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - // printf("newIndex=%d\n",newIndex); - } - } - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) - { - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - /*int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - */ - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - // printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - } - } - } -#endif - - b3AlignedObjectArray<b3RigidBodyData> bodyNative; - bodyBuf->copyToHost(bodyNative); - b3AlignedObjectArray<b3InertiaData> shapeNative; - shapeBuf->copyToHost(shapeNative); - b3AlignedObjectArray<b3GpuConstraint4> constraintNative; - constraint->copyToHost(constraintNative); - - b3AlignedObjectArray<unsigned int> numConstraintsHost; - m_numConstraints->copyToHost(numConstraintsHost); - - //printf("------------------------\n"); - b3AlignedObjectArray<unsigned int> offsetsHost; - m_offsets->copyToHost(offsetsHost); - static int frame=0; - bool useBatches=true; - if (useBatches) - { - for(int iter=0; iter<m_nIterations; iter++) - { - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) - { - - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - //printf("cell Batch %d\n",cellBatch); - b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS]; - for (int i=0;i<B3_SOLVER_N_CELLS;i++) - { - usedBodies[i].resize(0); - } - - - - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - - - if( numConstraintsHost[cellIdx] == 0 ) - continue; - - //printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - //printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]); - if (zIdx) - { - //printf("?\n"); - } - - if (iter==0) - { - //printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx); - //printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]); - } - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx,batchSizes,cellIdx); - task.m_solveFriction = false; - task.run(0); - - } - } - } - - for(int iter=0; iter<m_nIterations; iter++) - { - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - - if( numConstraintsHost[cellIdx] == 0 ) - continue; - - //printf("yIdx=%d\n",yIdx); - - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0,batchSizes,cellIdx); - task.m_solveFriction = true; - task.run(0); - - } - } - } - - - } else - { - for(int iter=0; iter<m_nIterations; iter++) - { - SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0); - task.m_solveFriction = false; - task.run(0); - } - - for(int iter=0; iter<m_nIterations; iter++) - { - SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0); - task.m_solveFriction = true; - task.run(0); - } - } - - bodyBuf->copyFromHost(bodyNative); - shapeBuf->copyFromHost(shapeNative); - constraint->copyFromHost(constraintNative); - frame++; - -} - -void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, - b3OpenCLArray<unsigned int>* m_numConstraints, - b3OpenCLArray<unsigned int>* m_offsets, - int batchId - ) -{ -// b3BufferInfoCL( m_numConstraints->getBufferCL() ), -// b3BufferInfoCL( m_offsets->getBufferCL() ) - - int cellBatch = batchId; - const int nn = B3_SOLVER_N_CELLS; -// int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; - - b3AlignedObjectArray<unsigned int> gN; - m_numConstraints->copyToHost(gN); - b3AlignedObjectArray<unsigned int> gOffsets; - m_offsets->copyToHost(gOffsets); - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - -// int bIdx = batchId; - - b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints; - constraint->copyToHost(cpuConstraints); - - printf("batch = %d\n", batchId); - - int numWorkgroups = nn/B3_SOLVER_N_BATCHES; - b3AlignedObjectArray<int> usedBodies; - - - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - printf("wgIdx = %d ", wgIdx); - - int zIdx = (wgIdx/((nSplitX*nSplitY))/2)*2+((cellBatch&4)>>2); - int remain = wgIdx%((nSplitX*nSplitY)); - int yIdx = (remain%(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain/(nSplitX/2))*2 + (cellBatch&1); - - - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - printf("cellIdx=%d\n",cellIdx); - if( gN[cellIdx] == 0 ) - continue; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - for (int c=start;c<end;c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - //printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB); - if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size()) - { - printf("error?\n"); - } - if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size()) - { - printf("error?\n"); - } - } - - for (int c=start;c<end;c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - usedBodies.push_back(constraint.m_bodyA); - usedBodies.push_back(constraint.m_bodyB); - } - - } -} - -static bool verify=false; - -void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches) -{ - - - b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 ); - { - - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches;//250; - - - int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); -#endif - - - - { - - B3_PROFILE("m_batchSolveKernel iterations"); - for(int iter=0; iter<m_nIterations; iter++) - { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) - { - - if (verify) - { - checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib); - } - -#ifdef DEBUG_ME - memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); - gpuDebugInfo.write(debugInfo,numWorkItems); -#endif - - - cdata.z = ib; - - - b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_offsets->getBufferCL() ) -#ifdef DEBUG_ME - , b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - - - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst( nSplit ); - launcher.launch1D( numWorkItems, 64 ); - - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName,"rb"); - if (f) - { - int sizeInBytes=0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*) malloc(sizeInBytes); - fread(buf,sizeInBytes,1,f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D( num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo,numWorkItems); - clFinish(m_queue); - for (int i=0;i<numWorkItems;i++) - { - if (debugInfo[i].m_valInt2>0) - { - printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3>0) - { - printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - - - } - } - - clFinish(m_queue); - - - } - - cdata.x = 1; - bool applyFriction=true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for(int iter=0; iter<m_nIterations; iter++) - { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_offsets->getBufferCL() ) -#ifdef DEBUG_ME - ,b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst( nSplit ); - - launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 ); - } - } - clFinish(m_queue); - - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } - - -} - -void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg ) -{ -// b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; - contactCOut->resize(nContacts); - struct CB - { - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; - }; - - { - - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_dt = cfg.m_dt; - cdata.m_positionDrift = cfg.m_positionDrift; - cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - - - if (gConvertConstraintOnCpu) - { - b3AlignedObjectArray<b3RigidBodyData> gBodies; - bodyBuf->copyToHost(gBodies); - - b3AlignedObjectArray<b3Contact4> gContact; - contactsIn->copyToHost(gContact); - - b3AlignedObjectArray<b3InertiaData> gShapes; - shapeBuf->copyToHost(gShapes); - - b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut; - gConstraintOut.resize(nContacts); - - B3_PROFILE("cpu contactToConstraintKernel"); - for (int gIdx=0;gIdx<nContacts;gIdx++) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Float4 posA = gBodies[aIdx].m_pos; - b3Float4 linVelA = gBodies[aIdx].m_linVel; - b3Float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - b3Float4 posB = gBodies[bIdx].m_pos; - b3Float4 linVelB = gBodies[bIdx].m_linVel; - b3Float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, - &cs ); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = (b3GpuConstraint4&)cs; - } - - contactCOut->copyFromHost(gConstraintOut); - - } else - { - B3_PROFILE("gpu m_contactToConstraintKernel"); - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), - b3BufferInfoCL( contactCOut->getBufferCL() )}; - b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - //launcher.setConst( cdata ); - - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_dt); - launcher.setConst(cdata.m_positionDrift); - launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D( nContacts, 64 ); - clFinish(m_queue); - - } - } - - -} - -/* -void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactsIn, void* additionalData, - int nContacts, const b3Solver::ConstraintCfg& cfg ) -{ - - - - const int sortAlignment = 512; // todo. get this out of sort - if( cfg.m_enableParallelSolve ) - { - - - int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); - - b3OpenCLArray<unsigned int>* countsNative = m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost ); - b3OpenCLArray<unsigned int>* offsetsNative = m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost ); - - { // 2. set cell idx - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; - }; - - b3Assert( sortSize%64 == 0 ); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = cfg.m_staticIdx; - cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent); - cdata.m_nSplit = B3_SOLVER_N_SPLIT; - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_setSortDataKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( sortSize, 64 ); - } - - { // 3. sort by cell idx - int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - m_sort32->execute(*m_sortDataBuffer,sortSize); - } - { // 4. find entries - m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, b3BoundSearchCL::COUNT); - - m_scan->execute( *countsNative, *offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT ); - } - - { // 5. sort constraints by cellIdx - // todo. preallocate this -// b3Assert( contactsIn->getType() == TYPE_HOST ); -// b3OpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer - - { - - - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_reorderContactKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - } -// BufferUtils::unmap<true>( out, contactsIn, nContacts ); - } - } - - -} - -*/ -void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx ) -{ - - int numWorkItems = 64*B3_SOLVER_N_CELLS; - { - B3_PROFILE("batch generation"); - - b3Int4 cdata; - cdata.x = nContacts; - cdata.y = 0; - cdata.z = staticIdx; - - -#ifdef BATCH_DEBUG - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); - memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); - gpuDebugInfo.write(debugInfo,numWorkItems); -#endif - - - -#if 0 - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( contacts->getBufferCL() ), - b3BufferInfoCL( m_contactBuffer2->getBufferCL()), - b3BufferInfoCL( nNative->getBufferCL() ), - b3BufferInfoCL( offsetsNative->getBufferCL() ), -#ifdef BATCH_DEBUG - , b3BufferInfoCL(&gpuDebugInfo) -#endif - }; -#endif - - - - { - m_batchSizes.resize(nNative->size()); - B3_PROFILE("batchingKernel"); - //b3LauncherCL launcher( m_queue, m_batchingKernel); - cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; - - b3LauncherCL launcher( m_queue, k,"*batchingKernel"); - if (!useNewBatchingKernel ) - { - launcher.setBuffer( contacts->getBufferCL() ); - } - launcher.setBuffer( m_contactBuffer2->getBufferCL() ); - launcher.setBuffer( nNative->getBufferCL()); - launcher.setBuffer( offsetsNative->getBufferCL()); - - launcher.setBuffer(m_batchSizes.getBufferCL()); - - - //launcher.setConst( cdata ); - launcher.setConst(staticIdx); - - launcher.launch1D( numWorkItems, 64 ); - //clFinish(m_queue); - //b3AlignedObjectArray<int> batchSizesCPU; - //m_batchSizes.copyToHost(batchSizesCPU); - //printf(".\n"); - } - -#ifdef BATCH_DEBUG - aaaa - b3Contact4* hostContacts = new b3Contact4[nContacts]; - m_contactBuffer->read(hostContacts,nContacts); - clFinish(m_queue); - - gpuDebugInfo.read(debugInfo,numWorkItems); - clFinish(m_queue); - - for (int i=0;i<numWorkItems;i++) - { - if (debugInfo[i].m_valInt1>0) - { - printf("catch\n"); - } - if (debugInfo[i].m_valInt2>0) - { - printf("catch22\n"); - } - - if (debugInfo[i].m_valInt3>0) - { - printf("catch666\n"); - } - - if (debugInfo[i].m_valInt4>0) - { - printf("catch777\n"); - } - } - delete[] debugInfo; -#endif //BATCH_DEBUG - - } - -// copy buffer to buffer - //b3Assert(m_contactBuffer->size()==nContacts); - //contacts->copyFromOpenCLArray( *m_contactBuffer); - //clFinish(m_queue);//needed? - - - -} - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.h deleted file mode 100644 index b37f2f1bec..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/b3Solver.h +++ /dev/null @@ -1,126 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#ifndef __ADL_SOLVER_H -#define __ADL_SOLVER_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "b3GpuConstraint4.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - - -#define B3NEXTMULTIPLEOF(num, alignment) (((num)/(alignment) + (((num)%(alignment)==0)?0:1))*(alignment)) - -enum -{ - B3_SOLVER_N_SPLIT_X = 8,//16,//4, - B3_SOLVER_N_SPLIT_Y = 4,//16,//4, - B3_SOLVER_N_SPLIT_Z = 8,//, - B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X*B3_SOLVER_N_SPLIT_Y*B3_SOLVER_N_SPLIT_Z, - B3_SOLVER_N_BATCHES = 8,//4,//8,//4, - B3_MAX_NUM_BATCHES = 128, -}; - -class b3SolverBase -{ - public: - - - struct ConstraintCfg - { - ConstraintCfg( float dt = 0.f ): m_positionDrift( 0.005f ), m_positionConstraintCoeff( 0.2f ), m_dt(dt), m_staticIdx(-1) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; - }; - -}; - -class b3Solver : public b3SolverBase -{ - public: - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - b3OpenCLArray<int> m_batchSizes; - - - int m_nIterations; - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray<b3SortData>* m_sortDataBuffer; - b3OpenCLArray<b3Contact4>* m_contactBuffer2; - - enum - { - DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000, - }; - - - - - b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - - virtual ~b3Solver(); - - void solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* inertiaBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches); - - void solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches, b3AlignedObjectArray<int>* batchSizes); - - - void convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg ); - - void batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx ); - -}; - - - - -#endif //__ADL_SOLVER_H diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl deleted file mode 100644 index 3b891b863d..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl +++ /dev/null @@ -1,353 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -#define WG_SIZE 64 - - - - - -typedef struct -{ - int m_n; - int m_start; - int m_staticIdx; - int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_a; - int m_b; - u32 m_idx; -}Elem; - -#define STACK_SIZE (WG_SIZE*10) -//#define STACK_SIZE (WG_SIZE) -#define RING_SIZE 1024 -#define RING_SIZE_MASK (RING_SIZE-1) -#define CHECK_SIZE (WG_SIZE) - - -#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd) -#define RING_END ldsTmp - -u32 readBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - return buff[bufIdx] & (1<<bitIdx); -} - -void writeBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; -// buff[bufIdx] |= (1<<bitIdx); - atom_or( &buff[bufIdx], (1<<bitIdx) ); -} - -u32 tryWrite(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) ); - return ((ans >> bitIdx)&1) == 0; -} - -// batching on the GPU -__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut, - __global const u32* gN, __global const u32* gStart, __global int* batchSizes, - int m_staticIdx ) -{ - __local u32 ldsStackIdx[STACK_SIZE]; - __local u32 ldsStackEnd; - __local Elem ldsRingElem[RING_SIZE]; - __local u32 ldsRingEnd; - __local u32 ldsTmp; - __local u32 ldsCheckBuffer[CHECK_SIZE]; - __local u32 ldsFixedBuffer[CHECK_SIZE]; - __local u32 ldsGEnd; - __local u32 ldsDstEnd; - - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - - const int m_n = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - - if( lIdx == 0 ) - { - ldsRingEnd = 0; - ldsGEnd = 0; - ldsStackEnd = 0; - ldsDstEnd = m_start; - } - - - -// while(1) -//was 250 - int ie=0; - int maxBatch = 0; - for(ie=0; ie<50; ie++) - { - ldsFixedBuffer[lIdx] = 0; - - for(int giter=0; giter<4; giter++) - { - int ringCap = GET_RING_CAPACITY; - - // 1. fill ring - if( ldsGEnd < m_n ) - { - while( ringCap > WG_SIZE ) - { - if( ldsGEnd >= m_n ) break; - if( lIdx < ringCap - WG_SIZE ) - { - int srcIdx; - AtomInc1( ldsGEnd, srcIdx ); - if( srcIdx < m_n ) - { - int dstIdx; - AtomInc1( ldsRingEnd, dstIdx ); - - int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit; - int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit; - ldsRingElem[dstIdx].m_a = (a>b)? b:a; - ldsRingElem[dstIdx].m_b = (a>b)? a:b; - ldsRingElem[dstIdx].m_idx = srcIdx; - } - } - ringCap = GET_RING_CAPACITY; - } - } - - GROUP_LDS_BARRIER; - - // 2. fill stack - __local Elem* dst = ldsRingElem; - if( lIdx == 0 ) RING_END = 0; - - int srcIdx=lIdx; - int end = ldsRingEnd; - - { - for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE) - { - Elem e; - if(srcIdx<end) e = ldsRingElem[srcIdx]; - bool done = (srcIdx<end)?false:true; - - for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0; - - if( !done ) - { - int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a)); - int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b)); - - if( aUsed==0 && bUsed==0 ) - { - int aAvailable=1; - int bAvailable=1; - int ea = abs(e.m_a); - int eb = abs(e.m_b); - - bool aStatic = (e.m_a<0) ||(ea==m_staticIdx); - bool bStatic = (e.m_b<0) ||(eb==m_staticIdx); - - if (!aStatic) - aAvailable = tryWrite( ldsCheckBuffer, ea ); - if (!bStatic) - bAvailable = tryWrite( ldsCheckBuffer, eb ); - - //aAvailable = aStatic? 1: aAvailable; - //bAvailable = bStatic? 1: bAvailable; - - bool success = (aAvailable && bAvailable); - if(success) - { - - if (!aStatic) - writeBuf( ldsFixedBuffer, ea ); - if (!bStatic) - writeBuf( ldsFixedBuffer, eb ); - } - done = success; - } - } - - // put it aside - if(srcIdx<end) - { - if( done ) - { - int dstIdx; AtomInc1( ldsStackEnd, dstIdx ); - if( dstIdx < STACK_SIZE ) - ldsStackIdx[dstIdx] = e.m_idx; - else{ - done = false; - AtomAdd( ldsStackEnd, -1 ); - } - } - if( !done ) - { - int dstIdx; AtomInc1( RING_END, dstIdx ); - dst[dstIdx] = e; - } - } - - // if filled, flush - if( ldsStackEnd == STACK_SIZE ) - { - for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE) - { - int idx = m_start + ldsStackIdx[i]; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - gConstraintsOut[ dstIdx ].m_batchIdx = ie; - } - if( lIdx == 0 ) ldsStackEnd = 0; - - //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) - ldsFixedBuffer[lIdx] = 0; - } - } - } - - if( lIdx == 0 ) ldsRingEnd = RING_END; - } - - GROUP_LDS_BARRIER; - - for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE) - { - int idx = m_start + ldsStackIdx[i]; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - gConstraintsOut[ dstIdx ].m_batchIdx = ie; - } - - // in case it couldn't consume any pair. Flush them - // todo. Serial batch worth while? - if( ldsStackEnd == 0 ) - { - for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE) - { - int idx = m_start + ldsRingElem[i].m_idx; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - int curBatch = 100+i; - if (maxBatch < curBatch) - maxBatch = curBatch; - - gConstraintsOut[ dstIdx ].m_batchIdx = curBatch; - - } - GROUP_LDS_BARRIER; - if( lIdx == 0 ) ldsRingEnd = 0; - } - - if( lIdx == 0 ) ldsStackEnd = 0; - - GROUP_LDS_BARRIER; - - // termination - if( ldsGEnd == m_n && ldsRingEnd == 0 ) - break; - } - - if( lIdx == 0 ) - { - if (maxBatch < ie) - maxBatch=ie; - batchSizes[wgIdx]=maxBatch; - } - -} - - - - - - - - - - - - - - - - - - - - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h deleted file mode 100644 index 150eedc94b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ /dev/null @@ -1,388 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"#define WG_SIZE 64\n" -"typedef struct \n" -"{\n" -" int m_n;\n" -" int m_start;\n" -" int m_staticIdx;\n" -" int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct \n" -"{\n" -" int m_a;\n" -" int m_b;\n" -" u32 m_idx;\n" -"}Elem;\n" -"#define STACK_SIZE (WG_SIZE*10)\n" -"//#define STACK_SIZE (WG_SIZE)\n" -"#define RING_SIZE 1024\n" -"#define RING_SIZE_MASK (RING_SIZE-1)\n" -"#define CHECK_SIZE (WG_SIZE)\n" -"#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" -"#define RING_END ldsTmp\n" -"u32 readBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" return buff[bufIdx] & (1<<bitIdx);\n" -"}\n" -"void writeBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -"// buff[bufIdx] |= (1<<bitIdx);\n" -" atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -"}\n" -"u32 tryWrite(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -" return ((ans >> bitIdx)&1) == 0;\n" -"}\n" -"// batching on the GPU\n" -"__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n" -" __global const u32* gN, __global const u32* gStart, __global int* batchSizes, \n" -" int m_staticIdx )\n" -"{\n" -" __local u32 ldsStackIdx[STACK_SIZE];\n" -" __local u32 ldsStackEnd;\n" -" __local Elem ldsRingElem[RING_SIZE];\n" -" __local u32 ldsRingEnd;\n" -" __local u32 ldsTmp;\n" -" __local u32 ldsCheckBuffer[CHECK_SIZE];\n" -" __local u32 ldsFixedBuffer[CHECK_SIZE];\n" -" __local u32 ldsGEnd;\n" -" __local u32 ldsDstEnd;\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" \n" -" const int m_n = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsRingEnd = 0;\n" -" ldsGEnd = 0;\n" -" ldsStackEnd = 0;\n" -" ldsDstEnd = m_start;\n" -" }\n" -" \n" -" \n" -" \n" -"// while(1)\n" -"//was 250\n" -" int ie=0;\n" -" int maxBatch = 0;\n" -" for(ie=0; ie<50; ie++)\n" -" {\n" -" ldsFixedBuffer[lIdx] = 0;\n" -" for(int giter=0; giter<4; giter++)\n" -" {\n" -" int ringCap = GET_RING_CAPACITY;\n" -" \n" -" // 1. fill ring\n" -" if( ldsGEnd < m_n )\n" -" {\n" -" while( ringCap > WG_SIZE )\n" -" {\n" -" if( ldsGEnd >= m_n ) break;\n" -" if( lIdx < ringCap - WG_SIZE )\n" -" {\n" -" int srcIdx;\n" -" AtomInc1( ldsGEnd, srcIdx );\n" -" if( srcIdx < m_n )\n" -" {\n" -" int dstIdx;\n" -" AtomInc1( ldsRingEnd, dstIdx );\n" -" \n" -" int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" -" int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" -" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" -" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" -" ldsRingElem[dstIdx].m_idx = srcIdx;\n" -" }\n" -" }\n" -" ringCap = GET_RING_CAPACITY;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" // 2. fill stack\n" -" __local Elem* dst = ldsRingElem;\n" -" if( lIdx == 0 ) RING_END = 0;\n" -" int srcIdx=lIdx;\n" -" int end = ldsRingEnd;\n" -" {\n" -" for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n" -" {\n" -" Elem e;\n" -" if(srcIdx<end) e = ldsRingElem[srcIdx];\n" -" bool done = (srcIdx<end)?false:true;\n" -" for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n" -" \n" -" if( !done )\n" -" {\n" -" int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a));\n" -" int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b));\n" -" if( aUsed==0 && bUsed==0 )\n" -" {\n" -" int aAvailable=1;\n" -" int bAvailable=1;\n" -" int ea = abs(e.m_a);\n" -" int eb = abs(e.m_b);\n" -" bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n" -" bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n" -" \n" -" if (!aStatic)\n" -" aAvailable = tryWrite( ldsCheckBuffer, ea );\n" -" if (!bStatic)\n" -" bAvailable = tryWrite( ldsCheckBuffer, eb );\n" -" \n" -" //aAvailable = aStatic? 1: aAvailable;\n" -" //bAvailable = bStatic? 1: bAvailable;\n" -" bool success = (aAvailable && bAvailable);\n" -" if(success)\n" -" {\n" -" \n" -" if (!aStatic)\n" -" writeBuf( ldsFixedBuffer, ea );\n" -" if (!bStatic)\n" -" writeBuf( ldsFixedBuffer, eb );\n" -" }\n" -" done = success;\n" -" }\n" -" }\n" -" // put it aside\n" -" if(srcIdx<end)\n" -" {\n" -" if( done )\n" -" {\n" -" int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n" -" if( dstIdx < STACK_SIZE )\n" -" ldsStackIdx[dstIdx] = e.m_idx;\n" -" else{\n" -" done = false;\n" -" AtomAdd( ldsStackEnd, -1 );\n" -" }\n" -" }\n" -" if( !done )\n" -" {\n" -" int dstIdx; AtomInc1( RING_END, dstIdx );\n" -" dst[dstIdx] = e;\n" -" }\n" -" }\n" -" // if filled, flush\n" -" if( ldsStackEnd == STACK_SIZE )\n" -" {\n" -" for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsStackIdx[i];\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" -" }\n" -" if( lIdx == 0 ) ldsStackEnd = 0;\n" -" //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n" -" ldsFixedBuffer[lIdx] = 0;\n" -" }\n" -" }\n" -" }\n" -" if( lIdx == 0 ) ldsRingEnd = RING_END;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsStackIdx[i];\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" -" }\n" -" // in case it couldn't consume any pair. Flush them\n" -" // todo. Serial batch worth while?\n" -" if( ldsStackEnd == 0 )\n" -" {\n" -" for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsRingElem[i].m_idx;\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" int curBatch = 100+i;\n" -" if (maxBatch < curBatch)\n" -" maxBatch = curBatch;\n" -" \n" -" gConstraintsOut[ dstIdx ].m_batchIdx = curBatch;\n" -" \n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 ) ldsRingEnd = 0;\n" -" }\n" -" if( lIdx == 0 ) ldsStackEnd = 0;\n" -" GROUP_LDS_BARRIER;\n" -" // termination\n" -" if( ldsGEnd == m_n && ldsRingEnd == 0 )\n" -" break;\n" -" }\n" -" if( lIdx == 0 )\n" -" {\n" -" if (maxBatch < ie)\n" -" maxBatch=ie;\n" -" batchSizes[wgIdx]=maxBatch;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl deleted file mode 100644 index ba1b66d2c3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define SIMD_WIDTH 64 - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -#define WG_SIZE 64 - - - - - -typedef struct -{ - int m_n; - int m_start; - int m_staticIdx; - int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_a; - int m_b; - u32 m_idx; -}Elem; - - - - - -// batching on the GPU -__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx ) -{ - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - - const int m_n = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - - if( lIdx == 0 ) - { - for (int i=0;i<m_n;i++) - { - int srcIdx = i+m_start; - int batchIndex = i; - gConstraints[ srcIdx ].m_batchIdx = batchIndex; - } - } -} - - -#define CHECK_SIZE (WG_SIZE) - - - - -u32 readBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - return buff[bufIdx] & (1<<bitIdx); -} - -void writeBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - buff[bufIdx] |= (1<<bitIdx); - //atom_or( &buff[bufIdx], (1<<bitIdx) ); -} - -u32 tryWrite(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) ); - return ((ans >> bitIdx)&1) == 0; -} - - -// batching on the GPU -__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx ) -{ - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - const int numConstraints = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - b3Contact4Data_t tmp; - - __local u32 ldsFixedBuffer[CHECK_SIZE]; - - - - - - if( lIdx == 0 ) - { - - - __global struct b3Contact4Data* cs = &gConstraints[m_start]; - - - int numValidConstraints = 0; - int batchIdx = 0; - - while( numValidConstraints < numConstraints) - { - int nCurrentBatch = 0; - // clear flag - - for(int i=0; i<CHECK_SIZE; i++) - ldsFixedBuffer[i] = 0; - - for(int i=numValidConstraints; i<numConstraints; i++) - { - - int bodyAS = cs[i].m_bodyAPtrAndSignBit; - int bodyBS = cs[i].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx; - int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA); - int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB); - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - { - writeBuf( ldsFixedBuffer, bodyA ); - } - if (!bIsStatic) - { - writeBuf( ldsFixedBuffer, bodyB ); - } - - cs[i].m_batchIdx = batchIdx; - - if (i!=numValidConstraints) - { - - tmp = cs[i]; - cs[i] = cs[numValidConstraints]; - cs[numValidConstraints] = tmp; - - - } - - numValidConstraints++; - - nCurrentBatch++; - if( nCurrentBatch == SIMD_WIDTH) - { - nCurrentBatch = 0; - for(int i=0; i<CHECK_SIZE; i++) - ldsFixedBuffer[i] = 0; - - } - } - }//for - batchIdx ++; - }//while - - batchSizes[wgIdx] = batchIdx; - - }//if( lIdx == 0 ) - - //return batchIdx; -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h deleted file mode 100644 index 1e5957adae..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ /dev/null @@ -1,291 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsNewCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define SIMD_WIDTH 64\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"#define WG_SIZE 64\n" -"typedef struct \n" -"{\n" -" int m_n;\n" -" int m_start;\n" -" int m_staticIdx;\n" -" int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct \n" -"{\n" -" int m_a;\n" -" int m_b;\n" -" u32 m_idx;\n" -"}Elem;\n" -"// batching on the GPU\n" -"__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" -"{\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" \n" -" const int m_n = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" for (int i=0;i<m_n;i++)\n" -" {\n" -" int srcIdx = i+m_start;\n" -" int batchIndex = i;\n" -" gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n" -" }\n" -" }\n" -"}\n" -"#define CHECK_SIZE (WG_SIZE)\n" -"u32 readBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" return buff[bufIdx] & (1<<bitIdx);\n" -"}\n" -"void writeBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" buff[bufIdx] |= (1<<bitIdx);\n" -" //atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -"}\n" -"u32 tryWrite(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -" return ((ans >> bitIdx)&1) == 0;\n" -"}\n" -"// batching on the GPU\n" -"__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx )\n" -"{\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" const int numConstraints = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" b3Contact4Data_t tmp;\n" -" \n" -" __local u32 ldsFixedBuffer[CHECK_SIZE];\n" -" \n" -" \n" -" \n" -" \n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" \n" -" \n" -" __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" -" \n" -" \n" -" int numValidConstraints = 0;\n" -" int batchIdx = 0;\n" -" while( numValidConstraints < numConstraints)\n" -" {\n" -" int nCurrentBatch = 0;\n" -" // clear flag\n" -" \n" -" for(int i=0; i<CHECK_SIZE; i++) \n" -" ldsFixedBuffer[i] = 0; \n" -" for(int i=numValidConstraints; i<numConstraints; i++)\n" -" {\n" -" int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n" -" int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n" -" int bodyA = abs(bodyAS);\n" -" int bodyB = abs(bodyBS);\n" -" bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n" -" bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n" -" int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n" -" int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB);\n" -" \n" -" if( aUnavailable==0 && bUnavailable==0 ) // ok\n" -" {\n" -" if (!aIsStatic)\n" -" {\n" -" writeBuf( ldsFixedBuffer, bodyA );\n" -" }\n" -" if (!bIsStatic)\n" -" {\n" -" writeBuf( ldsFixedBuffer, bodyB );\n" -" }\n" -" cs[i].m_batchIdx = batchIdx;\n" -" if (i!=numValidConstraints)\n" -" {\n" -" tmp = cs[i];\n" -" cs[i] = cs[numValidConstraints];\n" -" cs[numValidConstraints] = tmp;\n" -" }\n" -" numValidConstraints++;\n" -" \n" -" nCurrentBatch++;\n" -" if( nCurrentBatch == SIMD_WIDTH)\n" -" {\n" -" nCurrentBatch = 0;\n" -" for(int i=0; i<CHECK_SIZE; i++) \n" -" ldsFixedBuffer[i] = 0;\n" -" \n" -" }\n" -" }\n" -" }//for\n" -" batchIdx ++;\n" -" }//while\n" -" \n" -" batchSizes[wgIdx] = batchIdx;\n" -" }//if( lIdx == 0 )\n" -" \n" -" //return batchIdx;\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl deleted file mode 100644 index e22bc9bc33..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" - - - -__kernel void - integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) -{ - int nodeID = get_global_id(0); - - if( nodeID < numNodes) - { - integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration); - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h deleted file mode 100644 index a5a432947c..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ /dev/null @@ -1,433 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* integrateKernelCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" -"{\n" -" \n" -" if (bodies[nodeID].m_invMass != 0.f)\n" -" {\n" -" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" -" //angular velocity\n" -" {\n" -" b3Float4 axis;\n" -" //add some hardcoded angular damping\n" -" bodies[nodeID].m_angVel.x *= angularDamping;\n" -" bodies[nodeID].m_angVel.y *= angularDamping;\n" -" bodies[nodeID].m_angVel.z *= angularDamping;\n" -" \n" -" b3Float4 angvel = bodies[nodeID].m_angVel;\n" -" float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" -" \n" -" //limit the angular motion\n" -" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" -" {\n" -" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" -" }\n" -" if(fAngle < 0.001f)\n" -" {\n" -" // use Taylor's expansions of sync function\n" -" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" -" }\n" -" else\n" -" {\n" -" // sync(fAngle) = sin(c*fAngle)/t\n" -" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" -" }\n" -" \n" -" b3Quat dorn;\n" -" dorn.x = axis.x;\n" -" dorn.y = axis.y;\n" -" dorn.z = axis.z;\n" -" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" -" b3Quat orn0 = bodies[nodeID].m_quat;\n" -" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" -" predictedOrn = b3QuatNormalized(predictedOrn);\n" -" bodies[nodeID].m_quat=predictedOrn;\n" -" }\n" -" //linear velocity \n" -" bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" -" \n" -" //apply gravity\n" -" bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" -" \n" -" }\n" -" \n" -"}\n" -"inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" -"{\n" -" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" -" \n" -" if( (body->m_invMass != 0.f))\n" -" {\n" -" //angular velocity\n" -" {\n" -" b3Float4 axis;\n" -" //add some hardcoded angular damping\n" -" body->m_angVel.x *= angularDamping;\n" -" body->m_angVel.y *= angularDamping;\n" -" body->m_angVel.z *= angularDamping;\n" -" \n" -" b3Float4 angvel = body->m_angVel;\n" -" float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" -" //limit the angular motion\n" -" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" -" {\n" -" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" -" }\n" -" if(fAngle < 0.001f)\n" -" {\n" -" // use Taylor's expansions of sync function\n" -" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" -" }\n" -" else\n" -" {\n" -" // sync(fAngle) = sin(c*fAngle)/t\n" -" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" -" }\n" -" b3Quat dorn;\n" -" dorn.x = axis.x;\n" -" dorn.y = axis.y;\n" -" dorn.z = axis.z;\n" -" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" -" b3Quat orn0 = body->m_quat;\n" -" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" -" predictedOrn = b3QuatNormalized(predictedOrn);\n" -" body->m_quat=predictedOrn;\n" -" }\n" -" //apply gravity\n" -" body->m_linVel += gravityAcceleration * timeStep;\n" -" //linear velocity \n" -" body->m_pos += body->m_linVel * timeStep;\n" -" \n" -" }\n" -" \n" -"}\n" -"__kernel void \n" -" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" -"{\n" -" int nodeID = get_global_id(0);\n" -" \n" -" if( nodeID < numNodes)\n" -" {\n" -" integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl deleted file mode 100644 index 7f5dabe274..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl +++ /dev/null @@ -1,877 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define B3_CONSTRAINT_FLAG_ENABLED 1 - -#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3 -#define B3_GPU_FIXED_CONSTRAINT_TYPE 4 - -#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails -#define B3_INFINITY 1e30f - -#define mymake_float4 (float4) - - -__inline float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - -typedef float4 Quaternion; - - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - - - -typedef struct -{ - Matrix3x3 m_invInertiaWorld; - Matrix3x3 m_initInvInertia; -} BodyInertia; - - -typedef struct -{ - Matrix3x3 m_basis;//orientation - float4 m_origin;//transform -}b3Transform; - -typedef struct -{ -// b3Transform m_worldTransformUnused; - float4 m_deltaLinearVelocity; - float4 m_deltaAngularVelocity; - float4 m_angularFactor; - float4 m_linearFactor; - float4 m_invMass; - float4 m_pushVelocity; - float4 m_turnVelocity; - float4 m_linearVelocity; - float4 m_angularVelocity; - - union - { - void* m_originalBody; - int m_originalBodyIndex; - }; - int padding[3]; - -} b3GpuSolverBody; - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} b3RigidBodyCL; - -typedef struct -{ - - float4 m_relpos1CrossNormal; - float4 m_contactNormal; - - float4 m_relpos2CrossNormal; - //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - float4 m_angularComponentA; - float4 m_angularComponentB; - - float m_appliedPushImpulse; - float m_appliedImpulse; - int m_padding1; - int m_padding2; - float m_friction; - float m_jacDiagABInv; - float m_rhs; - float m_cfm; - - float m_lowerLimit; - float m_upperLimit; - float m_rhsPenetration; - int m_originalConstraint; - - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - -} b3SolverConstraint; - -typedef struct -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -} b3BatchConstraint; - - - - - - -typedef struct -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - float4 m_pivotInA; - float4 m_pivotInB; - Quaternion m_relTargetAB; - - int m_flags; - int m_padding[3]; -} b3GpuGenericConstraint; - - -/*b3Transform getWorldTransform(b3RigidBodyCL* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -}*/ - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = mymake_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - - -__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor); -} - - -void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c) -{ - float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm; - float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv; - - float sum = c->m_appliedImpulse + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse); - internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse); - -} - -__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch - ) -{ - int b = get_global_id(0); - if (b>=numConstraintsInBatch) - return; - - __global b3BatchConstraint* c = &batchConstraints[b+batchOffset]; - int originalConstraintIndex = c->m_originalConstraintIndex; - if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex]; - int rowOffset = rowOffsets[originalConstraintIndex]; - for (int jj=0;jj<numConstraintRows;jj++) - { - __global b3SolverConstraint* constraint = &rows[rowOffset+jj]; - resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint); - } - } -}; - -__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - __global b3GpuSolverBody* solverBody = &solverBodies[i]; - __global b3RigidBodyCL* bodyCL = &bodiesCL[i]; - - solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f); - solverBody->m_originalBodyIndex = i; - solverBody->m_angularFactor = (float4)(1,1,1,0); - solverBody->m_linearFactor = (float4) (1,1,1,0); - solverBody->m_linearVelocity = bodyCL->m_linVel; - solverBody->m_angularVelocity = bodyCL->m_angVel; -} - -__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints) -{ - int cid = get_global_id(0); - if (cid>=numConstraints) - return; - int numRows = numConstraintRows[cid]; - if (numRows) - { - for (int i=0;i<numRows;i++) - { - int rowIndex = rowOffsets[cid]+i; - float breakingThreshold = constraints[cid].m_breakingImpulseThreshold; - if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold) - { - constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } -} - - - -__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - infos[i] = 3; - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - infos[i] = 6; - break; - } - default: - { - } - } -} - -__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, - __global b3BatchConstraint* batchConstraints, - __global b3GpuGenericConstraint* constraints, - __global b3RigidBodyCL* bodies, - int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - int rbA = constraints[i].m_rbA; - int rbB = constraints[i].m_rbB; - - batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA; - batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB; - batchConstraints[i].m_batchId = -1; - batchConstraints[i].m_originalConstraintIndex = i; - -} - - - - -typedef struct -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - float fps,erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - union - { - __global float4* m_J1linearAxisFloat4; - __global float* m_J1linearAxis; - }; - union - { - __global float4* m_J1angularAxisFloat4; - __global float* m_J1angularAxis; - - }; - union - { - __global float4* m_J2linearAxisFloat4; - __global float* m_J2linearAxis; - }; - union - { - __global float4* m_J2angularAxisFloat4; - __global float* m_J2angularAxis; - }; - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - __global float* m_constraintError; - __global float* cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - __global float* m_lowerLimit; - __global float* m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - __global int *findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - float m_damping; -} b3GpuConstraintInfo2; - - -void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2) -{ - *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f); - *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f); - *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f); -} - - -void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies) -{ - float4 posA = bodies[constraint->m_rbA].m_pos; - Quaternion rotA = bodies[constraint->m_rbA].m_quat; - - float4 posB = bodies[constraint->m_rbB].m_pos; - Quaternion rotB = bodies[constraint->m_rbB].m_quat; - - - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip+1] = 1; - info->m_J1linearAxis[2*info->rowskip+2] = 1; - - float4 a1 = qtRotate(rotA,constraint->m_pivotInA); - - { - __global float4* angular0 = (__global float4*)(info->m_J1angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip); - float4 a1neg = -a1; - getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2); - } - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip+1] = -1; - info->m_J2linearAxis[2*info->rowskip+2] = -1; - } - - float4 a2 = qtRotate(rotB,constraint->m_pivotInB); - - { - // float4 a2n = -a2; - __global float4* angular0 = (__global float4*)(info->m_J2angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip); - getSkewSymmetricMatrix(a2,angular0,angular1,angular2); - } - - // set right hand side -// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - float currERP = info->erp; - - float k = info->fps * currERP; - int j; - float4 result = a2 + posB - a1 - posA; - float* resultPtr = &result; - - for (j=0; j<3; j++) - { - info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]); - } -} - -Quaternion nearest( Quaternion first, Quaternion qd) -{ - Quaternion diff,sum; - diff = first- qd; - sum = first + qd; - - if( dot(diff,diff) < dot(sum,sum) ) - return qd; - return (-qd); -} - -float b3Acos(float x) -{ - if (x<-1) - x=-1; - if (x>1) - x=1; - return acos(x); -} - -float getAngle(Quaternion orn) -{ - if (orn.w>=1.f) - orn.w=1.f; - float s = 2.f * b3Acos(orn.w); - return s; -} - -void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle) -{ - Quaternion orn1 = nearest(orn0,orn1a); - - Quaternion dorn = qtMul(orn1,qtInvert(orn0)); - *angle = getAngle(dorn); - *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f); - - //check for axis length - float len = dot3F4(*axis,*axis); - if (len < FLT_EPSILON*FLT_EPSILON) - *axis = (float4)(1,0,0,0); - else - *axis /= sqrt(len); -} - - - -void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row) -{ - Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat; - Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat; - - int s = info->rowskip; - int start_index = start_row * s; - - // 3 rows to make body rotations equal - info->m_J1angularAxis[start_index] = 1; - info->m_J1angularAxis[start_index + s + 1] = 1; - info->m_J1angularAxis[start_index + s*2+2] = 1; - if ( info->m_J2angularAxis) - { - info->m_J2angularAxis[start_index] = -1; - info->m_J2angularAxis[start_index + s+1] = -1; - info->m_J2angularAxis[start_index + s*2+2] = -1; - } - - float currERP = info->erp; - float k = info->fps * currERP; - float4 diff; - float angle; - float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB)); - - calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle); - diff*=-angle; - - float* resultPtr = &diff; - - for (int j=0; j<3; j++) - { - info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j]; - } - - -} - - -__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - if (bodies[i].m_invMass) - { -// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP) - { - bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity; - } -// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP) - { - bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity; - } - } -} - - -__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, - __global unsigned int* infos, - __global unsigned int* constraintRowOffsets, - __global b3GpuGenericConstraint* constraints, - __global b3BatchConstraint* batchConstraints, - __global b3RigidBodyCL* bodies, - __global BodyInertia* inertias, - __global b3GpuSolverBody* solverBodies, - float timeStep, - float globalErp, - float globalCfm, - float globalDamping, - int globalNumIterations, - int numConstraints) -{ - - int i = get_global_id(0); - if (i>=numConstraints) - return; - - //for now, always initialize the batch info - int info1 = infos[i]; - - __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]]; - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA]; - __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB]; - - int solverBodyIdA = constraint->m_rbA; - int solverBodyIdB = constraint->m_rbB; - - __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA]; - __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB]; - - - if (rbA->m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } else - { -// if (!solverBodyIdA) -// m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB->m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } else - { -// if (!solverBodyIdB) -// m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - if (info1) - { - int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; -// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) - // m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - - int j; - for ( j=0;j<info1;j++) - { -// memset(¤tConstraintRow[j],0,sizeof(b3SolverConstraint)); - currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0); - currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0); - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_cfm = 0.f; - currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_friction = 0.f; - currentConstraintRow[j].m_frictionIndex = 0; - currentConstraintRow[j].m_jacDiagABInv = 0.f; - currentConstraintRow[j].m_lowerLimit = 0.f; - currentConstraintRow[j].m_upperLimit = 0.f; - - currentConstraintRow[j].m_originalConstraint = i; - currentConstraintRow[j].m_overrideNumSolverIterations = 0; - currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_rhs = 0.f; - currentConstraintRow[j].m_rhsPenetration = 0.f; - currentConstraintRow[j].m_solverBodyIdA = 0; - currentConstraintRow[j].m_solverBodyIdB = 0; - - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0); - bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyAPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyAPtr->m_turnVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyBPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyBPtr->m_turnVelocity = (float4)(0,0,0,0); - - int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - - - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f/timeStep; - info2.erp = globalErp; - info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal; - info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxisFloat4 = 0; - info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - ///the size of b3SolverConstraint needs be a multiple of float -// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = globalCfm; - info2.m_damping = globalDamping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = globalNumIterations; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - - getInfo2FixedOrientation(constraint,&info2,bodies,3); - - break; - } - - default: - { - } - } - - ///finalize the constraint setup - for ( j=0;j<info1;j++) - { - __global b3SolverConstraint* solverConstraint = ¤tConstraintRow[j]; - - if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold; - } - - if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold; - } - -// solverConstraint->m_originalContactPoint = constraint; - - Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld; - { - - //float4 angularFactorA(1,1,1); - float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal; - solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA; - } - - Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld; - { - - float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal; - solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor(); - } - - { - //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal - //because it gets multiplied iMJlB - float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass; - float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA); - float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal? - float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB); - - float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal); - sum += dot3F4(iMJlB,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal); - float fsum = fabs(sum); - if (fsum>FLT_EPSILON) - { - solverConstraint->m_jacDiagABInv = 1.f/sum; - } else - { - solverConstraint->m_jacDiagABInv = 0.f; - } - } - - - ///fix rhs - ///todo: add force/torque accelerators - { - float rel_vel; - float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel); - float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel); - - rel_vel = vel1Dotn+vel2Dotn; - - float restitution = 0.f; - float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2 - float velocityError = restitution - rel_vel * info2.m_damping; - float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv; - float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv; - solverConstraint->m_rhs = penetrationImpulse+velocityImpulse; - solverConstraint->m_appliedImpulse = 0.f; - - } - } - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h deleted file mode 100644 index d48ecf6ea6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/jointSolver.h +++ /dev/null @@ -1,721 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveConstraintRowsCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#define B3_CONSTRAINT_FLAG_ENABLED 1\n" -"#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" -"#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" -"#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" -"#define B3_INFINITY 1e30f\n" -"#define mymake_float4 (float4)\n" -"__inline float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"typedef float4 Quaternion;\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertiaWorld;\n" -" Matrix3x3 m_initInvInertia;\n" -"} BodyInertia;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_basis;//orientation\n" -" float4 m_origin;//transform\n" -"}b3Transform;\n" -"typedef struct\n" -"{\n" -"// b3Transform m_worldTransformUnused;\n" -" float4 m_deltaLinearVelocity;\n" -" float4 m_deltaAngularVelocity;\n" -" float4 m_angularFactor;\n" -" float4 m_linearFactor;\n" -" float4 m_invMass;\n" -" float4 m_pushVelocity;\n" -" float4 m_turnVelocity;\n" -" float4 m_linearVelocity;\n" -" float4 m_angularVelocity;\n" -" union \n" -" {\n" -" void* m_originalBody;\n" -" int m_originalBodyIndex;\n" -" };\n" -" int padding[3];\n" -"} b3GpuSolverBody;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" unsigned int m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} b3RigidBodyCL;\n" -"typedef struct\n" -"{\n" -" float4 m_relpos1CrossNormal;\n" -" float4 m_contactNormal;\n" -" float4 m_relpos2CrossNormal;\n" -" //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n" -" float4 m_angularComponentA;\n" -" float4 m_angularComponentB;\n" -" \n" -" float m_appliedPushImpulse;\n" -" float m_appliedImpulse;\n" -" int m_padding1;\n" -" int m_padding2;\n" -" float m_friction;\n" -" float m_jacDiagABInv;\n" -" float m_rhs;\n" -" float m_cfm;\n" -" \n" -" float m_lowerLimit;\n" -" float m_upperLimit;\n" -" float m_rhsPenetration;\n" -" int m_originalConstraint;\n" -" int m_overrideNumSolverIterations;\n" -" int m_frictionIndex;\n" -" int m_solverBodyIdA;\n" -" int m_solverBodyIdB;\n" -"} b3SolverConstraint;\n" -"typedef struct \n" -"{\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_originalConstraintIndex;\n" -" int m_batchId;\n" -"} b3BatchConstraint;\n" -"typedef struct \n" -"{\n" -" int m_constraintType;\n" -" int m_rbA;\n" -" int m_rbB;\n" -" float m_breakingImpulseThreshold;\n" -" float4 m_pivotInA;\n" -" float4 m_pivotInB;\n" -" Quaternion m_relTargetAB;\n" -" int m_flags;\n" -" int m_padding[3];\n" -"} b3GpuGenericConstraint;\n" -"/*b3Transform getWorldTransform(b3RigidBodyCL* rb)\n" -"{\n" -" b3Transform newTrans;\n" -" newTrans.setOrigin(rb->m_pos);\n" -" newTrans.setRotation(rb->m_quat);\n" -" return newTrans;\n" -"}*/\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" v = mymake_float4(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" -"{\n" -" body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" -" body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" -"}\n" -"void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" -"{\n" -" float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" -" float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" -" float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" -" deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" -" deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" -" float sum = c->m_appliedImpulse + deltaImpulse;\n" -" if (sum < c->m_lowerLimit)\n" -" {\n" -" deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;\n" -" c->m_appliedImpulse = c->m_lowerLimit;\n" -" }\n" -" else if (sum > c->m_upperLimit) \n" -" {\n" -" deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;\n" -" c->m_appliedImpulse = c->m_upperLimit;\n" -" }\n" -" else\n" -" {\n" -" c->m_appliedImpulse = sum;\n" -" }\n" -" internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" -" internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" -"}\n" -"__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" -" __global b3BatchConstraint* batchConstraints,\n" -" __global b3SolverConstraint* rows,\n" -" __global unsigned int* numConstraintRowsInfo1, \n" -" __global unsigned int* rowOffsets,\n" -" __global b3GpuGenericConstraint* constraints,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int b = get_global_id(0);\n" -" if (b>=numConstraintsInBatch)\n" -" return;\n" -" __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" -" int originalConstraintIndex = c->m_originalConstraintIndex;\n" -" if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" -" {\n" -" int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" -" int rowOffset = rowOffsets[originalConstraintIndex];\n" -" for (int jj=0;jj<numConstraintRows;jj++)\n" -" {\n" -" __global b3SolverConstraint* constraint = &rows[rowOffset+jj];\n" -" resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" -" }\n" -" }\n" -"};\n" -"__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numBodies)\n" -" return;\n" -" __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" -" __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" -" solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);\n" -" solverBody->m_originalBodyIndex = i;\n" -" solverBody->m_angularFactor = (float4)(1,1,1,0);\n" -" solverBody->m_linearFactor = (float4) (1,1,1,0);\n" -" solverBody->m_linearVelocity = bodyCL->m_linVel;\n" -" solverBody->m_angularVelocity = bodyCL->m_angVel;\n" -"}\n" -"__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" -"{\n" -" int cid = get_global_id(0);\n" -" if (cid>=numConstraints)\n" -" return;\n" -" int numRows = numConstraintRows[cid];\n" -" if (numRows)\n" -" {\n" -" for (int i=0;i<numRows;i++)\n" -" {\n" -" int rowIndex = rowOffsets[cid]+i;\n" -" float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;\n" -" if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)\n" -" {\n" -" constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConstraints)\n" -" return;\n" -" __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -" switch (constraint->m_constraintType)\n" -" {\n" -" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" -" {\n" -" infos[i] = 3;\n" -" break;\n" -" }\n" -" case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" -" {\n" -" infos[i] = 6;\n" -" break;\n" -" }\n" -" default:\n" -" {\n" -" }\n" -" }\n" -"}\n" -"__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" -" __global b3BatchConstraint* batchConstraints, \n" -" __global b3GpuGenericConstraint* constraints,\n" -" __global b3RigidBodyCL* bodies,\n" -" int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConstraints)\n" -" return;\n" -" int rbA = constraints[i].m_rbA;\n" -" int rbB = constraints[i].m_rbB;\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB;\n" -" batchConstraints[i].m_batchId = -1;\n" -" batchConstraints[i].m_originalConstraintIndex = i;\n" -"}\n" -"typedef struct\n" -"{\n" -" // integrator parameters: frames per second (1/stepsize), default error\n" -" // reduction parameter (0..1).\n" -" float fps,erp;\n" -" // for the first and second body, pointers to two (linear and angular)\n" -" // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" -" // been initialized to 0 on entry. if the second body is zero then the\n" -" // J2xx pointers may be 0.\n" -" union \n" -" {\n" -" __global float4* m_J1linearAxisFloat4;\n" -" __global float* m_J1linearAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J1angularAxisFloat4;\n" -" __global float* m_J1angularAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J2linearAxisFloat4;\n" -" __global float* m_J2linearAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J2angularAxisFloat4;\n" -" __global float* m_J2angularAxis;\n" -" };\n" -" // elements to jump from one row to the next in J's\n" -" int rowskip;\n" -" // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" -" // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" -" // set to a constant value (typically very small or zero) value on entry.\n" -" __global float* m_constraintError;\n" -" __global float* cfm;\n" -" // lo and hi limits for variables (set to -/+ infinity on entry).\n" -" __global float* m_lowerLimit;\n" -" __global float* m_upperLimit;\n" -" // findex vector for variables. see the LCP solver interface for a\n" -" // description of what this does. this is set to -1 on entry.\n" -" // note that the returned indexes are relative to the first index of\n" -" // the constraint.\n" -" __global int *findex;\n" -" // number of solver iterations\n" -" int m_numIterations;\n" -" //damping of the velocity\n" -" float m_damping;\n" -"} b3GpuConstraintInfo2;\n" -"void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" -"{\n" -" *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" -" *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n" -" *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" -"}\n" -"void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" -"{\n" -" float4 posA = bodies[constraint->m_rbA].m_pos;\n" -" Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" -" float4 posB = bodies[constraint->m_rbB].m_pos;\n" -" Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" -" // anchor points in global coordinates with respect to body PORs.\n" -" \n" -" // set jacobian\n" -" info->m_J1linearAxis[0] = 1;\n" -" info->m_J1linearAxis[info->rowskip+1] = 1;\n" -" info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" -" float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" -" {\n" -" __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" -" __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" -" __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);\n" -" float4 a1neg = -a1;\n" -" getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);\n" -" }\n" -" if (info->m_J2linearAxis)\n" -" {\n" -" info->m_J2linearAxis[0] = -1;\n" -" info->m_J2linearAxis[info->rowskip+1] = -1;\n" -" info->m_J2linearAxis[2*info->rowskip+2] = -1;\n" -" }\n" -" \n" -" float4 a2 = qtRotate(rotB,constraint->m_pivotInB);\n" -" \n" -" {\n" -" // float4 a2n = -a2;\n" -" __global float4* angular0 = (__global float4*)(info->m_J2angularAxis);\n" -" __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);\n" -" __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);\n" -" getSkewSymmetricMatrix(a2,angular0,angular1,angular2);\n" -" }\n" -" \n" -" // set right hand side\n" -"// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n" -" float currERP = info->erp;\n" -" float k = info->fps * currERP;\n" -" int j;\n" -" float4 result = a2 + posB - a1 - posA;\n" -" float* resultPtr = &result;\n" -" for (j=0; j<3; j++)\n" -" {\n" -" info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" -" }\n" -"}\n" -"Quaternion nearest( Quaternion first, Quaternion qd)\n" -"{\n" -" Quaternion diff,sum;\n" -" diff = first- qd;\n" -" sum = first + qd;\n" -" \n" -" if( dot(diff,diff) < dot(sum,sum) )\n" -" return qd;\n" -" return (-qd);\n" -"}\n" -"float b3Acos(float x) \n" -"{ \n" -" if (x<-1) \n" -" x=-1; \n" -" if (x>1) \n" -" x=1;\n" -" return acos(x); \n" -"}\n" -"float getAngle(Quaternion orn)\n" -"{\n" -" if (orn.w>=1.f)\n" -" orn.w=1.f;\n" -" float s = 2.f * b3Acos(orn.w);\n" -" return s;\n" -"}\n" -"void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" -"{\n" -" Quaternion orn1 = nearest(orn0,orn1a);\n" -" \n" -" Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" -" *angle = getAngle(dorn);\n" -" *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" -" \n" -" //check for axis length\n" -" float len = dot3F4(*axis,*axis);\n" -" if (len < FLT_EPSILON*FLT_EPSILON)\n" -" *axis = (float4)(1,0,0,0);\n" -" else\n" -" *axis /= sqrt(len);\n" -"}\n" -"void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" -"{\n" -" Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" -" Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;\n" -" int s = info->rowskip;\n" -" int start_index = start_row * s;\n" -" // 3 rows to make body rotations equal\n" -" info->m_J1angularAxis[start_index] = 1;\n" -" info->m_J1angularAxis[start_index + s + 1] = 1;\n" -" info->m_J1angularAxis[start_index + s*2+2] = 1;\n" -" if ( info->m_J2angularAxis)\n" -" {\n" -" info->m_J2angularAxis[start_index] = -1;\n" -" info->m_J2angularAxis[start_index + s+1] = -1;\n" -" info->m_J2angularAxis[start_index + s*2+2] = -1;\n" -" }\n" -" \n" -" float currERP = info->erp;\n" -" float k = info->fps * currERP;\n" -" float4 diff;\n" -" float angle;\n" -" float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" -" \n" -" calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" -" diff*=-angle;\n" -" \n" -" float* resultPtr = &diff;\n" -" \n" -" for (int j=0; j<3; j++)\n" -" {\n" -" info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" -" }\n" -" \n" -"}\n" -"__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numBodies)\n" -" return;\n" -" if (bodies[i].m_invMass)\n" -" {\n" -"// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)\n" -" {\n" -" bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;\n" -" }\n" -"// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP)\n" -" {\n" -" bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;\n" -" } \n" -" }\n" -"}\n" -"__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, \n" -" __global unsigned int* infos, \n" -" __global unsigned int* constraintRowOffsets, \n" -" __global b3GpuGenericConstraint* constraints, \n" -" __global b3BatchConstraint* batchConstraints, \n" -" __global b3RigidBodyCL* bodies,\n" -" __global BodyInertia* inertias,\n" -" __global b3GpuSolverBody* solverBodies,\n" -" float timeStep,\n" -" float globalErp,\n" -" float globalCfm,\n" -" float globalDamping,\n" -" int globalNumIterations,\n" -" int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConstraints)\n" -" return;\n" -" \n" -" //for now, always initialize the batch info\n" -" int info1 = infos[i];\n" -" \n" -" __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" -" __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -" __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" -" __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" -" int solverBodyIdA = constraint->m_rbA;\n" -" int solverBodyIdB = constraint->m_rbB;\n" -" __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" -" __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" -" if (rbA->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" -" } else\n" -" {\n" -"// if (!solverBodyIdA)\n" -"// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" -" }\n" -" if (rbB->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" -" } else\n" -" {\n" -"// if (!solverBodyIdB)\n" -"// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" -" }\n" -" if (info1)\n" -" {\n" -" int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" -"// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" -" // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" -" int j;\n" -" for ( j=0;j<info1;j++)\n" -" {\n" -"// memset(¤tConstraintRow[j],0,sizeof(b3SolverConstraint));\n" -" currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_appliedImpulse = 0.f;\n" -" currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" -" currentConstraintRow[j].m_cfm = 0.f;\n" -" currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_friction = 0.f;\n" -" currentConstraintRow[j].m_frictionIndex = 0;\n" -" currentConstraintRow[j].m_jacDiagABInv = 0.f;\n" -" currentConstraintRow[j].m_lowerLimit = 0.f;\n" -" currentConstraintRow[j].m_upperLimit = 0.f;\n" -" currentConstraintRow[j].m_originalConstraint = i;\n" -" currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n" -" currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_rhs = 0.f;\n" -" currentConstraintRow[j].m_rhsPenetration = 0.f;\n" -" currentConstraintRow[j].m_solverBodyIdA = 0;\n" -" currentConstraintRow[j].m_solverBodyIdB = 0;\n" -" \n" -" currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;\n" -" currentConstraintRow[j].m_upperLimit = B3_INFINITY;\n" -" currentConstraintRow[j].m_appliedImpulse = 0.f;\n" -" currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" -" currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;\n" -" currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;\n" -" currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; \n" -" }\n" -" bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" -" int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -" \n" -" b3GpuConstraintInfo2 info2;\n" -" info2.fps = 1.f/timeStep;\n" -" info2.erp = globalErp;\n" -" info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal;\n" -" info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal;\n" -" info2.m_J2linearAxisFloat4 = 0;\n" -" info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal;\n" -" info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -" ///the size of b3SolverConstraint needs be a multiple of float\n" -"// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" -" info2.m_constraintError = ¤tConstraintRow->m_rhs;\n" -" currentConstraintRow->m_cfm = globalCfm;\n" -" info2.m_damping = globalDamping;\n" -" info2.cfm = ¤tConstraintRow->m_cfm;\n" -" info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;\n" -" info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;\n" -" info2.m_numIterations = globalNumIterations;\n" -" switch (constraint->m_constraintType)\n" -" {\n" -" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" -" {\n" -" getInfo2Point2Point(constraint,&info2,bodies);\n" -" break;\n" -" }\n" -" case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" -" {\n" -" getInfo2Point2Point(constraint,&info2,bodies);\n" -" getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" -" break;\n" -" }\n" -" default:\n" -" {\n" -" }\n" -" }\n" -" ///finalize the constraint setup\n" -" for ( j=0;j<info1;j++)\n" -" {\n" -" __global b3SolverConstraint* solverConstraint = ¤tConstraintRow[j];\n" -" if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)\n" -" {\n" -" solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" -" }\n" -" if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" -" {\n" -" solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" -" }\n" -"// solverConstraint->m_originalContactPoint = constraint;\n" -" \n" -" Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" -" {\n" -" //float4 angularFactorA(1,1,1);\n" -" float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" -" solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" -" }\n" -" \n" -" Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" -" {\n" -" float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" -" solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" -" }\n" -" {\n" -" //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n" -" //because it gets multiplied iMJlB\n" -" float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;\n" -" float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" -" float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" -" float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" -" float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" -" sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" -" sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" -" sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);\n" -" float fsum = fabs(sum);\n" -" if (fsum>FLT_EPSILON)\n" -" {\n" -" solverConstraint->m_jacDiagABInv = 1.f/sum;\n" -" } else\n" -" {\n" -" solverConstraint->m_jacDiagABInv = 0.f;\n" -" }\n" -" }\n" -" ///fix rhs\n" -" ///todo: add force/torque accelerators\n" -" {\n" -" float rel_vel;\n" -" float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" -" float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" -" rel_vel = vel1Dotn+vel2Dotn;\n" -" float restitution = 0.f;\n" -" float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" -" float velocityError = restitution - rel_vel * info2.m_damping;\n" -" float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;\n" -" float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" -" solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" -" solverConstraint->m_appliedImpulse = 0.f;\n" -" }\n" -" }\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl deleted file mode 100644 index 5c4d62e4ec..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl +++ /dev/null @@ -1,501 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define mymake_float4 (float4) -//#define make_float2 (float2) -//#define make_uint4 (uint4) -//#define make_int4 (int4) -//#define make_uint2 (uint2) -//#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// - - - - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = mymake_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - - - - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - - - - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1); - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = mymake_float4(-n.xyz,0.f); - *angular0 = -cross3(r0, n); - *angular1 = cross3(r1, n); -} - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ); - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1); - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB); - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - *linVelA += linImp0; - *angVelA += angImp0; - *linVelB += linImp1; - *angVelB += angImp1; - } -} - -void btPlaneSpace1 (const float4* n, float4* p, float4* q); - void btPlaneSpace1 (const float4* n, float4* p, float4* q) -{ - if (fabs(n[0].z) > 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs); -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ); - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - - } - -} - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelContact(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1); - //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1); - //int cellIdx = xIdx+yIdx*nSplit; - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idx<end; ) - { - if (gConstraints[idx].m_batchIdx == ldsCurBatch) - { - solveContactConstraint( gBodies, gShapes, &gConstraints[idx] ); - - idx+=64; - } else - { - break; - } - } - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - ldsCurBatch++; - } - GROUP_LDS_BARRIER; - } - - -} - - - -__kernel void solveSingleContactKernel(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - int cellIdx, - int batchOffset, - int numConstraintsInBatch - ) -{ - - int index = get_global_id(0); - if (index < numConstraintsInBatch) - { - int idx=batchOffset+index; - solveContactConstraint( gBodies, gShapes, &gConstraints[idx] ); - } -} diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h deleted file mode 100644 index 15a049992b..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveContact.h +++ /dev/null @@ -1,393 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveContactCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define mymake_float4 (float4)\n" -"//#define make_float2 (float2)\n" -"//#define make_uint4 (uint4)\n" -"//#define make_int4 (int4)\n" -"//#define make_uint2 (uint2)\n" -"//#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = mymake_float4(-n.xyz,0.f);\n" -" *angular0 = -cross3(r0, n);\n" -" *angular1 = cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" -"{\n" -" float minRambdaDt = 0;\n" -" float maxRambdaDt = FLT_MAX;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = cs->m_worldPos[ic] - posA;\n" -" float4 r1 = cs->m_worldPos[ic] - posB;\n" -" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" -" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" -" rambdaDt *= cs->m_jacCoeffInv[ic];\n" -" {\n" -" float prevSum = cs->m_appliedRambdaDt[ic];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt );\n" -" updated = min2( updated, maxRambdaDt );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_appliedRambdaDt[ic] = updated;\n" -" }\n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" *linVelA += linImp0;\n" -" *angVelA += angImp0;\n" -" *linVelB += linImp1;\n" -" *angVelB += angImp1;\n" -" }\n" -"}\n" -"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" -" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n[0].z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n[0].z*k;\n" -" p[0].z = n[0].y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n[0].x*p[0].z;\n" -" q[0].z = n[0].x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n[0].y*k;\n" -" p[0].y = n[0].x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n[0].z*p[0].y;\n" -" q[0].y = n[0].z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" -"{\n" -" //float frictionCoeff = ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -" posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" -" if (gBodies[aIdx].m_invMass)\n" -" {\n" -" gBodies[aIdx].m_linVel = linVelA;\n" -" gBodies[aIdx].m_angVel = angVelA;\n" -" } else\n" -" {\n" -" gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" \n" -" }\n" -" if (gBodies[bIdx].m_invMass)\n" -" {\n" -" gBodies[bIdx].m_linVel = linVelB;\n" -" gBodies[bIdx].m_angVel = angVelB;\n" -" } else\n" -" {\n" -" gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" \n" -" }\n" -"}\n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void BatchSolveKernelContact(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" __global int* gN,\n" -" __global int* gOffsets,\n" -" __global int* batchSizes,\n" -" int maxBatch1,\n" -" int cellBatch,\n" -" int4 nSplit\n" -" )\n" -"{\n" -" //__local int ldsBatchIdx[WG_SIZE+1];\n" -" __local int ldsCurBatch;\n" -" __local int ldsNextBatch;\n" -" __local int ldsStart;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int wgIdx = GET_GROUP_IDX;\n" -"// int gIdx = GET_GLOBAL_IDX;\n" -"// debugInfo[gIdx].m_valInt0 = gIdx;\n" -" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -" \n" -" \n" -" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" -" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" -" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" -" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" -" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -" //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" -" //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" -" //int cellIdx = xIdx+yIdx*nSplit;\n" -" \n" -" if( gN[cellIdx] == 0 ) \n" -" return;\n" -" int maxBatch = batchSizes[cellIdx];\n" -" \n" -" \n" -" const int start = gOffsets[cellIdx];\n" -" const int end = start + gN[cellIdx];\n" -" \n" -" \n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch = 0;\n" -" ldsNextBatch = 0;\n" -" ldsStart = start;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" int idx=ldsStart+lIdx;\n" -" while (ldsCurBatch < maxBatch)\n" -" {\n" -" for(; idx<end; )\n" -" {\n" -" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" -" {\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" idx+=64;\n" -" } else\n" -" {\n" -" break;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch++;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" \n" -" \n" -"}\n" -"__kernel void solveSingleContactKernel(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" int cellIdx,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int index = get_global_id(0);\n" -" if (index < numConstraintsInBatch)\n" -" {\n" -" int idx=batchOffset+index;\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" } \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl deleted file mode 100644 index 1d70fbbae3..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl +++ /dev/null @@ -1,527 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define mymake_float4 (float4) -//#define make_float2 (float2) -//#define make_uint4 (uint4) -//#define make_int4 (int4) -//#define make_uint2 (uint2) -//#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// - - - - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = mymake_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - - - - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - - - - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1); - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = mymake_float4(-n.xyz,0.f); - *angular0 = -cross3(r0, n); - *angular1 = cross3(r1, n); -} - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ); - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1); - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} -void btPlaneSpace1 (const float4* n, float4* p, float4* q); - void btPlaneSpace1 (const float4* n, float4* p, float4* q) -{ - if (fabs(n[0].z) > 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - - -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs); -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=ldsCs[0].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - -// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, -// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); - - - { - - __global Constraint4* cs = ldsCs; - - if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(&n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - angVelA -= (angNA*0.1f)*n; - angVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - } - - -} - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelFriction(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idx<end; ) - { - if (gConstraints[idx].m_batchIdx == ldsCurBatch) - { - - solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] ); - - idx+=64; - } else - { - break; - } - } - GROUP_LDS_BARRIER; - if( lIdx == 0 ) - { - ldsCurBatch++; - } - GROUP_LDS_BARRIER; - } - - -} - - - - - - -__kernel void solveSingleFrictionKernel(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - int cellIdx, - int batchOffset, - int numConstraintsInBatch - ) -{ - - int index = get_global_id(0); - if (index < numConstraintsInBatch) - { - - int idx=batchOffset+index; - - solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] ); - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h deleted file mode 100644 index eb58674f22..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solveFriction.h +++ /dev/null @@ -1,421 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveFrictionCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define mymake_float4 (float4)\n" -"//#define make_float2 (float2)\n" -"//#define make_uint4 (uint4)\n" -"//#define make_int4 (int4)\n" -"//#define make_uint2 (uint2)\n" -"//#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = mymake_float4(-n.xyz,0.f);\n" -" *angular0 = -cross3(r0, n);\n" -" *angular1 = cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" -" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n[0].z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n[0].z*k;\n" -" p[0].z = n[0].y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n[0].x*p[0].z;\n" -" q[0].z = n[0].x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n[0].y*k;\n" -" p[0].y = n[0].x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n[0].z*p[0].y;\n" -" q[0].y = n[0].z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" -"{\n" -" float frictionCoeff = ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" {\n" -" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" -" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -" float sum = 0;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" -" }\n" -" frictionCoeff = 0.7f;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" maxRambdaDt[j] = frictionCoeff*sum;\n" -" minRambdaDt[j] = -maxRambdaDt[j];\n" -" }\n" -" \n" -"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" -" \n" -" \n" -" {\n" -" \n" -" __global Constraint4* cs = ldsCs;\n" -" \n" -" if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" -" const float4 center = cs->m_center;\n" -" \n" -" float4 n = -cs->m_linear;\n" -" \n" -" float4 tangent[2];\n" -" btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = center - posA;\n" -" float4 r1 = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB );\n" -" rambdaDt *= cs->m_fJacCoeffInv[i];\n" -" \n" -" {\n" -" float prevSum = cs->m_fAppliedRambdaDt[i];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt[i] );\n" -" updated = min2( updated, maxRambdaDt[i] );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_fAppliedRambdaDt[i] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" linVelA += linImp0;\n" -" angVelA += angImp0;\n" -" linVelB += linImp1;\n" -" angVelB += angImp1;\n" -" }\n" -" { // angular damping for point constraint\n" -" float4 ab = normalize3( posB - posA );\n" -" float4 ac = normalize3( center - posA );\n" -" if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" -" {\n" -" float angNA = dot3F4( n, angVelA );\n" -" float angNB = dot3F4( n, angVelB );\n" -" \n" -" angVelA -= (angNA*0.1f)*n;\n" -" angVelB -= (angNB*0.1f)*n;\n" -" }\n" -" }\n" -" }\n" -" \n" -" \n" -" }\n" -" if (gBodies[aIdx].m_invMass)\n" -" {\n" -" gBodies[aIdx].m_linVel = linVelA;\n" -" gBodies[aIdx].m_angVel = angVelA;\n" -" } else\n" -" {\n" -" gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" }\n" -" if (gBodies[bIdx].m_invMass)\n" -" {\n" -" gBodies[bIdx].m_linVel = linVelB;\n" -" gBodies[bIdx].m_angVel = angVelB;\n" -" } else\n" -" {\n" -" gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" }\n" -" \n" -"}\n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void BatchSolveKernelFriction(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" __global int* gN,\n" -" __global int* gOffsets,\n" -" __global int* batchSizes,\n" -" int maxBatch1,\n" -" int cellBatch,\n" -" int4 nSplit\n" -" )\n" -"{\n" -" //__local int ldsBatchIdx[WG_SIZE+1];\n" -" __local int ldsCurBatch;\n" -" __local int ldsNextBatch;\n" -" __local int ldsStart;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int wgIdx = GET_GROUP_IDX;\n" -"// int gIdx = GET_GLOBAL_IDX;\n" -"// debugInfo[gIdx].m_valInt0 = gIdx;\n" -" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" -" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" -" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" -" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" -" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -" \n" -" if( gN[cellIdx] == 0 ) \n" -" return;\n" -" int maxBatch = batchSizes[cellIdx];\n" -" const int start = gOffsets[cellIdx];\n" -" const int end = start + gN[cellIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch = 0;\n" -" ldsNextBatch = 0;\n" -" ldsStart = start;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" int idx=ldsStart+lIdx;\n" -" while (ldsCurBatch < maxBatch)\n" -" {\n" -" for(; idx<end; )\n" -" {\n" -" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" -" {\n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" idx+=64;\n" -" } else\n" -" {\n" -" break;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch++;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" \n" -" \n" -"}\n" -"__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" int cellIdx,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int index = get_global_id(0);\n" -" if (index < numConstraintsInBatch)\n" -" {\n" -" \n" -" int idx=batchOffset+index;\n" -" \n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" } \n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl deleted file mode 100644 index 8e2de7b5a6..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl +++ /dev/null @@ -1,277 +0,0 @@ - -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - - - -#define WG_SIZE 64 - - - - - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - - - - - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - - - -typedef struct -{ - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; -} ConstBufferCTC; - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, -int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff -) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, - &cs ); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} - - - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h deleted file mode 100644 index eb1834ee00..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ /dev/null @@ -1,703 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetupCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#ifndef B3_CONTACT_CONSTRAINT5_H\n" -"#define B3_CONTACT_CONSTRAINT5_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" -"struct b3ContactConstraint4\n" -"{\n" -" b3Float4 m_linear;//normal?\n" -" b3Float4 m_worldPos[4];\n" -" b3Float4 m_center; // friction\n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; // friction\n" -" float m_fAppliedRambdaDt[2]; // friction\n" -" unsigned int m_bodyA;\n" -" unsigned int m_bodyB;\n" -" int m_batchIdx;\n" -" unsigned int m_paddings;\n" -"};\n" -"//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" -"inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" -"{\n" -" return constraint->m_linear.w; \n" -"}\n" -"#endif //B3_CONTACT_CONSTRAINT5_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" -" void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" -"{\n" -" if (b3Fabs(n.z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n.y*n.y + n.z*n.z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n.z*k;\n" -" p[0].z = n.y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n.x*p[0].z;\n" -" q[0].z = n.x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n.x*n.x + n.y*n.y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n.y*k;\n" -" p[0].y = n.x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n.z*p[0].y;\n" -" q[0].y = n.z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -" \n" -"void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" -"{\n" -" *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" -" *angular0 = b3Cross3(r0, n);\n" -" *angular1 = -b3Cross3(r1, n);\n" -"}\n" -"float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" -" b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" -"{\n" -" return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" -" float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" -" b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" -" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" -" b3ContactConstraint4_t* dstC )\n" -"{\n" -" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" -" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -" float dtInv = 1.f/dt;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -" dstC->m_linear = src->m_worldNormalOnB;\n" -" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" -" b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" -" if( ic >= src->m_worldNormalOnB.w )//npoints\n" -" {\n" -" dstC->m_jacCoeffInv[ic] = 0.f;\n" -" continue;\n" -" }\n" -" float relVelN;\n" -" {\n" -" b3Float4 linear, angular0, angular1;\n" -" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" -" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB);\n" -" float e = 0.f;//src->getRestituitionCoeff();\n" -" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -" dstC->m_b[ic] = e*relVelN;\n" -" //float penetration = src->m_worldPosB[ic].w;\n" -" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" }\n" -" if( src->m_worldNormalOnB.w > 0 )//npoints\n" -" { // prepare friction\n" -" b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" -" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" -" center += src->m_worldPosB[i];\n" -" center /= (float)src->m_worldNormalOnB.w;\n" -" b3Float4 tangent[2];\n" -" b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" -" \n" -" b3Float4 r[2];\n" -" r[0] = center - posA;\n" -" r[1] = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" b3Float4 linear, angular0, angular1;\n" -" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" -" }\n" -" dstC->m_center = center;\n" -" }\n" -" for(int i=0; i<4; i++)\n" -" {\n" -" if( i<src->m_worldNormalOnB.w )\n" -" {\n" -" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" -" }\n" -" else\n" -" {\n" -" dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" -" }\n" -" }\n" -"}\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -" \n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"typedef struct\n" -"{\n" -" int m_nContacts;\n" -" float m_dt;\n" -" float m_positionDrift;\n" -" float m_positionConstraintCoeff;\n" -"} ConstBufferCTC;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" -"int nContacts,\n" -"float dt,\n" -"float positionDrift,\n" -"float positionConstraintCoeff\n" -")\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" -" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" -" b3ContactConstraint4_t cs;\n" -" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" -" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" -" &cs );\n" -" \n" -" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -" gConstraintOut[gIdx] = cs;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl deleted file mode 100644 index 3dc48d4350..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl +++ /dev/null @@ -1,613 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - - - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -// others -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb ) -{ - int nContacts = cb.x; - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int srcIdx = sortData[gIdx].y; - out[gIdx] = in[srcIdx]; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sd; - sd.x = contactsIn[gIdx].m_childIndexB; - sd.y = gIdx; - sortDataOut[gIdx] = sd; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_childIndexA; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - - - -typedef struct -{ - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; -} ConstBufferSSD; - - -__constant const int gridTable4x4[] = -{ - 0,1,17,16, - 1,2,18,19, - 17,18,32,3, - 16,19,3,34 -}; - -__constant const int gridTable8x8[] = -{ - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131,144,128,130,147,129,145,146, - 208,195,194,192,193,211,210,209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151,133,149,150,135,148,132,134, - 197,27,214,213,212,199,198,196 - -}; - - - - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, -int nContacts,float scale,int4 nSplit,int staticIdx) - -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit ); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx); - bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx); - -#if USE_SPATIAL_BATCHING - int idx = (aStatic)? bIdx: aIdx; - float4 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1); - int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1); - int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1); - int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y); - -#else//USE_SPATIAL_BATCHING - #if USE_4x4_GRID - int aa = aIdx&3; - int bb = bIdx&3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*4; - int newIndex = gridTable4x4[gridIndex]; - #else//USE_4x4_GRID - int aa = aIdx&7; - int bb = bIdx&7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*8; - int newIndex = gridTable8x8[gridIndex]; - #endif//USE_4x4_GRID -#endif//USE_SPATIAL_BATCHING - - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } -} - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb ) -{ - int gIdx = GET_GLOBAL_IDX; - if( gIdx < cb.x ) - { - gOut[gIdx] = gIn[gIdx]; - } -} - - - diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h deleted file mode 100644 index 1b5819f6cf..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ /dev/null @@ -1,601 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetup2CL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"Matrix3x3 mtZero();\n" -"__inline\n" -"Matrix3x3 mtIdentity();\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"Matrix3x3 mtZero()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(0.f);\n" -" m.m_row[1] = (float4)(0.f);\n" -" m.m_row[2] = (float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtIdentity()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(1,0,0,0);\n" -" m.m_row[1] = (float4)(0,1,0,0);\n" -" m.m_row[2] = (float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" -"{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -" \n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"// others\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" -"{\n" -" int nContacts = cb.x;\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int srcIdx = sortData[gIdx].y;\n" -" out[gIdx] = in[srcIdx];\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sd;\n" -" sd.x = contactsIn[gIdx].m_childIndexB;\n" -" sd.y = gIdx;\n" -" sortDataOut[gIdx] = sd;\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" sdOut.x = contactsIn[sdIn.y].m_childIndexA;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"typedef struct\n" -"{\n" -" int m_nContacts;\n" -" int m_staticIdx;\n" -" float m_scale;\n" -" int m_nSplit;\n" -"} ConstBufferSSD;\n" -"__constant const int gridTable4x4[] = \n" -"{\n" -" 0,1,17,16,\n" -" 1,2,18,19,\n" -" 17,18,32,3,\n" -" 16,19,3,34\n" -"};\n" -"__constant const int gridTable8x8[] = \n" -"{\n" -" 0, 2, 3, 16, 17, 18, 19, 1,\n" -" 66, 64, 80, 67, 82, 81, 65, 83,\n" -" 131,144,128,130,147,129,145,146,\n" -" 208,195,194,192,193,211,210,209,\n" -" 21, 22, 23, 5, 4, 6, 7, 20,\n" -" 86, 85, 69, 87, 70, 68, 84, 71,\n" -" 151,133,149,150,135,148,132,134,\n" -" 197,27,214,213,212,199,198,196\n" -" \n" -"};\n" -"#define USE_SPATIAL_BATCHING 1\n" -"#define USE_4x4_GRID 1\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" -"int nContacts,float scale,int4 nSplit,int staticIdx)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n" -" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" -" int aIdx = abs(aPtrAndSignBit );\n" -" int bIdx = abs(bPtrAndSignBit);\n" -" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" -" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" -"#if USE_SPATIAL_BATCHING \n" -" int idx = (aStatic)? bIdx: aIdx;\n" -" float4 p = gBodies[idx].m_pos;\n" -" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n" -" int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n" -" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n" -" int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n" -" \n" -"#else//USE_SPATIAL_BATCHING\n" -" #if USE_4x4_GRID\n" -" int aa = aIdx&3;\n" -" int bb = bIdx&3;\n" -" if (aStatic)\n" -" aa = bb;\n" -" if (bStatic)\n" -" bb = aa;\n" -" int gridIndex = aa + bb*4;\n" -" int newIndex = gridTable4x4[gridIndex];\n" -" #else//USE_4x4_GRID\n" -" int aa = aIdx&7;\n" -" int bb = bIdx&7;\n" -" if (aStatic)\n" -" aa = bb;\n" -" if (bStatic)\n" -" bb = aa;\n" -" int gridIndex = aa + bb*8;\n" -" int newIndex = gridTable8x8[gridIndex];\n" -" #endif//USE_4x4_GRID\n" -"#endif//USE_SPATIAL_BATCHING\n" -" gSortDataOut[gIdx].x = newIndex;\n" -" gSortDataOut[gIdx].y = gIdx;\n" -" }\n" -" else\n" -" {\n" -" gSortDataOut[gIdx].x = 0xffffffff;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < cb.x )\n" -" {\n" -" gOut[gIdx] = gIn[gIdx];\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl deleted file mode 100644 index a21a08c3b4..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl +++ /dev/null @@ -1,968 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a1, float4 b1) -{ - - float4 a=make_float4(a1.xyz,0.f); - float4 b=make_float4(b1.xyz,0.f); - //float4 a=a1; - //float4 b=b1; - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - - - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - int m_batchIdx; - u32 m_paddings; -} Constraint4; - - - - - - -__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) -{ - int i = GET_GLOBAL_IDX; - - if( i < numContactManifolds) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - bool isFixedA = (pa <0) || (pa == fixedBodyIndex); - int bodyIndexA = abs(pa); - if (!isFixedA) - { - AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x); - } - barrier(CLK_GLOBAL_MEM_FENCE); - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - bool isFixedB = (pb <0) || (pb == fixedBodyIndex); - int bodyIndexB = abs(pb); - if (!isFixedB) - { - AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y); - } - } -} - -__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies) -{ - int i = GET_GLOBAL_IDX; - - if( i < numSplitBodies) - { - linearVelocities[i] = make_float4(0); - angularVelocities[i] = make_float4(0); - } -} - - -__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) -{ - int i = GET_GLOBAL_IDX; - if (i<numBodies) - { - if (gBodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/((float)count); - float4 averageLinVel = make_float4(0.f); - float4 averageAngVel = make_float4(0.f); - - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - - }//bodies[i].m_invMass - }//i<numBodies -} - - - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = make_float4(n.xyz,0.f); - *angular0 = cross3(r0, n); - *angular1 = -cross3(r1, n); -} - - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); -} - - -void btPlaneSpace1 (float4 n, float4* p, float4* q); - void btPlaneSpace1 (float4 n, float4* p, float4* q) -{ - if (fabs(n.z) > 0.70710678f) { - // choose p in y-z plane - float a = n.y*n.y + n.z*n.z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n.z*k; - p[0].z = n.y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n.x*p[0].z; - q[0].z = n.x*p[0].y; - } - else { - // choose p in x-y plane - float a = n.x*n.x + n.y*n.y; - float k = 1.f/sqrt(a); - p[0].x = -n.y*k; - p[0].y = n.x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n.z*p[0].y; - q[0].y = n.z*p[0].x; - q[0].z = a*k; - } -} - - - - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB, - float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - - -// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - - -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) -{ - - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - float4 dLinVelA = make_float4(0,0,0,0); - float4 dAngVelA = make_float4(0,0,0,0); - float4 dLinVelB = make_float4(0,0,0,0); - float4 dAngVelB = make_float4(0,0,0,0); - - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[0].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - - if (invMassA) - { - dLinVelA = deltaLinearVelocities[splitIndexA]; - dAngVelA = deltaAngularVelocities[splitIndexA]; - } - - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[0].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - - if (invMassB) - { - dLinVelB = deltaLinearVelocities[splitIndexB]; - dAngVelB = deltaAngularVelocities[splitIndexB]; - } - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB); - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - -} - - -__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, -float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (i<numManifolds) - { - solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - } -} - - - - -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, - __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) -{ - float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - float4 dLinVelA = make_float4(0,0,0,0); - float4 dAngVelA = make_float4(0,0,0,0); - float4 dLinVelB = make_float4(0,0,0,0); - float4 dAngVelB = make_float4(0,0,0,0); - - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[0].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - - if (invMassA) - { - dLinVelA = deltaLinearVelocities[splitIndexA]; - dAngVelA = deltaAngularVelocities[splitIndexA]; - } - - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[0].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - - if (invMassB) - { - dLinVelB = deltaLinearVelocities[splitIndexB]; - dAngVelB = deltaAngularVelocities[splitIndexB]; - } - - - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=ldsCs[0].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - -// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, -// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); - - - { - - __global Constraint4* cs = ldsCs; - - if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - dAngVelA -= (angNA*0.1f)*n; - dAngVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - - -} - - -__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , - __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, - float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (i<numManifolds) - { - solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - } -} - - -__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) -{ - int i = GET_GLOBAL_IDX; - if (i<numBodies) - { - if (gBodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; - gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } -} - - - -void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, - const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, - Constraint4* dstC ) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f/dt; - for(int ic=0; ic<4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); - for(int ic=0; ic<4; ic++) - { - float4 r0 = src->m_worldPosB[ic] - posA; - float4 r1 = src->m_worldPosB[ic] - posB; - - if( ic >= src->m_worldNormalOnB.w )//npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - float4 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f;//src->getRestituitionCoeff(); - if( relVelN*relVelN < 0.004f ) e = 0.f; - - dstC->m_b[ic] = e*relVelN; - //float penetration = src->m_worldPosB[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if( src->m_worldNormalOnB.w > 0 )//npoints - { // prepare friction - float4 center = make_float4(0.f); - for(int i=0; i<src->m_worldNormalOnB.w; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB.w; - - float4 tangent[2]; - btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]); - - float4 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for(int i=0; i<2; i++) - { - float4 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for(int i=0; i<4; i++) - { - if( i<src->m_worldNormalOnB.w ) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, -__global const unsigned int* bodyCount, -int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff -) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - Constraint4 cs; - - float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1; - float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, - &cs ); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h deleted file mode 100644 index c0173ad9f4..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ /dev/null @@ -1,909 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverUtilsCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a1, float4 b1)\n" -"{\n" -" float4 a=make_float4(a1.xyz,0.f);\n" -" float4 b=make_float4(b1.xyz,0.f);\n" -" //float4 a=a1;\n" -" //float4 b=b1;\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"Matrix3x3 mtZero();\n" -"__inline\n" -"Matrix3x3 mtIdentity();\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"Matrix3x3 mtZero()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(0.f);\n" -" m.m_row[1] = (float4)(0.f);\n" -" m.m_row[2] = (float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtIdentity()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(1,0,0,0);\n" -" m.m_row[1] = (float4)(0,1,0,0);\n" -" m.m_row[2] = (float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" -"{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings;\n" -"} Constraint4;\n" -"__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" \n" -" if( i < numContactManifolds)\n" -" {\n" -" int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;\n" -" bool isFixedA = (pa <0) || (pa == fixedBodyIndex);\n" -" int bodyIndexA = abs(pa);\n" -" if (!isFixedA)\n" -" {\n" -" AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);\n" -" }\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" -" int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;\n" -" bool isFixedB = (pb <0) || (pb == fixedBodyIndex);\n" -" int bodyIndexB = abs(pb);\n" -" if (!isFixedB)\n" -" {\n" -" AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);\n" -" } \n" -" }\n" -"}\n" -"__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" \n" -" if( i < numSplitBodies)\n" -" {\n" -" linearVelocities[i] = make_float4(0);\n" -" angularVelocities[i] = make_float4(0);\n" -" }\n" -"}\n" -"__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" -"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numBodies)\n" -" {\n" -" if (gBodies[i].m_invMass)\n" -" {\n" -" int bodyOffset = offsetSplitBodies[i];\n" -" int count = bodyCount[i];\n" -" float factor = 1.f/((float)count);\n" -" float4 averageLinVel = make_float4(0.f);\n" -" float4 averageAngVel = make_float4(0.f);\n" -" \n" -" for (int j=0;j<count;j++)\n" -" {\n" -" averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;\n" -" averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;\n" -" }\n" -" \n" -" for (int j=0;j<count;j++)\n" -" {\n" -" deltaLinearVelocities[bodyOffset+j] = averageLinVel;\n" -" deltaAngularVelocities[bodyOffset+j] = averageAngVel;\n" -" }\n" -" \n" -" }//bodies[i].m_invMass\n" -" }//i<numBodies\n" -"}\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = make_float4(n.xyz,0.f);\n" -" *angular0 = cross3(r0, n);\n" -" *angular1 = -cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n" -"}\n" -"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" -" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n.z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n.y*n.y + n.z*n.z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n.z*k;\n" -" p[0].z = n.y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n.x*p[0].z;\n" -" q[0].z = n.x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n.x*n.x + n.y*n.y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n.y*k;\n" -" p[0].y = n.x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n.z*p[0].y;\n" -" q[0].y = n.z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" -" float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)\n" -"{\n" -" float minRambdaDt = 0;\n" -" float maxRambdaDt = FLT_MAX;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = cs->m_worldPos[ic] - posA;\n" -" float4 r1 = cs->m_worldPos[ic] - posB;\n" -" setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" -" \n" -" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" -" *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" -" rambdaDt *= cs->m_jacCoeffInv[ic];\n" -" \n" -" {\n" -" float prevSum = cs->m_appliedRambdaDt[ic];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt );\n" -" updated = min2( updated, maxRambdaDt );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_appliedRambdaDt[ic] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" if (invMassA)\n" -" {\n" -" *dLinVelA += linImp0;\n" -" *dAngVelA += angImp0;\n" -" }\n" -" if (invMassB)\n" -" {\n" -" *dLinVelB += linImp1;\n" -" *dAngVelB += angImp1;\n" -" }\n" -" }\n" -"}\n" -"// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" -"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" -"{\n" -" //float frictionCoeff = ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" float4 dLinVelA = make_float4(0,0,0,0);\n" -" float4 dAngVelA = make_float4(0,0,0,0);\n" -" float4 dLinVelB = make_float4(0,0,0,0);\n" -" float4 dAngVelB = make_float4(0,0,0,0);\n" -" \n" -" int bodyOffsetA = offsetSplitBodies[aIdx];\n" -" int constraintOffsetA = contactConstraintOffsets[0].x;\n" -" int splitIndexA = bodyOffsetA+constraintOffsetA;\n" -" \n" -" if (invMassA)\n" -" {\n" -" dLinVelA = deltaLinearVelocities[splitIndexA];\n" -" dAngVelA = deltaAngularVelocities[splitIndexA];\n" -" }\n" -" int bodyOffsetB = offsetSplitBodies[bIdx];\n" -" int constraintOffsetB = contactConstraintOffsets[0].y;\n" -" int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -" if (invMassB)\n" -" {\n" -" dLinVelB = deltaLinearVelocities[splitIndexB];\n" -" dAngVelB = deltaAngularVelocities[splitIndexB];\n" -" }\n" -" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -" posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" -" if (invMassA)\n" -" {\n" -" deltaLinearVelocities[splitIndexA] = dLinVelA;\n" -" deltaAngularVelocities[splitIndexA] = dAngVelA;\n" -" } \n" -" if (invMassB)\n" -" {\n" -" deltaLinearVelocities[splitIndexB] = dLinVelB;\n" -" deltaAngularVelocities[splitIndexB] = dAngVelB;\n" -" }\n" -"}\n" -"__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" -"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" -"float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" -")\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numManifolds)\n" -" {\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -" }\n" -"}\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n" -" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" -"{\n" -" float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" float4 dLinVelA = make_float4(0,0,0,0);\n" -" float4 dAngVelA = make_float4(0,0,0,0);\n" -" float4 dLinVelB = make_float4(0,0,0,0);\n" -" float4 dAngVelB = make_float4(0,0,0,0);\n" -" \n" -" int bodyOffsetA = offsetSplitBodies[aIdx];\n" -" int constraintOffsetA = contactConstraintOffsets[0].x;\n" -" int splitIndexA = bodyOffsetA+constraintOffsetA;\n" -" \n" -" if (invMassA)\n" -" {\n" -" dLinVelA = deltaLinearVelocities[splitIndexA];\n" -" dAngVelA = deltaAngularVelocities[splitIndexA];\n" -" }\n" -" int bodyOffsetB = offsetSplitBodies[bIdx];\n" -" int constraintOffsetB = contactConstraintOffsets[0].y;\n" -" int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -" if (invMassB)\n" -" {\n" -" dLinVelB = deltaLinearVelocities[splitIndexB];\n" -" dAngVelB = deltaAngularVelocities[splitIndexB];\n" -" }\n" -" {\n" -" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" -" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -" float sum = 0;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" -" }\n" -" frictionCoeff = 0.7f;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" maxRambdaDt[j] = frictionCoeff*sum;\n" -" minRambdaDt[j] = -maxRambdaDt[j];\n" -" }\n" -" \n" -"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" -" \n" -" \n" -" {\n" -" \n" -" __global Constraint4* cs = ldsCs;\n" -" \n" -" if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" -" const float4 center = cs->m_center;\n" -" \n" -" float4 n = -cs->m_linear;\n" -" \n" -" float4 tangent[2];\n" -" btPlaneSpace1(n,&tangent[0],&tangent[1]);\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = center - posA;\n" -" float4 r1 = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );\n" -" rambdaDt *= cs->m_fJacCoeffInv[i];\n" -" \n" -" {\n" -" float prevSum = cs->m_fAppliedRambdaDt[i];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt[i] );\n" -" updated = min2( updated, maxRambdaDt[i] );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_fAppliedRambdaDt[i] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" dLinVelA += linImp0;\n" -" dAngVelA += angImp0;\n" -" dLinVelB += linImp1;\n" -" dAngVelB += angImp1;\n" -" }\n" -" { // angular damping for point constraint\n" -" float4 ab = normalize3( posB - posA );\n" -" float4 ac = normalize3( center - posA );\n" -" if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" -" {\n" -" float angNA = dot3F4( n, angVelA );\n" -" float angNB = dot3F4( n, angVelB );\n" -" \n" -" dAngVelA -= (angNA*0.1f)*n;\n" -" dAngVelB -= (angNB*0.1f)*n;\n" -" }\n" -" }\n" -" }\n" -" \n" -" \n" -" }\n" -" if (invMassA)\n" -" {\n" -" deltaLinearVelocities[splitIndexA] = dLinVelA;\n" -" deltaAngularVelocities[splitIndexA] = dAngVelA;\n" -" } \n" -" if (invMassB)\n" -" {\n" -" deltaLinearVelocities[splitIndexB] = dLinVelB;\n" -" deltaAngularVelocities[splitIndexB] = dAngVelB;\n" -" }\n" -" \n" -"}\n" -"__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" -" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" -" float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" -")\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numManifolds)\n" -" {\n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -" }\n" -"}\n" -"__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numBodies)\n" -" {\n" -" if (gBodies[i].m_invMass)\n" -" {\n" -" int bodyOffset = offsetSplitBodies[i];\n" -" int count = bodyCount[i];\n" -" if (count)\n" -" {\n" -" gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];\n" -" gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];\n" -" }\n" -" }\n" -" }\n" -"}\n" -"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" -" const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" -" Constraint4* dstC )\n" -"{\n" -" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" -" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -" float dtInv = 1.f/dt;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -" dstC->m_linear = src->m_worldNormalOnB;\n" -" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" float4 r0 = src->m_worldPosB[ic] - posA;\n" -" float4 r1 = src->m_worldPosB[ic] - posB;\n" -" if( ic >= src->m_worldNormalOnB.w )//npoints\n" -" {\n" -" dstC->m_jacCoeffInv[ic] = 0.f;\n" -" continue;\n" -" }\n" -" float relVelN;\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" -" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" -" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB);\n" -" float e = 0.f;//src->getRestituitionCoeff();\n" -" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -" dstC->m_b[ic] = e*relVelN;\n" -" //float penetration = src->m_worldPosB[ic].w;\n" -" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" }\n" -" if( src->m_worldNormalOnB.w > 0 )//npoints\n" -" { // prepare friction\n" -" float4 center = make_float4(0.f);\n" -" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" -" center += src->m_worldPosB[i];\n" -" center /= (float)src->m_worldNormalOnB.w;\n" -" float4 tangent[2];\n" -" btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" -" \n" -" float4 r[2];\n" -" r[0] = center - posA;\n" -" r[1] = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" -" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" -" }\n" -" dstC->m_center = center;\n" -" }\n" -" for(int i=0; i<4; i++)\n" -" {\n" -" if( i<src->m_worldNormalOnB.w )\n" -" {\n" -" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" -" }\n" -" else\n" -" {\n" -" dstC->m_worldPos[i] = make_float4(0.f);\n" -" }\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" -"__global const unsigned int* bodyCount,\n" -"int nContacts,\n" -"float dt,\n" -"float positionDrift,\n" -"float positionConstraintCoeff\n" -")\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" -" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" Constraint4 cs;\n" -" float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;\n" -" float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;\n" -" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" -" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" -" &cs );\n" -" \n" -" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -" gConstraintOut[gIdx] = cs;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl deleted file mode 100644 index ba8ba735d0..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl +++ /dev/null @@ -1,22 +0,0 @@ - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" - - -__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB) -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB); - } -} - -__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs) -{ - int pairId = get_global_id(0); - if( pairId< numPairs ) - { - pairs[pairId].z = 0xffffffff; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h deleted file mode 100644 index d70e74017a..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ /dev/null @@ -1,483 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* updateAabbsKernelCL= \ -"#ifndef B3_UPDATE_AABBS_H\n" -"#define B3_UPDATE_AABBS_H\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -" union\n" -" {\n" -" float m_min[4];\n" -" b3Float4 m_minVec;\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float m_max[4];\n" -" b3Float4 m_maxVec;\n" -" int m_signedMaxIndices[4];\n" -" };\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -" b3Float4ConstArg pos,\n" -" b3QuatConstArg orn,\n" -" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -" b3Mat3x3 m;\n" -" m = b3QuatGetRotationMatrix(orn);\n" -" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -" \n" -" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -" 0.f);\n" -" *aabbMinOut = center-extent;\n" -" *aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -" SHAPE_HEIGHT_FIELD=1,\n" -" SHAPE_CONVEX_HULL=3,\n" -" SHAPE_PLANE=4,\n" -" SHAPE_CONCAVE_TRIMESH=5,\n" -" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -" SHAPE_SPHERE=7,\n" -" MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" int m_shapeType;\n" -" union\n" -" {\n" -" int m_shapeIndex;\n" -" float m_height;\n" -" };\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -" b3Float4 m_childPosition;\n" -" b3Quat m_childOrientation;\n" -" union\n" -" {\n" -" int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS\n" -" int m_capsuleAxis;\n" -" };\n" -" union \n" -" {\n" -" float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES\n" -" int m_numChildShapes;//used for compound shape\n" -" };\n" -" union \n" -" {\n" -" float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES\n" -" int m_collidableShapeIndex;\n" -" };\n" -" int m_shapeType;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -" int m_bodyIndexA;\n" -" int m_bodyIndexB;\n" -"// int m_pairType;\n" -" int m_childShapeIndexA;\n" -" int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" -"{\n" -" __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" -" b3Float4 position = body->m_pos;\n" -" b3Quat orientation = body->m_quat;\n" -" \n" -" int collidableIndex = body->m_collidableIdx;\n" -" int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" -" \n" -" if (shapeIndex>=0)\n" -" {\n" -" \n" -" b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" -" b3Aabb_t worldAabb;\n" -" \n" -" b3Float4 aabbAMinOut,aabbAMaxOut; \n" -" float margin = 0.f;\n" -" b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" -" \n" -" worldAabb.m_minVec =aabbAMinOut;\n" -" worldAabb.m_minIndices[3] = bodyId;\n" -" worldAabb.m_maxVec = aabbAMaxOut;\n" -" worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1;\n" -" worldAabbs[bodyId] = worldAabb;\n" -" }\n" -"}\n" -"#endif //B3_UPDATE_AABBS_H\n" -"__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" -"{\n" -" int nodeID = get_global_id(0);\n" -" if( nodeID < numNodes )\n" -" {\n" -" b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" -" }\n" -"}\n" -"__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs)\n" -"{\n" -" int pairId = get_global_id(0);\n" -" if( pairId< numPairs )\n" -" {\n" -" pairs[pairId].z = 0xffffffff;\n" -" }\n" -"}\n" -; diff --git a/thirdparty/bullet/src/Bullet3OpenCL/premake4.lua b/thirdparty/bullet/src/Bullet3OpenCL/premake4.lua deleted file mode 100644 index 55a8613634..0000000000 --- a/thirdparty/bullet/src/Bullet3OpenCL/premake4.lua +++ /dev/null @@ -1,29 +0,0 @@ -function createProject(vendor) - hasCL = findOpenCL(vendor) - - if (hasCL) then - - project ("Bullet3OpenCL_" .. vendor) - - initOpenCL(vendor) - - kind "StaticLib" - - - includedirs { - ".",".." - } - - files { - "**.cpp", - "**.h" - } - - end -end - -createProject("clew") -createProject("AMD") -createProject("Intel") -createProject("NVIDIA") -createProject("Apple") |