diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL')
122 files changed, 0 insertions, 56183 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h deleted file mode 100644 index b296992525..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h +++ /dev/null @@ -1,42 +0,0 @@ - -#ifndef B3_GPU_BROADPHASE_INTERFACE_H -#define B3_GPU_BROADPHASE_INTERFACE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3Vector3.h" -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - -class b3GpuBroadphaseInterface -{ -public: - typedef class b3GpuBroadphaseInterface*(CreateFunc)(cl_context ctx, cl_device_id device, cl_command_queue q); - - virtual ~b3GpuBroadphaseInterface() - { - } - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0; - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) = 0; - - virtual void calculateOverlappingPairs(int maxPairs) = 0; - virtual void calculateOverlappingPairsHost(int maxPairs) = 0; - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu() = 0; - - virtual cl_mem getAabbBufferWS() = 0; - virtual int getNumOverlap() = 0; - virtual cl_mem getOverlappingPairBuffer() = 0; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() = 0; - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() = 0; - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() = 0; - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() = 0; - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() = 0; -}; - -#endif //B3_GPU_BROADPHASE_INTERFACE_H diff --git 
a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp deleted file mode 100644 index e714fadac3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.cpp +++ /dev/null @@ -1,338 +0,0 @@ - -#include "b3GpuGridBroadphase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "kernels/gridBroadphaseKernels.h" -#include "kernels/sapKernels.h" -//#include "kernels/gridBroadphase.cl" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" -#define B3_GRID_BROADPHASE_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl" - -cl_kernel kCalcHashAABB; -cl_kernel kClearCellStart; -cl_kernel kFindCellStart; -cl_kernel kFindOverlappingPairs; -cl_kernel m_copyAabbsKernel; -cl_kernel m_sap2Kernel; - -//int maxPairsPerBody = 64; -int maxBodiesPerCell = 256; //?? 
- -b3GpuGridBroadphase::b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q) - : m_context(ctx), - m_device(device), - m_queue(q), - m_allAabbsGPU1(ctx, q), - m_smallAabbsMappingGPU(ctx, q), - m_largeAabbsMappingGPU(ctx, q), - m_gpuPairs(ctx, q), - - m_hashGpu(ctx, q), - - m_cellStartGpu(ctx, q), - m_paramsGPU(ctx, q) -{ - b3Vector3 gridSize = b3MakeVector3(3, 3, 3); - b3Vector3 invGridSize = b3MakeVector3(1.f / gridSize[0], 1.f / gridSize[1], 1.f / gridSize[2]); - - m_paramsCPU.m_gridSize[0] = 128; - m_paramsCPU.m_gridSize[1] = 128; - m_paramsCPU.m_gridSize[2] = 128; - m_paramsCPU.m_gridSize[3] = maxBodiesPerCell; - m_paramsCPU.setMaxBodiesPerCell(maxBodiesPerCell); - m_paramsCPU.m_invCellSize[0] = invGridSize[0]; - m_paramsCPU.m_invCellSize[1] = invGridSize[1]; - m_paramsCPU.m_invCellSize[2] = invGridSize[2]; - m_paramsCPU.m_invCellSize[3] = 0.f; - m_paramsGPU.push_back(m_paramsCPU); - - cl_int errNum = 0; - - { - const char* sapSrc = sapCL; - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH); - b3Assert(errNum == CL_SUCCESS); - m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg); - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - } - - { - cl_program gridProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, gridBroadphaseCL, &errNum, "", B3_GRID_BROADPHASE_PATH); - b3Assert(errNum == CL_SUCCESS); - - kCalcHashAABB = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kCalcHashAABB", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - kClearCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kClearCellStart", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - 
kFindCellStart = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindCellStart", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - - kFindOverlappingPairs = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, gridBroadphaseCL, "kFindOverlappingPairs", &errNum, gridProg); - b3Assert(errNum == CL_SUCCESS); - } - - m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue); -} -b3GpuGridBroadphase::~b3GpuGridBroadphase() -{ - clReleaseKernel(kCalcHashAABB); - clReleaseKernel(kClearCellStart); - clReleaseKernel(kFindCellStart); - clReleaseKernel(kFindOverlappingPairs); - clReleaseKernel(m_sap2Kernel); - clReleaseKernel(m_copyAabbsKernel); - - delete m_sorter; -} - -void b3GpuGridBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size(); //NOT userPtr; - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); -} -void b3GpuGridBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU1.size(); //NOT userPtr; - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU1.size()); - - m_allAabbsCPU1.push_back(aabb); -} - -void b3GpuGridBroadphase::calculateOverlappingPairs(int maxPairs) -{ - B3_PROFILE("b3GpuGridBroadphase::calculateOverlappingPairs"); - - if (0) - { - calculateOverlappingPairsHost(maxPairs); - /* - b3AlignedObjectArray<b3Int4> cpuPairs; - m_gpuPairs.copyToHost(cpuPairs); - printf("host m_gpuPairs.size()=%d\n",m_gpuPairs.size()); - for (int i=0;i<m_gpuPairs.size();i++) - { - printf("host pair %d = 
%d,%d\n",i,cpuPairs[i].x,cpuPairs[i].y); - } - */ - return; - } - - int numSmallAabbs = m_smallAabbsMappingGPU.size(); - - b3OpenCLArray<int> pairCount(m_context, m_queue); - pairCount.push_back(0); - m_gpuPairs.resize(maxPairs); //numSmallAabbs*maxPairsPerBody); - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU1.getBufferCL()), - b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_gpuPairs.getBufferCL()), - b3BufferInfoCL(pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - launcher.setConst(numSmallAabbs); - launcher.setConst(0); //axis is not used - launcher.setConst(maxPairs); - //@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64); - - int numPairs = pairCount.at(0); - - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - } - } - - if (numSmallAabbs) - { - B3_PROFILE("gridKernel"); - m_hashGpu.resize(numSmallAabbs); - { - B3_PROFILE("kCalcHashAABB"); - b3LauncherCL launch(m_queue, kCalcHashAABB, "kCalcHashAABB"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(this->m_paramsGPU.getBufferCL()); - launch.launch1D(numSmallAabbs); - } - - m_sorter->execute(m_hashGpu); - - int numCells = this->m_paramsCPU.m_gridSize[0] * this->m_paramsCPU.m_gridSize[1] * this->m_paramsCPU.m_gridSize[2]; - m_cellStartGpu.resize(numCells); - //b3AlignedObjectArray<int > cellStartCpu; - - 
{ - B3_PROFILE("kClearCellStart"); - b3LauncherCL launch(m_queue, kClearCellStart, "kClearCellStart"); - launch.setConst(numCells); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numCells); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - } - - { - B3_PROFILE("kFindCellStart"); - b3LauncherCL launch(m_queue, kFindCellStart, "kFindCellStart"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - launch.launch1D(numSmallAabbs); - //m_cellStartGpu.copyToHost(cellStartCpu); - //printf("??\n"); - } - - { - B3_PROFILE("kFindOverlappingPairs"); - - b3LauncherCL launch(m_queue, kFindOverlappingPairs, "kFindOverlappingPairs"); - launch.setConst(numSmallAabbs); - launch.setBuffer(m_allAabbsGPU1.getBufferCL()); - launch.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launch.setBuffer(m_hashGpu.getBufferCL()); - launch.setBuffer(m_cellStartGpu.getBufferCL()); - - launch.setBuffer(m_paramsGPU.getBufferCL()); - //launch.setBuffer(0); - launch.setBuffer(pairCount.getBufferCL()); - launch.setBuffer(m_gpuPairs.getBufferCL()); - - launch.setConst(maxPairs); - launch.launch1D(numSmallAabbs); - - int numPairs = pairCount.at(0); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - - m_gpuPairs.resize(numPairs); - - if (0) - { - b3AlignedObjectArray<b3Int4> pairsCpu; - m_gpuPairs.copyToHost(pairsCpu); - - int sz = m_gpuPairs.size(); - printf("m_gpuPairs.size()=%d\n", sz); - for (int i = 0; i < m_gpuPairs.size(); i++) - { - printf("pair %d = %d,%d\n", i, pairsCpu[i].x, pairsCpu[i].y); - } - - printf("?!?\n"); - } - } - } - - //calculateOverlappingPairsHost(maxPairs); -} -void b3GpuGridBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - m_hostPairs.resize(0); - m_allAabbsGPU1.copyToHost(m_allAabbsCPU1); - for (int i = 0; i < m_allAabbsCPU1.size(); i++) - { - for 
(int j = i + 1; j < m_allAabbsCPU1.size(); j++) - { - if (b3TestAabbAgainstAabb2(m_allAabbsCPU1[i].m_minVec, m_allAabbsCPU1[i].m_maxVec, - m_allAabbsCPU1[j].m_minVec, m_allAabbsCPU1[j].m_maxVec)) - { - b3Int4 pair; - int a = m_allAabbsCPU1[j].m_minIndices[3]; - int b = m_allAabbsCPU1[i].m_minIndices[3]; - if (a <= b) - { - pair.x = a; - pair.y = b; //store the original index in the unsorted aabb array - } - else - { - pair.x = b; - pair.y = a; //store the original index in the unsorted aabb array - } - - if (m_hostPairs.size() < maxPairs) - { - m_hostPairs.push_back(pair); - } - } - } - } - - m_gpuPairs.copyFromHost(m_hostPairs); -} - -//call writeAabbsToGpu after done making all changes (createProxy etc) -void b3GpuGridBroadphase::writeAabbsToGpu() -{ - m_allAabbsGPU1.copyFromHost(m_allAabbsCPU1); - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); -} - -cl_mem b3GpuGridBroadphase::getAabbBufferWS() -{ - return this->m_allAabbsGPU1.getBufferCL(); -} -int b3GpuGridBroadphase::getNumOverlap() -{ - return m_gpuPairs.size(); -} -cl_mem b3GpuGridBroadphase::getOverlappingPairBuffer() -{ - return m_gpuPairs.getBufferCL(); -} - -b3OpenCLArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsGPU() -{ - return m_allAabbsGPU1; -} - -b3AlignedObjectArray<b3SapAabb>& b3GpuGridBroadphase::getAllAabbsCPU() -{ - return m_allAabbsCPU1; -} - -b3OpenCLArray<b3Int4>& b3GpuGridBroadphase::getOverlappingPairsGPU() -{ - return m_gpuPairs; -} -b3OpenCLArray<int>& b3GpuGridBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray<int>& b3GpuGridBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h deleted file mode 100644 index b76cb43b68..0000000000 --- 
a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuGridBroadphase.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef B3_GPU_GRID_BROADPHASE_H -#define B3_GPU_GRID_BROADPHASE_H - -#include "b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -struct b3ParamsGridBroadphaseCL -{ - float m_invCellSize[4]; - int m_gridSize[4]; - - int getMaxBodiesPerCell() const - { - return m_gridSize[3]; - } - - void setMaxBodiesPerCell(int maxOverlap) - { - m_gridSize[3] = maxOverlap; - } -}; - -class b3GpuGridBroadphase : public b3GpuBroadphaseInterface -{ -protected: - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU1; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU1; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - b3AlignedObjectArray<b3Int4> m_hostPairs; - b3OpenCLArray<b3Int4> m_gpuPairs; - - b3OpenCLArray<b3SortData> m_hashGpu; - b3OpenCLArray<int> m_cellStartGpu; - - b3ParamsGridBroadphaseCL m_paramsCPU; - b3OpenCLArray<b3ParamsGridBroadphaseCL> m_paramsGPU; - - class b3RadixSort32CL* m_sorter; - -public: - b3GpuGridBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~b3GpuGridBroadphase(); - - static b3GpuBroadphaseInterface* CreateFunc(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuGridBroadphase(ctx, device, q); - } - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done 
making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU(); - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU(); - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU(); - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU(); -}; - -#endif //B3_GPU_GRID_BROADPHASE_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp deleted file mode 100644 index 616fc34f3a..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.cpp +++ /dev/null @@ -1,557 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Initial Author Jackson Lee, 2014 - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "b3GpuParallelLinearBvh.h" - -b3GpuParallelLinearBvh::b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue) : m_queue(queue), - m_radixSorter(context, device, queue), - - m_rootNodeIndex(context, queue), - m_maxDistanceFromRoot(context, queue), - m_temp(context, queue), - - m_internalNodeAabbs(context, queue), - m_internalNodeLeafIndexRanges(context, queue), - m_internalNodeChildNodes(context, queue), - m_internalNodeParentNodes(context, queue), - - m_commonPrefixes(context, queue), - m_commonPrefixLengths(context, queue), - m_distanceFromRoot(context, queue), - - m_leafNodeParentNodes(context, queue), - m_mortonCodesAndAabbIndicies(context, queue), - m_mergedAabb(context, queue), - m_leafNodeAabbs(context, queue), - - m_largeAabbs(context, queue) -{ - m_rootNodeIndex.resize(1); - m_maxDistanceFromRoot.resize(1); - m_temp.resize(1); - - // - const char CL_PROGRAM_PATH[] = "src/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl"; - - const char* kernelSource = parallelLinearBvhCL; //parallelLinearBvhCL.h - cl_int error; - char* additionalMacros = 0; - m_parallelLinearBvhProgram = b3OpenCLUtils::compileCLProgramFromString(context, device, kernelSource, &error, additionalMacros, CL_PROGRAM_PATH); - b3Assert(m_parallelLinearBvhProgram); - - m_separateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "separateAabbs", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_separateAabbsKernel); - m_findAllNodesMergedAabbKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findAllNodesMergedAabb", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findAllNodesMergedAabbKernel); - m_assignMortonCodesAndAabbIndiciesKernel = b3OpenCLUtils::compileCLKernelFromString(context, 
device, kernelSource, "assignMortonCodesAndAabbIndicies", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_assignMortonCodesAndAabbIndiciesKernel); - - m_computeAdjacentPairCommonPrefixKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "computeAdjacentPairCommonPrefix", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_computeAdjacentPairCommonPrefixKernel); - m_buildBinaryRadixTreeLeafNodesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeLeafNodes", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeLeafNodesKernel); - m_buildBinaryRadixTreeInternalNodesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeInternalNodes", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeInternalNodesKernel); - m_findDistanceFromRootKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findDistanceFromRoot", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findDistanceFromRootKernel); - m_buildBinaryRadixTreeAabbsRecursiveKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "buildBinaryRadixTreeAabbsRecursive", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - m_findLeafIndexRangesKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "findLeafIndexRanges", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_findLeafIndexRangesKernel); - - m_plbvhCalculateOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhCalculateOverlappingPairs", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhCalculateOverlappingPairsKernel); - m_plbvhRayTraverseKernel = 
b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhRayTraverse", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhRayTraverseKernel); - m_plbvhLargeAabbAabbTestKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhLargeAabbAabbTest", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhLargeAabbAabbTestKernel); - m_plbvhLargeAabbRayTestKernel = b3OpenCLUtils::compileCLKernelFromString(context, device, kernelSource, "plbvhLargeAabbRayTest", &error, m_parallelLinearBvhProgram, additionalMacros); - b3Assert(m_plbvhLargeAabbRayTestKernel); -} - -b3GpuParallelLinearBvh::~b3GpuParallelLinearBvh() -{ - clReleaseKernel(m_separateAabbsKernel); - clReleaseKernel(m_findAllNodesMergedAabbKernel); - clReleaseKernel(m_assignMortonCodesAndAabbIndiciesKernel); - - clReleaseKernel(m_computeAdjacentPairCommonPrefixKernel); - clReleaseKernel(m_buildBinaryRadixTreeLeafNodesKernel); - clReleaseKernel(m_buildBinaryRadixTreeInternalNodesKernel); - clReleaseKernel(m_findDistanceFromRootKernel); - clReleaseKernel(m_buildBinaryRadixTreeAabbsRecursiveKernel); - - clReleaseKernel(m_findLeafIndexRangesKernel); - - clReleaseKernel(m_plbvhCalculateOverlappingPairsKernel); - clReleaseKernel(m_plbvhRayTraverseKernel); - clReleaseKernel(m_plbvhLargeAabbAabbTestKernel); - clReleaseKernel(m_plbvhLargeAabbRayTestKernel); - - clReleaseProgram(m_parallelLinearBvhProgram); -} - -void b3GpuParallelLinearBvh::build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices, - const b3OpenCLArray<int>& largeAabbIndices) -{ - B3_PROFILE("b3ParallelLinearBvh::build()"); - - int numLargeAabbs = largeAabbIndices.size(); - int numSmallAabbs = smallAabbIndices.size(); - - //Since all AABBs(both large and small) are input as a contiguous array, - //with 2 additional arrays used to indicate the indices of large and small AABBs, - //it is necessary to separate the AABBs so that the 
large AABBs will not degrade the quality of the BVH. - { - B3_PROFILE("Separate large and small AABBs"); - - m_largeAabbs.resize(numLargeAabbs); - m_leafNodeAabbs.resize(numSmallAabbs); - - //Write large AABBs into m_largeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(worldSpaceAabbs.getBufferCL()), - b3BufferInfoCL(largeAabbIndices.getBufferCL()), - - b3BufferInfoCL(m_largeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - - launcher.launch1D(numLargeAabbs); - } - - //Write small AABBs into m_leafNodeAabbs - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(worldSpaceAabbs.getBufferCL()), - b3BufferInfoCL(smallAabbIndices.getBufferCL()), - - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_separateAabbsKernel, "m_separateAabbsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - - launcher.launch1D(numSmallAabbs); - } - - clFinish(m_queue); - } - - // - int numLeaves = numSmallAabbs; //Number of leaves in the BVH == Number of rigid bodies with small AABBs - int numInternalNodes = numLeaves - 1; - - if (numLeaves < 2) - { - //Number of leaf nodes is checked in calculateOverlappingPairs() and testRaysAgainstBvhAabbs(), - //so it does not matter if numLeaves == 0 and rootNodeIndex == -1 - int rootNodeIndex = numLeaves - 1; - m_rootNodeIndex.copyFromHostPointer(&rootNodeIndex, 1); - - //Since the AABBs need to be rearranged(sorted) for the BVH construction algorithm, - //m_mortonCodesAndAabbIndicies.m_value is used to map a sorted AABB index to the unsorted AABB index - //instead of directly moving the AABBs. It needs to be set for the ray cast traversal kernel to work. 
- //( m_mortonCodesAndAabbIndicies[].m_value == unsorted index == index of m_leafNodeAabbs ) - if (numLeaves == 1) - { - b3SortData leaf; - leaf.m_value = 0; //1 leaf so index is always 0; leaf.m_key does not need to be set - - m_mortonCodesAndAabbIndicies.resize(1); - m_mortonCodesAndAabbIndicies.copyFromHostPointer(&leaf, 1); - } - - return; - } - - // - { - m_internalNodeAabbs.resize(numInternalNodes); - m_internalNodeLeafIndexRanges.resize(numInternalNodes); - m_internalNodeChildNodes.resize(numInternalNodes); - m_internalNodeParentNodes.resize(numInternalNodes); - - m_commonPrefixes.resize(numInternalNodes); - m_commonPrefixLengths.resize(numInternalNodes); - m_distanceFromRoot.resize(numInternalNodes); - - m_leafNodeParentNodes.resize(numLeaves); - m_mortonCodesAndAabbIndicies.resize(numLeaves); - m_mergedAabb.resize(numLeaves); - } - - //Find the merged AABB of all small AABBs; this is used to define the size of - //each cell in the virtual grid for the next kernel(2^10 cells in each dimension). 
- { - B3_PROFILE("Find AABB of merged nodes"); - - m_mergedAabb.copyFromOpenCLArray(m_leafNodeAabbs); //Need to make a copy since the kernel modifies the array - - for (int numAabbsNeedingMerge = numLeaves; numAabbsNeedingMerge >= 2; - numAabbsNeedingMerge = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_mergedAabb.getBufferCL()) //Resulting AABB is stored in m_mergedAabb[0] - }; - - b3LauncherCL launcher(m_queue, m_findAllNodesMergedAabbKernel, "m_findAllNodesMergedAabbKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numAabbsNeedingMerge); - - launcher.launch1D(numAabbsNeedingMerge); - } - - clFinish(m_queue); - } - - //Insert the center of the AABBs into a virtual grid, - //then convert the discrete grid coordinates into a morton code - //For each element in m_mortonCodesAndAabbIndicies, set - // m_key == morton code (value to sort by) - // m_value == small AABB index - { - B3_PROFILE("Assign morton codes"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_mergedAabb.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_assignMortonCodesAndAabbIndiciesKernel, "m_assignMortonCodesAndAabbIndiciesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - // - { - B3_PROFILE("Sort leaves by morton codes"); - - m_radixSorter.execute(m_mortonCodesAndAabbIndicies); - clFinish(m_queue); - } - - // - constructBinaryRadixTree(); - - //Since it is a sorted binary radix tree, each internal node contains a contiguous subset of leaf node indices. - //The root node contains leaf node indices in the range [0, numLeafNodes - 1]. 
- //The child nodes of each node split their parent's index range into 2 contiguous halves. - // - //For example, if the root has indices [0, 31], its children might partition that range into [0, 11] and [12, 31]. - //The next level in the tree could then split those ranges into [0, 2], [3, 11], [12, 22], and [23, 31]. - // - //This property can be used for optimizing calculateOverlappingPairs(), to avoid testing each AABB pair twice - { - B3_PROFILE("m_findLeafIndexRangesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findLeafIndexRangesKernel, "m_findLeafIndexRangesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } -} - -void b3GpuParallelLinearBvh::calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs) -{ - int maxPairs = out_overlappingPairs.size(); - b3OpenCLArray<int>& numPairsGpu = m_temp; - - int reset = 0; - numPairsGpu.copyFromHostPointer(&reset, 1); - - // - if (m_leafNodeAabbs.size() > 1) - { - B3_PROFILE("PLBVH small-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - - b3BufferInfoCL(numPairsGpu.getBufferCL()), - b3BufferInfoCL(out_overlappingPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhCalculateOverlappingPairsKernel, "m_plbvhCalculateOverlappingPairsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / 
sizeof(b3BufferInfoCL)); - launcher.setConst(maxPairs); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if (numLargeAabbRigids > 0 && m_leafNodeAabbs.size() > 0) - { - B3_PROFILE("PLBVH large-small AABB test"); - - int numQueryAabbs = m_leafNodeAabbs.size(); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_largeAabbs.getBufferCL()), - - b3BufferInfoCL(numPairsGpu.getBufferCL()), - b3BufferInfoCL(out_overlappingPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbAabbTestKernel, "m_plbvhLargeAabbAabbTestKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxPairs); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(numQueryAabbs); - - launcher.launch1D(numQueryAabbs); - clFinish(m_queue); - } - - // - int numPairs = -1; - numPairsGpu.copyToHostPointer(&numPairs, 1); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - numPairsGpu.copyFromHostPointer(&maxPairs, 1); - } - - out_overlappingPairs.resize(numPairs); -} - -void b3GpuParallelLinearBvh::testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, - b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs) -{ - B3_PROFILE("PLBVH testRaysAgainstBvhAabbs()"); - - int numRays = rays.size(); - int maxRayRigidPairs = out_rayRigidPairs.size(); - - int reset = 0; - out_numRayRigidPairs.copyFromHostPointer(&reset, 1); - - // - if (m_leafNodeAabbs.size() > 0) - { - B3_PROFILE("PLBVH ray test small AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL()), - 
b3BufferInfoCL(m_internalNodeLeafIndexRanges.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - - b3BufferInfoCL(rays.getBufferCL()), - - b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()), - b3BufferInfoCL(out_rayRigidPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhRayTraverseKernel, "m_plbvhRayTraverseKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - int numLargeAabbRigids = m_largeAabbs.size(); - if (numLargeAabbRigids > 0) - { - B3_PROFILE("PLBVH ray test large AABB"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_largeAabbs.getBufferCL()), - b3BufferInfoCL(rays.getBufferCL()), - - b3BufferInfoCL(out_numRayRigidPairs.getBufferCL()), - b3BufferInfoCL(out_rayRigidPairs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_plbvhLargeAabbRayTestKernel, "m_plbvhLargeAabbRayTestKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbRigids); - launcher.setConst(maxRayRigidPairs); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_queue); - } - - // - int numRayRigidPairs = -1; - out_numRayRigidPairs.copyToHostPointer(&numRayRigidPairs, 1); - - if (numRayRigidPairs > maxRayRigidPairs) - b3Error("Error running out of rayRigid pairs: numRayRigidPairs = %d, maxRayRigidPairs = %d.\n", numRayRigidPairs, maxRayRigidPairs); -} - -void b3GpuParallelLinearBvh::constructBinaryRadixTree() -{ - B3_PROFILE("b3GpuParallelLinearBvh::constructBinaryRadixTree()"); - - int numLeaves = m_leafNodeAabbs.size(); - int numInternalNodes = numLeaves - 1; - - //Each internal node is placed in between 2 leaf nodes. - //By using this arrangement and computing the common prefix between - //these 2 adjacent leaf nodes, it is possible to quickly construct a binary radix tree. 
- { - B3_PROFILE("m_computeAdjacentPairCommonPrefixKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - b3BufferInfoCL(m_commonPrefixes.getBufferCL()), - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_computeAdjacentPairCommonPrefixKernel, "m_computeAdjacentPairCommonPrefixKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //For each leaf node, select its parent node by - //comparing the 2 nearest internal nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeLeafNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()), - b3BufferInfoCL(m_leafNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeLeafNodesKernel, "m_buildBinaryRadixTreeLeafNodesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLeaves); - - launcher.launch1D(numLeaves); - clFinish(m_queue); - } - - //For each internal node, perform 2 binary searches among the other internal nodes - //to its left and right to find its potential parent nodes and assign child node indices - { - B3_PROFILE("m_buildBinaryRadixTreeInternalNodesKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_commonPrefixes.getBufferCL()), - b3BufferInfoCL(m_commonPrefixLengths.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_rootNodeIndex.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeInternalNodesKernel, "m_buildBinaryRadixTreeInternalNodesKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / 
sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Find the number of nodes separating each internal node and the root node - //so that the AABBs can be set using the next kernel. - //Also determine the maximum number of nodes separating an internal node and the root node. - { - B3_PROFILE("m_findDistanceFromRootKernel"); - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_rootNodeIndex.getBufferCL()), - b3BufferInfoCL(m_internalNodeParentNodes.getBufferCL()), - b3BufferInfoCL(m_maxDistanceFromRoot.getBufferCL()), - b3BufferInfoCL(m_distanceFromRoot.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findDistanceFromRootKernel, "m_findDistanceFromRootKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numInternalNodes); - - launcher.launch1D(numInternalNodes); - clFinish(m_queue); - } - - //Starting from the internal nodes nearest to the leaf nodes, recursively move up - //the tree towards the root to set the AABBs of each internal node; each internal node - //checks its children and merges their AABBs - { - B3_PROFILE("m_buildBinaryRadixTreeAabbsRecursiveKernel"); - - int maxDistanceFromRoot = -1; - { - B3_PROFILE("copy maxDistanceFromRoot to CPU"); - m_maxDistanceFromRoot.copyToHostPointer(&maxDistanceFromRoot, 1); - clFinish(m_queue); - } - - for (int distanceFromRoot = maxDistanceFromRoot; distanceFromRoot >= 0; --distanceFromRoot) - { - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_distanceFromRoot.getBufferCL()), - b3BufferInfoCL(m_mortonCodesAndAabbIndicies.getBufferCL()), - b3BufferInfoCL(m_internalNodeChildNodes.getBufferCL()), - b3BufferInfoCL(m_leafNodeAabbs.getBufferCL()), - b3BufferInfoCL(m_internalNodeAabbs.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_buildBinaryRadixTreeAabbsRecursiveKernel, "m_buildBinaryRadixTreeAabbsRecursiveKernel"); - launcher.setBuffers(bufferInfo, 
sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(maxDistanceFromRoot); - launcher.setConst(distanceFromRoot); - launcher.setConst(numInternalNodes); - - //It may seem inefficent to launch a thread for each internal node when a - //much smaller number of nodes is actually processed, but this is actually - //faster than determining the exact nodes that are ready to merge their child AABBs. - launcher.launch1D(numInternalNodes); - } - - clFinish(m_queue); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h deleted file mode 100644 index b390775129..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h +++ /dev/null @@ -1,125 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_H -#define B3_GPU_PARALLEL_LINEAR_BVH_H - -//#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h" - -#define b3Int64 cl_long - -///@brief GPU Parallel Linearized Bounding Volume Heirarchy(LBVH) that is reconstructed every frame -///@remarks -///See presentation in docs/b3GpuParallelLinearBvh.pdf for algorithm details. -///@par -///Related papers: \n -///"Fast BVH Construction on GPUs" [Lauterbach et al. 2009] \n -///"Maximizing Parallelism in the Construction of BVHs, Octrees, and k-d trees" [Karras 2012] \n -///@par -///The basic algorithm for building the BVH as presented in [Lauterbach et al. 2009] consists of 4 stages: -/// - [fully parallel] Assign morton codes for each AABB using its center (after quantizing the AABB centers into a virtual grid) -/// - [fully parallel] Sort morton codes -/// - [somewhat parallel] Build binary radix tree (assign parent/child pointers for internal nodes of the BVH) -/// - [somewhat parallel] Set internal node AABBs -///@par -///[Karras 2012] improves on the algorithm by introducing fully parallel methods for the last 2 stages. -///The BVH implementation here shares many concepts with [Karras 2012], but a different method is used for constructing the tree. -///Instead of searching for the child nodes of each internal node, we search for the parent node of each node. 
-///Additionally, a non-atomic traversal that starts from the leaf nodes and moves towards the root node is used to set the AABBs. -class b3GpuParallelLinearBvh -{ - cl_command_queue m_queue; - - cl_program m_parallelLinearBvhProgram; - - cl_kernel m_separateAabbsKernel; - cl_kernel m_findAllNodesMergedAabbKernel; - cl_kernel m_assignMortonCodesAndAabbIndiciesKernel; - - //Binary radix tree construction kernels - cl_kernel m_computeAdjacentPairCommonPrefixKernel; - cl_kernel m_buildBinaryRadixTreeLeafNodesKernel; - cl_kernel m_buildBinaryRadixTreeInternalNodesKernel; - cl_kernel m_findDistanceFromRootKernel; - cl_kernel m_buildBinaryRadixTreeAabbsRecursiveKernel; - - cl_kernel m_findLeafIndexRangesKernel; - - //Traversal kernels - cl_kernel m_plbvhCalculateOverlappingPairsKernel; - cl_kernel m_plbvhRayTraverseKernel; - cl_kernel m_plbvhLargeAabbAabbTestKernel; - cl_kernel m_plbvhLargeAabbRayTestKernel; - - b3RadixSort32CL m_radixSorter; - - //1 element - b3OpenCLArray<int> m_rootNodeIndex; //Most significant bit(0x80000000) is set to indicate internal node - b3OpenCLArray<int> m_maxDistanceFromRoot; //Max number of internal nodes between an internal node and the root node - b3OpenCLArray<int> m_temp; //Used to hold the number of pairs in calculateOverlappingPairs() - - //1 element per internal node (number_of_internal_nodes == number_of_leaves - 1) - b3OpenCLArray<b3SapAabb> m_internalNodeAabbs; - b3OpenCLArray<b3Int2> m_internalNodeLeafIndexRanges; //x == min leaf index, y == max leaf index - b3OpenCLArray<b3Int2> m_internalNodeChildNodes; //x == left child, y == right child; msb(0x80000000) is set to indicate internal node - b3OpenCLArray<int> m_internalNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - - //1 element per internal node; for binary radix tree construction - b3OpenCLArray<b3Int64> m_commonPrefixes; - b3OpenCLArray<int> m_commonPrefixLengths; - b3OpenCLArray<int> m_distanceFromRoot; //Number of internal 
nodes between this node and the root - - //1 element per leaf node (leaf nodes only include small AABBs) - b3OpenCLArray<int> m_leafNodeParentNodes; //For parent node index, msb(0x80000000) is not set since it is always internal - b3OpenCLArray<b3SortData> m_mortonCodesAndAabbIndicies; //m_key == morton code, m_value == aabb index in m_leafNodeAabbs - b3OpenCLArray<b3SapAabb> m_mergedAabb; //m_mergedAabb[0] contains the merged AABB of all leaf nodes - b3OpenCLArray<b3SapAabb> m_leafNodeAabbs; //Contains only small AABBs - - //1 element per large AABB, which is not stored in the BVH - b3OpenCLArray<b3SapAabb> m_largeAabbs; - -public: - b3GpuParallelLinearBvh(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvh(); - - ///Must be called before any other function - void build(const b3OpenCLArray<b3SapAabb>& worldSpaceAabbs, const b3OpenCLArray<int>& smallAabbIndices, - const b3OpenCLArray<int>& largeAabbIndices); - - ///calculateOverlappingPairs() uses the worldSpaceAabbs parameter of b3GpuParallelLinearBvh::build() as the query AABBs. - ///@param out_overlappingPairs The size() of this array is used to determine the max number of pairs. - ///If the number of overlapping pairs is < out_overlappingPairs.size(), out_overlappingPairs is resized. - void calculateOverlappingPairs(b3OpenCLArray<b3Int4>& out_overlappingPairs); - - ///@param out_numRigidRayPairs Array of length 1; contains the number of detected ray-rigid AABB intersections; - ///this value may be greater than out_rayRigidPairs.size() if out_rayRigidPairs is not large enough. - ///@param out_rayRigidPairs Contains an array of rays intersecting rigid AABBs; x == ray index, y == rigid body index. - ///If the size of this array is insufficient to hold all ray-rigid AABB intersections, additional intersections are discarded. 
- void testRaysAgainstBvhAabbs(const b3OpenCLArray<b3RayInfo>& rays, - b3OpenCLArray<int>& out_numRayRigidPairs, b3OpenCLArray<b3Int2>& out_rayRigidPairs); - -private: - void constructBinaryRadixTree(); -}; - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp deleted file mode 100644 index 62ea7a32df..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.cpp +++ /dev/null @@ -1,76 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Initial Author Jackson Lee, 2014 - -#include "b3GpuParallelLinearBvhBroadphase.h" - -b3GpuParallelLinearBvhBroadphase::b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue) : m_plbvh(context, device, queue), - - m_overlappingPairsGpu(context, queue), - - m_aabbsGpu(context, queue), - m_smallAabbsMappingGpu(context, queue), - m_largeAabbsMappingGpu(context, queue) -{ -} - -void b3GpuParallelLinearBvhBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_smallAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} -void b3GpuParallelLinearBvhBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int newAabbIndex = m_aabbsCpu.size(); - - b3SapAabb aabb; - aabb.m_minVec = aabbMin; - aabb.m_maxVec = aabbMax; - - aabb.m_minIndices[3] = userPtr; - aabb.m_signedMaxIndices[3] = newAabbIndex; - - m_largeAabbsMappingCpu.push_back(newAabbIndex); - - m_aabbsCpu.push_back(aabb); -} - -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairs(int maxPairs) -{ - //Reconstruct BVH - m_plbvh.build(m_aabbsGpu, m_smallAabbsMappingGpu, m_largeAabbsMappingGpu); - - // - m_overlappingPairsGpu.resize(maxPairs); - m_plbvh.calculateOverlappingPairs(m_overlappingPairsGpu); -} -void b3GpuParallelLinearBvhBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - b3Assert(0); //CPU version not implemented -} - -void b3GpuParallelLinearBvhBroadphase::writeAabbsToGpu() -{ - m_aabbsGpu.copyFromHost(m_aabbsCpu); - m_smallAabbsMappingGpu.copyFromHost(m_smallAabbsMappingCpu); - m_largeAabbsMappingGpu.copyFromHost(m_largeAabbsMappingCpu); -} 
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h deleted file mode 100644 index dda0eea7be..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvhBroadphase.h +++ /dev/null @@ -1,66 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Initial Author Jackson Lee, 2014 - -#ifndef B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H -#define B3_GPU_PARALLEL_LINEAR_BVH_BROADPHASE_H - -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" - -#include "b3GpuParallelLinearBvh.h" - -class b3GpuParallelLinearBvhBroadphase : public b3GpuBroadphaseInterface -{ - b3GpuParallelLinearBvh m_plbvh; - - b3OpenCLArray<b3Int4> m_overlappingPairsGpu; - - b3OpenCLArray<b3SapAabb> m_aabbsGpu; - b3OpenCLArray<int> m_smallAabbsMappingGpu; - b3OpenCLArray<int> m_largeAabbsMappingGpu; - - b3AlignedObjectArray<b3SapAabb> m_aabbsCpu; - b3AlignedObjectArray<int> m_smallAabbsMappingCpu; - b3AlignedObjectArray<int> m_largeAabbsMappingCpu; - -public: - b3GpuParallelLinearBvhBroadphase(cl_context context, cl_device_id device, cl_command_queue queue); - virtual ~b3GpuParallelLinearBvhBroadphase() {} - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual int getNumOverlap() { return m_overlappingPairsGpu.size(); } - virtual cl_mem getOverlappingPairBuffer() { return m_overlappingPairsGpu.getBufferCL(); } - - virtual cl_mem getAabbBufferWS() { return m_aabbsGpu.getBufferCL(); } - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() { return m_aabbsGpu; } - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU() { return m_overlappingPairsGpu; } - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU() { return m_smallAabbsMappingGpu; } - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU() { return m_largeAabbsMappingGpu; } - - virtual 
b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() { return m_aabbsCpu; } - - static b3GpuBroadphaseInterface* CreateFunc(cl_context context, cl_device_id device, cl_command_queue queue) - { - return new b3GpuParallelLinearBvhBroadphase(context, device, queue); - } -}; - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp deleted file mode 100644 index 4126d03ed0..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp +++ /dev/null @@ -1,1298 +0,0 @@ - -bool searchIncremental3dSapOnGpu = true; -#include <limits.h> -#include "b3GpuSapBroadphase.h" -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/sapKernels.h" - -#include "Bullet3Common/b3MinMax.h" - -#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" - -/* - - - - - - - b3OpenCLArray<int> m_pairCount; - - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray<b3Vector3> m_sum; - b3OpenCLArray<b3Vector3> m_sum2; - b3OpenCLArray<b3Vector3> m_dst; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - - b3OpenCLArray<b3Int4> m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray<b3SortData> m_gpuSmallSortData; - b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - */ - 
-b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType) - : m_context(ctx), - m_device(device), - m_queue(q), - - m_objectMinMaxIndexGPUaxis0(ctx, q), - m_objectMinMaxIndexGPUaxis1(ctx, q), - m_objectMinMaxIndexGPUaxis2(ctx, q), - m_objectMinMaxIndexGPUaxis0prev(ctx, q), - m_objectMinMaxIndexGPUaxis1prev(ctx, q), - m_objectMinMaxIndexGPUaxis2prev(ctx, q), - m_sortedAxisGPU0(ctx, q), - m_sortedAxisGPU1(ctx, q), - m_sortedAxisGPU2(ctx, q), - m_sortedAxisGPU0prev(ctx, q), - m_sortedAxisGPU1prev(ctx, q), - m_sortedAxisGPU2prev(ctx, q), - m_addedHostPairsGPU(ctx, q), - m_removedHostPairsGPU(ctx, q), - m_addedCountGPU(ctx, q), - m_removedCountGPU(ctx, q), - m_currentBuffer(-1), - m_pairCount(ctx, q), - m_allAabbsGPU(ctx, q), - m_sum(ctx, q), - m_sum2(ctx, q), - m_dst(ctx, q), - m_smallAabbsMappingGPU(ctx, q), - m_largeAabbsMappingGPU(ctx, q), - m_overlappingPairs(ctx, q), - m_gpuSmallSortData(ctx, q), - m_gpuSmallSortedAabbs(ctx, q) -{ - const char* sapSrc = sapCL; - - cl_int errNum = 0; - - b3Assert(m_context); - b3Assert(m_device); - cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH); - b3Assert(errNum == CL_SUCCESS); - - b3Assert(errNum == CL_SUCCESS); -#ifndef __APPLE__ - m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context, m_device, m_queue); -#else - m_prefixScanFloat4 = 0; -#endif - m_sapKernel = 0; - - switch (kernelType) - { - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU: - { - m_sapKernel = 0; - break; - } - case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBruteForce", &errNum, sapProg); - break; - } - - case B3_GPU_SAP_KERNEL_ORIGINAL: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelOriginal", &errNum, sapProg); - break; - } - case 
B3_GPU_SAP_KERNEL_BARRIER: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBarrier", &errNum, sapProg); - break; - } - case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg); - break; - } - - default: - { - m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg); - b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory"); - } - }; - - m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - - m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "prepareSumVarianceKernel", &errNum, sapProg); - b3Assert(errNum == CL_SUCCESS); - - m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "flipFloatKernel", &errNum, sapProg); - - m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg); - - m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "scatterKernel", &errNum, sapProg); - - m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue); -} - -b3GpuSapBroadphase::~b3GpuSapBroadphase() -{ - delete m_sorter; - delete m_prefixScanFloat4; - - clReleaseKernel(m_scatterKernel); - clReleaseKernel(m_flipFloatKernel); - clReleaseKernel(m_copyAabbsKernel); - clReleaseKernel(m_sapKernel); - clReleaseKernel(m_sap2Kernel); - clReleaseKernel(m_prepareSumVarianceKernel); -} - -/// conservative test for overlap between two aabbs -static bool TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1, - const b3Vector3& aabbMin2, const b3Vector3& aabbMax2) -{ - bool 
overlap = true; - overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap; - overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap; - overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap; - return overlap; -} - -//http://stereopsis.com/radix.html -static unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -}; - -void b3GpuSapBroadphase::init3dSap() -{ - if (m_currentBuffer < 0) - { - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - m_currentBuffer = 0; - for (int axis = 0; axis < 3; axis++) - { - for (int buf = 0; buf < 2; buf++) - { - int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = 2 * totalNumAabbs; - m_sortedAxisCPU[axis][buf].resize(numEndPoints); - - if (buf == m_currentBuffer) - { - for (int i = 0; i < totalNumAabbs; i++) - { - m_sortedAxisCPU[axis][buf][i * 2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis]) - 1; - m_sortedAxisCPU[axis][buf][i * 2].m_value = i * 2; - m_sortedAxisCPU[axis][buf][i * 2 + 1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis]) + 1; - m_sortedAxisCPU[axis][buf][i * 2 + 1].m_value = i * 2 + 1; - } - } - } - } - - for (int axis = 0; axis < 3; axis++) - { - m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - - for (int axis = 0; axis < 3; axis++) - { - //int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); - m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints); - for (int i = 0; i < numEndPoints; i++) - { - int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - int newDest = destIndex / 2; - if (destIndex & 1) - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i; - } - else - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i; - } - } - } - } -} - -static bool 
b3PairCmp(const b3Int4& p, const b3Int4& q) -{ - return ((p.x < q.x) || ((p.x == q.x) && (p.y < q.y))); -} - -static bool operator==(const b3Int4& a, const b3Int4& b) -{ - return a.x == b.x && a.y == b.y; -}; - -static bool operator<(const b3Int4& a, const b3Int4& b) -{ - return a.x < b.x || (a.x == b.x && a.y < b.y); -}; - -static bool operator>(const b3Int4& a, const b3Int4& b) -{ - return a.x > b.x || (a.x == b.x && a.y > b.y); -}; - -b3AlignedObjectArray<b3Int4> addedHostPairs; -b3AlignedObjectArray<b3Int4> removedHostPairs; - -b3AlignedObjectArray<b3SapAabb> preAabbs; - -void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() -{ - //static int framepje = 0; - //printf("framepje=%d\n",framepje++); - - B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap"); - - addedHostPairs.resize(0); - removedHostPairs.resize(0); - - b3Assert(m_currentBuffer >= 0); - - { - preAabbs.resize(m_allAabbsCPU.size()); - for (int i = 0; i < preAabbs.size(); i++) - { - preAabbs[i] = m_allAabbsCPU[i]; - } - } - - if (m_currentBuffer < 0) - return; - { - B3_PROFILE("m_allAabbsGPU.copyToHost"); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - } - - b3AlignedObjectArray<b3Int4> allPairs; - { - B3_PROFILE("m_overlappingPairs.copyToHost"); - m_overlappingPairs.copyToHost(allPairs); - } - if (0) - { - { - printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n", - m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1], m_allAabbsCPU[40].m_min[2], - m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1], m_allAabbsCPU[40].m_max[2]); - } - - { - printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n", - m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1], m_allAabbsCPU[53].m_min[2], - m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1], m_allAabbsCPU[53].m_max[2]); - } - - { - b3Int4 newPair; - newPair.x = 40; - newPair.y = 53; - int index = allPairs.findBinarySearch(newPair); - printf("hasPair(40,53)=%d out of %d\n", index, allPairs.size()); - - { - int overlap = 
TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max, (const b3Vector3&)m_allAabbsCPU[53].m_min, (const b3Vector3&)m_allAabbsCPU[53].m_max); - printf("overlap=%d\n", overlap); - } - - if (preAabbs.size()) - { - int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max, (const b3Vector3&)preAabbs[53].m_min, (const b3Vector3&)preAabbs[53].m_max); - printf("prevoverlap=%d\n", prevOverlap); - } - else - { - printf("unknown prevoverlap\n"); - } - } - } - - if (0) - { - for (int i = 0; i < m_allAabbsCPU.size(); i++) - { - //printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]); - } - - for (int axis = 0; axis < 3; axis++) - { - for (int buf = 0; buf < 2; buf++) - { - b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size() * 2); - } - } - } - - m_currentBuffer = 1 - m_currentBuffer; - - int totalNumAabbs = m_allAabbsCPU.size(); - - { - B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)"); - for (int i = 0; i < totalNumAabbs; i++) - { - unsigned int keyMin[3]; - unsigned int keyMax[3]; - for (int axis = 0; axis < 3; axis++) - { - float vmin = m_allAabbsCPU[i].m_min[axis]; - float vmax = m_allAabbsCPU[i].m_max[axis]; - keyMin[axis] = FloatFlip(vmin); - keyMax[axis] = FloatFlip(vmax); - - m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_key = keyMin[axis] - 1; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_value = i * 2; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_key = keyMax[axis] + 1; - m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_value = i * 2 + 1; - } - //printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]); - } - } - - { - B3_PROFILE("sort m_sortedAxisCPU"); - for (int axis = 0; axis < 3; axis++) - 
m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); - } - -#if 0 - if (0) - { - for (int axis=0;axis<3;axis++) - { - //printf("axis %d\n",axis); - for (int i=0;i<m_sortedAxisCPU[axis][m_currentBuffer].size();i++) - { - //int key = m_sortedAxisCPU[axis][m_currentBuffer][i].m_key; - //int value = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - //printf("[%d]=%d\n",i,value); - } - - } - } -#endif - - { - B3_PROFILE("assign m_objectMinMaxIndexCPU"); - for (int axis = 0; axis < 3; axis++) - { - int totalNumAabbs = m_allAabbsCPU.size(); - int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); - m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs); - for (int i = 0; i < numEndPoints; i++) - { - int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; - int newDest = destIndex / 2; - if (destIndex & 1) - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i; - } - else - { - m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i; - } - } - } - } - -#if 0 - if (0) - { - printf("==========================\n"); - for (int axis=0;axis<3;axis++) - { - unsigned int curMinIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].x; - unsigned int curMaxIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].y; - unsigned int prevMaxIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].y; - unsigned int prevMinIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].x; - - int dmin40 = curMinIndex40 - prevMinIndex40; - int dmax40 = curMinIndex40 - prevMinIndex40; - printf("axis %d curMinIndex40=%d prevMinIndex40=%d\n",axis,curMinIndex40, prevMinIndex40); - printf("axis %d curMaxIndex40=%d prevMaxIndex40=%d\n",axis,curMaxIndex40, prevMaxIndex40); - } - printf(".........................\n"); - for (int axis=0;axis<3;axis++) - { - unsigned int curMinIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].x; - unsigned int curMaxIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].y; - 
unsigned int prevMaxIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].y; - unsigned int prevMinIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].x; - - int dmin40 = curMinIndex53 - prevMinIndex53; - int dmax40 = curMinIndex53 - prevMinIndex53; - printf("axis %d curMinIndex53=%d prevMinIndex53=%d\n",axis,curMinIndex53, prevMinIndex53); - printf("axis %d curMaxIndex53=%d prevMaxIndex53=%d\n",axis,curMaxIndex53, prevMaxIndex53); - } - - } -#endif - - int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size(); - int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size(); - b3Assert(a == b); - b3Assert(b == c); - /* - if (searchIncremental3dSapOnGpu) - { - B3_PROFILE("computePairsIncremental3dSapKernelGPU"); - int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - int maxCapacity = 1024*1024; - { - B3_PROFILE("copy from host"); - m_objectMinMaxIndexGPUaxis0.copyFromHost(m_objectMinMaxIndexCPU[0][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis1.copyFromHost(m_objectMinMaxIndexCPU[1][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis2.copyFromHost(m_objectMinMaxIndexCPU[2][m_currentBuffer]); - m_objectMinMaxIndexGPUaxis0prev.copyFromHost(m_objectMinMaxIndexCPU[0][1-m_currentBuffer]); - m_objectMinMaxIndexGPUaxis1prev.copyFromHost(m_objectMinMaxIndexCPU[1][1-m_currentBuffer]); - m_objectMinMaxIndexGPUaxis2prev.copyFromHost(m_objectMinMaxIndexCPU[2][1-m_currentBuffer]); - - m_sortedAxisGPU0.copyFromHost(m_sortedAxisCPU[0][m_currentBuffer]); - m_sortedAxisGPU1.copyFromHost(m_sortedAxisCPU[1][m_currentBuffer]); - m_sortedAxisGPU2.copyFromHost(m_sortedAxisCPU[2][m_currentBuffer]); - m_sortedAxisGPU0prev.copyFromHost(m_sortedAxisCPU[0][1-m_currentBuffer]); - m_sortedAxisGPU1prev.copyFromHost(m_sortedAxisCPU[1][1-m_currentBuffer]); - m_sortedAxisGPU2prev.copyFromHost(m_sortedAxisCPU[2][1-m_currentBuffer]); - - - m_addedHostPairsGPU.resize(maxCapacity); - 
m_removedHostPairsGPU.resize(maxCapacity); - - m_addedCountGPU.resize(0); - m_addedCountGPU.push_back(0); - m_removedCountGPU.resize(0); - m_removedCountGPU.push_back(0); - } - - { - B3_PROFILE("launch1D"); - b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel"); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis0prev.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis1prev.getBufferCL()); - launcher.setBuffer(m_objectMinMaxIndexGPUaxis2prev.getBufferCL()); - - launcher.setBuffer(m_sortedAxisGPU0.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU1.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU2.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU0prev.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU1prev.getBufferCL()); - launcher.setBuffer(m_sortedAxisGPU2prev.getBufferCL()); - - - launcher.setBuffer(m_addedHostPairsGPU.getBufferCL()); - launcher.setBuffer(m_removedHostPairsGPU.getBufferCL()); - launcher.setBuffer(m_addedCountGPU.getBufferCL()); - launcher.setBuffer(m_removedCountGPU.getBufferCL()); - launcher.setConst(maxCapacity); - launcher.setConst( numObjects); - launcher.launch1D( numObjects); - clFinish(m_queue); - } - - { - B3_PROFILE("copy to host"); - int addedCountGPU = m_addedCountGPU.at(0); - m_addedHostPairsGPU.resize(addedCountGPU); - m_addedHostPairsGPU.copyToHost(addedHostPairs); - - //printf("addedCountGPU=%d\n",addedCountGPU); - int removedCountGPU = m_removedCountGPU.at(0); - m_removedHostPairsGPU.resize(removedCountGPU); - m_removedHostPairsGPU.copyToHost(removedHostPairs); - //printf("removedCountGPU=%d\n",removedCountGPU); - - } - - - - } - else - */ - { - int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); - - B3_PROFILE("actual search"); - for (int i = 0; i < 
numObjects; i++) - { - //int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size(); - //int checkObjects[]={40,53}; - //int numCheckObjects = sizeof(checkObjects)/sizeof(int); - - //for (int a=0;a<numCheckObjects ;a++) - - for (int axis = 0; axis < 3; axis++) - { - //int i = checkObjects[a]; - - unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x; - unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y; - unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].x; - int dmin = curMinIndex - prevMinIndex; - - unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].y; - - int dmax = curMaxIndex - prevMaxIndex; - if (dmin != 0) - { - //printf("for object %d, dmin=%d\n",i,dmin); - } - if (dmax != 0) - { - //printf("for object %d, dmax=%d\n",i,dmax); - } - for (int otherbuffer = 0; otherbuffer < 2; otherbuffer++) - { - if (dmin != 0) - { - int stepMin = dmin < 0 ? -1 : 1; - for (int j = prevMinIndex; j != curMinIndex; j += stepMin) - { - int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; - int otherIndex = otherIndex2 / 2; - if (otherIndex != i) - { - bool otherIsMax = ((otherIndex2 & 1) != 0); - - if (otherIsMax) - { - //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); - //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); - - bool overlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap = 
false; - } - - // b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x)) - prevOverlap = false; - } - - //b3Assert(overlap==overlap2); - - if (dmin < 0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i <= otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } - else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - } - } - else - { - if (!overlap && prevOverlap) - { - //remove a pair - b3Int4 removedPair; - if (i <= otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } - else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - } - } //otherisMax - } //if (dmin<0) - } //if (otherIndex!=i) - } //for (int j= - } - - if (dmax != 0) - { - int stepMax = dmax < 0 ? 
-1 : 1; - for (int j = prevMaxIndex; j != curMaxIndex; j += stepMax) - { - int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; - int otherIndex = otherIndex2 / 2; - if (otherIndex != i) - { - //bool otherIsMin = ((otherIndex2&1)==0); - //if (otherIsMin) - { - //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); - //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); - - bool overlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) - overlap = false; - } - //b3Assert(overlap2==overlap); - - bool prevOverlap = true; - - for (int ax = 0; ax < 3; ax++) - { - if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) || - (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x)) - prevOverlap = false; - } - - if (dmax > 0) - { - if (overlap && !prevOverlap) - { - //add a pair - b3Int4 newPair; - if (i <= otherIndex) - { - newPair.x = i; - newPair.y = otherIndex; - } - else - { - newPair.x = otherIndex; - newPair.y = i; - } - addedHostPairs.push_back(newPair); - } - } - else - { - if (!overlap && prevOverlap) - { - //if (otherIndex2&1==0) -> min? 
- //remove a pair - b3Int4 removedPair; - if (i <= otherIndex) - { - removedPair.x = i; - removedPair.y = otherIndex; - } - else - { - removedPair.x = otherIndex; - removedPair.y = i; - } - removedHostPairs.push_back(removedPair); - } - } - - } //if (dmin<0) - } //if (otherIndex!=i) - } //for (int j= - } - } //for (int otherbuffer - } //for (int axis=0; - } //for (int i=0;i<numObjects - } - - //remove duplicates and add/remove then to existing m_overlappingPairs - - { - { - B3_PROFILE("sort allPairs"); - allPairs.quickSort(b3PairCmp); - } - { - B3_PROFILE("sort addedHostPairs"); - addedHostPairs.quickSort(b3PairCmp); - } - { - B3_PROFILE("sort removedHostPairs"); - removedHostPairs.quickSort(b3PairCmp); - } - } - - b3Int4 prevPair; - prevPair.x = -1; - prevPair.y = -1; - - int uniqueRemovedPairs = 0; - - b3AlignedObjectArray<int> removedPositions; - - { - B3_PROFILE("actual removing"); - for (int i = 0; i < removedHostPairs.size(); i++) - { - b3Int4 removedPair = removedHostPairs[i]; - if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y)) - { - int index1 = allPairs.findBinarySearch(removedPair); - - //#ifdef _DEBUG - - int index2 = allPairs.findLinearSearch(removedPair); - b3Assert(index1 == index2); - - //b3Assert(index1!=allPairs.size()); - if (index1 < allPairs.size()) - //#endif//_DEBUG - { - uniqueRemovedPairs++; - removedPositions.push_back(index1); - { - //printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y); - } - } - } - prevPair = removedPair; - } - - if (uniqueRemovedPairs) - { - for (int i = 0; i < removedPositions.size(); i++) - { - allPairs[removedPositions[i]].x = INT_MAX; - allPairs[removedPositions[i]].y = INT_MAX; - } - allPairs.quickSort(b3PairCmp); - allPairs.resize(allPairs.size() - uniqueRemovedPairs); - } - } - //if (uniqueRemovedPairs) - // printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs); - //printf("removedHostPairs.size = %d\n",removedHostPairs.size()); - - prevPair.x = -1; - prevPair.y 
= -1; - - int uniqueAddedPairs = 0; - b3AlignedObjectArray<b3Int4> actualAddedPairs; - - { - B3_PROFILE("actual adding"); - for (int i = 0; i < addedHostPairs.size(); i++) - { - b3Int4 newPair = addedHostPairs[i]; - if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y)) - { - //#ifdef _DEBUG - int index1 = allPairs.findBinarySearch(newPair); - - int index2 = allPairs.findLinearSearch(newPair); - b3Assert(index1 == index2); - - b3Assert(index1 == allPairs.size()); - if (index1 != allPairs.size()) - { - printf("??\n"); - } - - if (index1 == allPairs.size()) - //#endif //_DEBUG - { - uniqueAddedPairs++; - actualAddedPairs.push_back(newPair); - } - } - prevPair = newPair; - } - for (int i = 0; i < actualAddedPairs.size(); i++) - { - //printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y); - allPairs.push_back(actualAddedPairs[i]); - } - } - - //if (uniqueAddedPairs) - // printf("uniqueAddedPairs=%d\n", uniqueAddedPairs); - - { - B3_PROFILE("m_overlappingPairs.copyFromHost"); - m_overlappingPairs.copyFromHost(allPairs); - } -} - -void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) -{ - //test - // if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); - m_allAabbsGPU.copyToHost(m_allAabbsCPU); - - int axis = 0; - { - B3_PROFILE("CPU compute best variance axis"); - b3Vector3 s = b3MakeVector3(0, 0, 0), s2 = b3MakeVector3(0, 0, 0); - int numRigidBodies = m_smallAabbsMappingCPU.size(); - - for (int i = 0; i < numRigidBodies; i++) - { - b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - b3Vector3 maxAabb = b3MakeVector3(aabb.m_max[0], aabb.m_max[1], aabb.m_max[2]); - b3Vector3 minAabb = b3MakeVector3(aabb.m_min[0], aabb.m_min[1], aabb.m_min[2]); - b3Vector3 centerAabb = (maxAabb + minAabb) * 0.5f; - - s += centerAabb; - s2 += centerAabb * centerAabb; - } - b3Vector3 v = s2 - (s * s) / 
(float)numRigidBodies; - - if (v[1] > v[0]) - axis = 1; - if (v[2] > v[axis]) - axis = 2; - } - - b3AlignedObjectArray<b3Int4> hostPairs; - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i = 0; i < numSmallAabbs; i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - //float reference = smallAabbi.m_max[axis]; - - for (int j = i + 1; j < numSmallAabbs; j++) - { - b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]]; - - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)smallAabbj.m_min, (b3Vector3&)smallAabbj.m_max)) - { - b3Int4 pair; - int a = smallAabbi.m_minIndices[3]; - int b = smallAabbj.m_minIndices[3]; - if (a <= b) - { - pair.x = a; //store the original index in the unsorted aabb array - pair.y = b; - } - else - { - pair.x = b; //store the original index in the unsorted aabb array - pair.y = a; - } - hostPairs.push_back(pair); - } - } - } - } - - { - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - for (int i = 0; i < numSmallAabbs; i++) - { - b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; - - //float reference = smallAabbi.m_max[axis]; - int numLargeAabbs = m_largeAabbsMappingCPU.size(); - - for (int j = 0; j < numLargeAabbs; j++) - { - b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]]; - if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, - (b3Vector3&)largeAabbj.m_min, (b3Vector3&)largeAabbj.m_max)) - { - b3Int4 pair; - int a = largeAabbj.m_minIndices[3]; - int b = smallAabbi.m_minIndices[3]; - if (a <= b) - { - pair.x = a; - pair.y = b; //store the original index in the unsorted aabb array - } - else - { - pair.x = b; - pair.y = a; //store the original index in the unsorted aabb array - } - - hostPairs.push_back(pair); - } - } - } - } - - if (hostPairs.size() > maxPairs) - { - hostPairs.resize(maxPairs); - } - - if (hostPairs.size()) - { - m_overlappingPairs.copyFromHost(hostPairs); 
- } - else - { - m_overlappingPairs.resize(0); - } - - //init3dSap(); -} - -void b3GpuSapBroadphase::reset() -{ - m_allAabbsGPU.resize(0); - m_allAabbsCPU.resize(0); - - m_smallAabbsMappingGPU.resize(0); - m_smallAabbsMappingCPU.resize(0); - - m_pairCount.resize(0); - - m_largeAabbsMappingGPU.resize(0); - m_largeAabbsMappingCPU.resize(0); -} - -void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) -{ - if (m_sapKernel == 0) - { - calculateOverlappingPairsHost(maxPairs); - return; - } - - //if (m_currentBuffer>=0) - // return calculateOverlappingPairsHostIncremental3Sap(); - - //calculateOverlappingPairsHost(maxPairs); - - B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); - - int axis = 0; - - { - //bool syncOnHost = false; - - int numSmallAabbs = m_smallAabbsMappingCPU.size(); - if (m_prefixScanFloat4 && numSmallAabbs) - { - B3_PROFILE("GPU compute best variance axis"); - - if (m_dst.size() != (numSmallAabbs + 1)) - { - m_dst.resize(numSmallAabbs + 128); - m_sum.resize(numSmallAabbs + 128); - m_sum2.resize(numSmallAabbs + 128); - m_sum.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow? - m_sum2.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow? 
- } - - b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel, "m_prepareSumVarianceKernel"); - launcher.setBuffer(m_allAabbsGPU.getBufferCL()); - - launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); - launcher.setBuffer(m_sum.getBufferCL()); - launcher.setBuffer(m_sum2.getBufferCL()); - launcher.setConst(numSmallAabbs); - int num = numSmallAabbs; - launcher.launch1D(num); - - b3Vector3 s; - b3Vector3 s2; - m_prefixScanFloat4->execute(m_sum, m_dst, numSmallAabbs + 1, &s); - m_prefixScanFloat4->execute(m_sum2, m_dst, numSmallAabbs + 1, &s2); - - b3Vector3 v = s2 - (s * s) / (float)numSmallAabbs; - - if (v[1] > v[0]) - axis = 1; - if (v[2] > v[axis]) - axis = 2; - } - - m_gpuSmallSortData.resize(numSmallAabbs); - -#if 1 - if (m_smallAabbsMappingGPU.size()) - { - B3_PROFILE("flipFloatKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortData.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_flipFloatKernel, "m_flipFloatKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - - int num = numSmallAabbs; - launcher.launch1D(num); - clFinish(m_queue); - } - - if (m_gpuSmallSortData.size()) - { - B3_PROFILE("gpu radix sort"); - m_sorter->execute(m_gpuSmallSortData); - clFinish(m_queue); - } - - m_gpuSmallSortedAabbs.resize(numSmallAabbs); - if (numSmallAabbs) - { - B3_PROFILE("scatterKernel"); - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortData.getBufferCL(), true), - b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_scatterKernel, "m_scatterKernel "); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - int num = 
numSmallAabbs; - launcher.launch1D(num); - clFinish(m_queue); - } - - m_overlappingPairs.resize(maxPairs); - - m_pairCount.resize(0); - m_pairCount.push_back(0); - int numPairs = 0; - - { - int numLargeAabbs = m_largeAabbsMappingGPU.size(); - if (numLargeAabbs && numSmallAabbs) - { - //@todo - B3_PROFILE("sap2Kernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_allAabbsGPU.getBufferCL()), - b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()), - b3BufferInfoCL(m_overlappingPairs.getBufferCL()), - b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numLargeAabbs); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - launcher.setConst(maxPairs); - //@todo: use actual maximum work item sizes of the device instead of hardcoded values - launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64); - - numPairs = m_pairCount.at(0); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - } - } - } - if (m_gpuSmallSortedAabbs.size()) - { - B3_PROFILE("sapKernel"); - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL()), b3BufferInfoCL(m_overlappingPairs.getBufferCL()), b3BufferInfoCL(m_pairCount.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_sapKernel, "m_sapKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numSmallAabbs); - launcher.setConst(axis); - launcher.setConst(maxPairs); - - int num = numSmallAabbs; -#if 0 - int buffSize = launcher.getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; - for (int i=0;i<buffSize+1;i++) - { - unsigned char* ptr = (unsigned char*)&buf[i]; - *ptr = 0xff; - } - int actualWrite = 
launcher.serializeArguments(buf,buffSize); - - unsigned char* cptr = (unsigned char*)&buf[buffSize]; - // printf("buf[buffSize] = %d\n",*cptr); - - assert(buf[buffSize]==0xff);//check for buffer overrun - int* ptr = (int*)&buf[buffSize]; - - *ptr = num; - - FILE* f = fopen("m_sapKernelArgs.bin","wb"); - fwrite(buf,buffSize+sizeof(int),1,f); - fclose(f); -#endif // - - launcher.launch1D(num); - clFinish(m_queue); - - numPairs = m_pairCount.at(0); - if (numPairs > maxPairs) - { - b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); - numPairs = maxPairs; - m_pairCount.resize(0); - m_pairCount.push_back(maxPairs); - } - } - -#else - int numPairs = 0; - - b3LauncherCL launcher(m_queue, m_sapKernel); - - const char* fileName = "m_sapKernelArgs.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - launcher.launch1D(num); - - b3OpenCLArray<int> pairCount(m_context, m_queue); - int numElements = launcher.m_arrays[2]->size() / sizeof(int); - pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(), numElements); - numPairs = pairCount.at(0); - //printf("overlapping pairs = %d\n",numPairs); - b3AlignedObjectArray<b3Int4> hostOoverlappingPairs; - b3OpenCLArray<b3Int4> tmpGpuPairs(m_context, m_queue); - tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(), numPairs); - - tmpGpuPairs.copyToHost(hostOoverlappingPairs); - m_overlappingPairs.copyFromHost(hostOoverlappingPairs); - //printf("hello %d\n", m_overlappingPairs.size()); - free(buf); - fclose(f); - } - else - { - printf("error: cannot find file %s\n", fileName); - } - - 
clFinish(m_queue); - -#endif - - m_overlappingPairs.resize(numPairs); - - } //B3_PROFILE("GPU_RADIX SORT"); - //init3dSap(); -} - -void b3GpuSapBroadphase::writeAabbsToGpu() -{ - m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); - m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); - - m_allAabbsGPU.copyFromHost(m_allAabbsCPU); //might not be necessary, the 'setupGpuAabbsFull' already takes care of this -} - -void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i = 0; i < 4; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - m_allAabbsCPU.push_back(aabb); -} - -void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask) -{ - int index = userPtr; - b3SapAabb aabb; - for (int i = 0; i < 4; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - } - aabb.m_minIndices[3] = index; - aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); - m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size()); - - m_allAabbsCPU.push_back(aabb); -} - -cl_mem b3GpuSapBroadphase::getAabbBufferWS() -{ - return m_allAabbsGPU.getBufferCL(); -} - -int b3GpuSapBroadphase::getNumOverlap() -{ - return m_overlappingPairs.size(); -} -cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer() -{ - return m_overlappingPairs.getBufferCL(); -} - -b3OpenCLArray<b3Int4>& b3GpuSapBroadphase::getOverlappingPairsGPU() -{ - return m_overlappingPairs; -} -b3OpenCLArray<int>& b3GpuSapBroadphase::getSmallAabbIndicesGPU() -{ - return m_smallAabbsMappingGPU; -} -b3OpenCLArray<int>& b3GpuSapBroadphase::getLargeAabbIndicesGPU() -{ - return m_largeAabbsMappingGPU; -} diff 
--git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h deleted file mode 100644 index d17590b14a..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h +++ /dev/null @@ -1,143 +0,0 @@ -#ifndef B3_GPU_SAP_BROADPHASE_H -#define B3_GPU_SAP_BROADPHASE_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "b3SapAabb.h" -#include "Bullet3Common/shared/b3Int2.h" - -#include "b3GpuBroadphaseInterface.h" - -class b3GpuSapBroadphase : public b3GpuBroadphaseInterface -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_flipFloatKernel; - cl_kernel m_scatterKernel; - cl_kernel m_copyAabbsKernel; - cl_kernel m_sapKernel; - cl_kernel m_sap2Kernel; - cl_kernel m_prepareSumVarianceKernel; - - class b3RadixSort32CL* m_sorter; - - ///test for 3d SAP - b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2]; - b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2]; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev; - b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev; - - b3OpenCLArray<b3SortData> m_sortedAxisGPU0; - b3OpenCLArray<b3SortData> m_sortedAxisGPU1; - b3OpenCLArray<b3SortData> m_sortedAxisGPU2; - b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev; - b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev; - b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev; - - b3OpenCLArray<b3Int4> m_addedHostPairsGPU; - b3OpenCLArray<b3Int4> m_removedHostPairsGPU; - b3OpenCLArray<int> 
m_addedCountGPU; - b3OpenCLArray<int> m_removedCountGPU; - - int m_currentBuffer; - -public: - b3OpenCLArray<int> m_pairCount; - - b3OpenCLArray<b3SapAabb> m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - - virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() - { - return m_allAabbsGPU; - } - virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() - { - return m_allAabbsCPU; - } - - b3OpenCLArray<b3Vector3> m_sum; - b3OpenCLArray<b3Vector3> m_sum2; - b3OpenCLArray<b3Vector3> m_dst; - - b3OpenCLArray<int> m_smallAabbsMappingGPU; - b3AlignedObjectArray<int> m_smallAabbsMappingCPU; - - b3OpenCLArray<int> m_largeAabbsMappingGPU; - b3AlignedObjectArray<int> m_largeAabbsMappingCPU; - - b3OpenCLArray<b3Int4> m_overlappingPairs; - - //temporary gpu work memory - b3OpenCLArray<b3SortData> m_gpuSmallSortData; - b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs; - - class b3PrefixScanFloat4CL* m_prefixScanFloat4; - - enum b3GpuSapKernelType - { - B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU = 1, - B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU, - B3_GPU_SAP_KERNEL_ORIGINAL, - B3_GPU_SAP_KERNEL_BARRIER, - B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY - }; - - b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType = B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - virtual ~b3GpuSapBroadphase(); - - static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU); - } - - static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU); - } - - static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_ORIGINAL); - } - static b3GpuBroadphaseInterface* 
CreateFuncBarrier(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_BARRIER); - } - static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx, cl_device_id device, cl_command_queue q) - { - return new b3GpuSapBroadphase(ctx, device, q, B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY); - } - - virtual void calculateOverlappingPairs(int maxPairs); - virtual void calculateOverlappingPairsHost(int maxPairs); - - void reset(); - - void init3dSap(); - virtual void calculateOverlappingPairsHostIncremental3Sap(); - - virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask); - - //call writeAabbsToGpu after done making all changes (createProxy etc) - virtual void writeAabbsToGpu(); - - virtual cl_mem getAabbBufferWS(); - virtual int getNumOverlap(); - virtual cl_mem getOverlappingPairBuffer(); - - virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU(); - virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU(); - virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU(); -}; - -#endif //B3_GPU_SAP_BROADPHASE_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h deleted file mode 100644 index 60570f2605..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef B3_SAP_AABB_H -#define B3_SAP_AABB_H - -#include "Bullet3Common/b3Scalar.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -///just make sure that the b3Aabb is 16-byte aligned -B3_ATTRIBUTE_ALIGNED16(struct) -b3SapAabb : public b3Aabb{ - - }; - -#endif //B3_SAP_AABB_H diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl deleted file mode 100644 index ded4796d33..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl +++ /dev/null @@ -1,216 +0,0 @@ - - -int getPosHash(int4 gridPos, __global float4* pParams) -{ - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x &= gridDim.x - 1; - gridPos.y &= gridDim.y - 1; - gridPos.z &= gridDim.z - 1; - int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; - return hash; -} - -int4 getGridPos(float4 worldPos, __global float4* pParams) -{ - int4 gridPos; - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1); - gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1); - gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1); - return gridPos; -} - - -// calculate grid hash value for each body using its AABB -__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams ) -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - float4 bbMin = allpAABB[smallAabbMapping[index]*2]; - 
float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - pos.w = 0.f; - // get address in grid - int4 gridPos = getGridPos(pos, pParams); - int gridHash = getPosHash(gridPos, pParams); - // store grid hash and body index - int2 hashVal; - hashVal.x = gridHash; - hashVal.y = index; - pHash[index] = hashVal; -} - -__kernel void kClearCellStart( int numCells, - __global int* pCellStart ) -{ - int index = get_global_id(0); - if(index >= numCells) - { - return; - } - pCellStart[index] = -1; -} - -__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart ) -{ - __local int sharedHash[513]; - int index = get_global_id(0); - int2 sortedData; - - if(index < numObjects) - { - sortedData = pHash[index]; - // Load hash data into shared memory so that we can look - // at neighboring body's hash value without loading - // two hash values per thread - sharedHash[get_local_id(0) + 1] = sortedData.x; - if((index > 0) && (get_local_id(0) == 0)) - { - // first thread in block must load neighbor body hash - sharedHash[0] = pHash[index-1].x; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - if(index < numObjects) - { - if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) - { - cellStart[sortedData.x] = index; - } - } -} - -int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} - - - - -//search for AABB 'index' against other AABBs' in this cell -void findPairsInCell( int numObjects, - int4 gridPos, - int index, - __global int2* pHash, - __global int* pCellStart, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global float4* pParams, - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) -{ - int4 pGridDim = 
*((__global int4*)(pParams + 1)); - int maxBodiesPerCell = pGridDim.w; - int gridHash = getPosHash(gridPos, pParams); - // get start of bucket for this cell - int bucketStart = pCellStart[gridHash]; - if (bucketStart == -1) - { - return; // cell empty - } - // iterate over bodies in this cell - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - int handleIndex = as_int(min0.w); - - int bucketEnd = bucketStart + maxBodiesPerCell; - bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd; - for(int index2 = bucketStart; index2 < bucketEnd; index2++) - { - int2 cellData = pHash[index2]; - if (cellData.x != gridHash) - { - break; // no longer in same bucket - } - int unsorted_indx2 = cellData.y; - //if (unsorted_indx2 < unsorted_indx) // check not colliding with self - if (unsorted_indx2 != unsorted_indx) // check not colliding with self - { - float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0]; - float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1]; - if(testAABBOverlap(min0, max0, min1, max1)) - { - if (pairCount) - { - int handleIndex2 = as_int(min1.w); - if (handleIndex<handleIndex2) - { - int curPair = atomic_add(pairCount,1); - if (curPair<maxPairs) - { - int4 newpair; - newpair.x = handleIndex; - newpair.y = handleIndex2; - newpair.z = -1; - newpair.w = -1; - pPairBuff2[curPair] = newpair; - } - } - - } - } - } - } -} - -__kernel void kFindOverlappingPairs( int numObjects, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global int2* pHash, - __global int* pCellStart, - __global float4* pParams , - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) - -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 bbMin = 
allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - // get address in grid - int4 gridPosA = getGridPos(pos, pParams); - int4 gridPosB; - // examine only neighbouring cells - for(int z=-1; z<=1; z++) - { - gridPosB.z = gridPosA.z + z; - for(int y=-1; y<=1; y++) - { - gridPosB.y = gridPosA.y + y; - for(int x=-1; x<=1; x++) - { - gridPosB.x = gridPosA.x + x; - findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs); - } - } - } -} - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h deleted file mode 100644 index 0185417786..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h +++ /dev/null @@ -1,198 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* gridBroadphaseCL = - "int getPosHash(int4 gridPos, __global float4* pParams)\n" - "{\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x &= gridDim.x - 1;\n" - " gridPos.y &= gridDim.y - 1;\n" - " gridPos.z &= gridDim.z - 1;\n" - " int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n" - " return hash;\n" - "} \n" - "int4 getGridPos(float4 worldPos, __global float4* pParams)\n" - "{\n" - " int4 gridPos;\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n" - " gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n" - " gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n" - " return gridPos;\n" - "}\n" - "// calculate grid 
hash value for each body using its AABB\n" - "__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n" - " float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " pos.w = 0.f;\n" - " // get address in grid\n" - " int4 gridPos = getGridPos(pos, pParams);\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // store grid hash and body index\n" - " int2 hashVal;\n" - " hashVal.x = gridHash;\n" - " hashVal.y = index;\n" - " pHash[index] = hashVal;\n" - "}\n" - "__kernel void kClearCellStart( int numCells, \n" - " __global int* pCellStart )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numCells)\n" - " {\n" - " return;\n" - " }\n" - " pCellStart[index] = -1;\n" - "}\n" - "__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n" - "{\n" - " __local int sharedHash[513];\n" - " int index = get_global_id(0);\n" - " int2 sortedData;\n" - " if(index < numObjects)\n" - " {\n" - " sortedData = pHash[index];\n" - " // Load hash data into shared memory so that we can look \n" - " // at neighboring body's hash value without loading\n" - " // two hash values per thread\n" - " sharedHash[get_local_id(0) + 1] = sortedData.x;\n" - " if((index > 0) && (get_local_id(0) == 0))\n" - " {\n" - " // first thread in block must load neighbor body hash\n" - " sharedHash[0] = pHash[index-1].x;\n" - " }\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(index < numObjects)\n" - " {\n" - " if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n" - " {\n" - " cellStart[sortedData.x] = index;\n" - " 
}\n" - " }\n" - "}\n" - "int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n" - "{\n" - " return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n" - " (min0.y <= max1.y)&& (min1.y <= max0.y) && \n" - " (min0.z <= max1.z)&& (min1.z <= max0.z); \n" - "}\n" - "//search for AABB 'index' against other AABBs' in this cell\n" - "void findPairsInCell( int numObjects,\n" - " int4 gridPos,\n" - " int index,\n" - " __global int2* pHash,\n" - " __global int* pCellStart,\n" - " __global float4* allpAABB, \n" - " __global const int* smallAabbMapping,\n" - " __global float4* pParams,\n" - " volatile __global int* pairCount,\n" - " __global int4* pPairBuff2,\n" - " int maxPairs\n" - " )\n" - "{\n" - " int4 pGridDim = *((__global int4*)(pParams + 1));\n" - " int maxBodiesPerCell = pGridDim.w;\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // get start of bucket for this cell\n" - " int bucketStart = pCellStart[gridHash];\n" - " if (bucketStart == -1)\n" - " {\n" - " return; // cell empty\n" - " }\n" - " // iterate over bodies in this cell\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n" - " float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " int handleIndex = as_int(min0.w);\n" - " \n" - " int bucketEnd = bucketStart + maxBodiesPerCell;\n" - " bucketEnd = (bucketEnd > numObjects) ? 
numObjects : bucketEnd;\n" - " for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n" - " {\n" - " int2 cellData = pHash[index2];\n" - " if (cellData.x != gridHash)\n" - " {\n" - " break; // no longer in same bucket\n" - " }\n" - " int unsorted_indx2 = cellData.y;\n" - " //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n" - " if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n" - " { \n" - " float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n" - " float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n" - " if(testAABBOverlap(min0, max0, min1, max1))\n" - " {\n" - " if (pairCount)\n" - " {\n" - " int handleIndex2 = as_int(min1.w);\n" - " if (handleIndex<handleIndex2)\n" - " {\n" - " int curPair = atomic_add(pairCount,1);\n" - " if (curPair<maxPairs)\n" - " {\n" - " int4 newpair;\n" - " newpair.x = handleIndex;\n" - " newpair.y = handleIndex2;\n" - " newpair.z = -1;\n" - " newpair.w = -1;\n" - " pPairBuff2[curPair] = newpair;\n" - " }\n" - " }\n" - " \n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void kFindOverlappingPairs( int numObjects,\n" - " __global float4* allpAABB, \n" - " __global const int* smallAabbMapping,\n" - " __global int2* pHash, \n" - " __global int* pCellStart, \n" - " __global float4* pParams ,\n" - " volatile __global int* pairCount,\n" - " __global int4* pPairBuff2,\n" - " int maxPairs\n" - " )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n" - " float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " // get address in grid\n" - " int4 gridPosA = getGridPos(pos, pParams);\n" - " int4 
gridPosB; \n" - " // examine only neighbouring cells\n" - " for(int z=-1; z<=1; z++) \n" - " {\n" - " gridPosB.z = gridPosA.z + z;\n" - " for(int y=-1; y<=1; y++) \n" - " {\n" - " gridPosB.y = gridPosA.y + y;\n" - " for(int x=-1; x<=1; x++) \n" - " {\n" - " gridPosB.x = gridPosA.x + x;\n" - " findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl deleted file mode 100644 index c375b9bf37..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ /dev/null @@ -1,767 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Initial Author Jackson Lee, 2014 - -typedef float b3Scalar; -typedef float4 b3Vector3; -#define b3Max max -#define b3Min min -#define b3Sqrt sqrt - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} b3AabbCL; - - -unsigned int interleaveBits(unsigned int x) -{ - //........ ........ ......12 3456789A //x - //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits - - //......12 3456789A ......12 3456789A //x ^ (x << 16) - //11111111 ........ ........ 11111111 //0x FF 00 00 FF - //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF; - - //......12 ........ 3456789A 3456789A //x ^ (x << 8) - //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F - //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F; - - //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4) - //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3 - //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3; - - //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2) - //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49 - //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249; - - //........ ........ 
......11 11111111 //0x000003FF - x &= 0x000003FF; //Clear all bits above bit 10 - - x = (x ^ (x << 16)) & 0xFF0000FF; - x = (x ^ (x << 8)) & 0x0300F00F; - x = (x ^ (x << 4)) & 0x030C30C3; - x = (x ^ (x << 2)) & 0x09249249; - - return x; -} -unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z) -{ - return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2; -} - -__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate) -{ - int separatedAabbIndex = get_global_id(0); - if(separatedAabbIndex >= numAabbsToSeparate) return; - - int unseparatedAabbIndex = aabbIndices[separatedAabbIndex]; - out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex]; -} - -//Should replace with an optimized parallel reduction -__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge) -{ - //Each time this kernel is added to the command queue, - //the number of AABBs needing to be merged is halved - // - //Example with 159 AABBs: - // numRemainingAabbs == 159 / 2 + 159 % 2 == 80 - // numMergedAabbs == 159 - 80 == 79 - //So, indices [0, 78] are merged with [0 + 80, 78 + 80] - - int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2; - int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs; - - int aabbIndex = get_global_id(0); - if(aabbIndex >= numMergedAabbs) return; - - int otherAabbIndex = aabbIndex + numRemainingAabbs; - - b3AabbCL aabb = out_mergedAabb[aabbIndex]; - b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min); - mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max); - out_mergedAabb[aabbIndex] = mergedAabb; -} - -__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, - __global SortDataCL* out_mortonCodesAndAabbIndices, 
int numAabbs) -{ - int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index - if(leafNodeIndex >= numAabbs) return; - - b3AabbCL mergedAabb = mergedAabbOfAllNodes[0]; - b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f; - b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024; - - b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex]; - b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f; - b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter; - - //Quantize into integer coordinates - //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size - b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize; - - int4 discretePosition; - discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) ); - discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) ); - discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) ); - - //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023] - discretePosition = b3Max( -512, b3Min(discretePosition, 511) ); - discretePosition += 512; - - //Interleave bits(assign a morton code, also known as a z-curve) - unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z); - - // - SortDataCL mortonCodeIndexPair; - mortonCodeIndexPair.m_key = mortonCode; - mortonCodeIndexPair.m_value = leafNodeIndex; - - out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair; -} - -#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128 - -//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes. -//If it is set, then the index is for an internal node; otherwise, it is a leaf node. -//In both cases, the bit should be cleared to access the actual node index. 
-int isLeafNode(int index) { return (index >> 31 == 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } - -//From sap.cl -#define NEW_PAIR_MARKER -1 - -bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -//From sap.cl - -__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - - __global SortDataCL* mortonCodesAndAabbIndices, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numQueryAabbs) -{ - //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since - //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent) - int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(queryBvhNodeIndex >= numQueryAabbs) return; - - int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value; - b3AabbCL queryAabb = rigidAabbs[queryRigidIndex]; - - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //Optimization - if the BVH is structured 
as a binary radix tree, then - //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]). - //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself. - { - int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y; - if(highestLeafIndex <= queryBvhNodeIndex) continue; - } - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) ) - { - if(isLeaf) - { - int4 pair; - pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3]; - pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - - } -} - - -//From rayCastKernels.cl -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; -//From rayCastKernels.cl - -b3Vector3 b3Vector3_normalize(b3Vector3 v) -{ - b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f}; - return normalize(normal); //OpenCL normalize == vector4 normalize -} -b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } -b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } - -int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) -{ - //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ). 
- //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane. - // - //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane - //and min.x will be the far plane; otherwise, it is reversed. - // - //In order for there to be a collision, the t_min and t_max of each pair must overlap. - //This can be tested for by selecting the highest t_min and lowest t_max and comparing them. - - int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y) - - //When using vector types, the select() function checks the most signficant bit, - //but isless() sets the least significant bit. - isNegative <<= 31; - - //select(b, a, condition) == condition ? a : b - //When using select() with vector types, (condition[i]) is true if its most significant bit is 1 - b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection; - b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection; - - b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = rayLength; - - //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. - //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) - //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN. 
- t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); - t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); - - return (t_min_final <= t_max_final); -} - -__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - __global SortDataCL* mortonCodesAndAabbIndices, - - __global b3RayInfo* rays, - - __global int* out_numRayRigidPairs, - __global int2* out_rayRigidPairs, - int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - // - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - // - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? 
rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) ) - { - if(isLeaf) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - } -} - -__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids) -{ - int smallAabbIndex = get_global_id(0); - if(smallAabbIndex >= numSmallAabbRigids) return; - - b3AabbCL smallAabb = smallAabbs[smallAabbIndex]; - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL largeAabb = largeAabbs[i]; - if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) ) - { - int4 pair; - pair.x = largeAabb.m_minIndices[3]; - pair.y = smallAabb.m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - } -} -__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays, - __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs, - int numLargeAabbRigids, int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( 
b3Vector3_length2(rayTo - rayFrom) ); - - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL rigidAabb = largeRigidAabbs[i]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) ) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabb.m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - } -} - - -//Set so that it is always greater than the actual common prefixes, and never selected as a parent node. -//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve. -//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node. -//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve). -#define B3_PLBVH_INVALID_COMMON_PREFIX 128 - -#define B3_PLBVH_ROOT_NODE_MARKER -1 - -#define b3Int64 long - -int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); } -b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) -{ - //This function only needs to return (i & j) in order for the algorithm to work, - //but it may help with debugging to mask out the lower bits. 
- - b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j); - - b3Int64 sharedBits = i & j; - b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0 - - return sharedBits & bitmask; -} - -//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths -int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB) -{ - return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) ); -} - -__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices, - __global b3Int64* out_commonPrefixes, - __global int* out_commonPrefixLengths, - int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if (internalNodeIndex >= numInternalNodes) return; - - //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index, - //and the number of internal nodes is always numLeafNodes - 1 - int leftLeafIndex = internalNodeIndex; - int rightLeafIndex = internalNodeIndex + 1; - - int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key; - int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key; - - //Binary radix tree construction algorithm does not work if there are duplicate morton codes. - //Append the index of each leaf node to each morton code so that there are no duplicates. - //The algorithm also requires that the morton codes are sorted in ascending order; this requirement - //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true. 
- // - //upsample(a, b) == ( ((b3Int64)a) << 32) | b - b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex); - b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex); - - out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); - out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); -} - - -__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes, - __global int2* out_childNodes, int numLeafNodes) -{ - int leafNodeIndex = get_global_id(0); - if (leafNodeIndex >= numLeafNodes) return; - - int numInternalNodes = numLeafNodes - 1; - - int leftSplitIndex = leafNodeIndex - 1; - int rightSplitIndex = leafNodeIndex; - - int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - //Parent node is the highest adjacent common prefix that is lower than the node's common prefix - //Leaf nodes are considered as having the highest common prefix - int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); - - //Handle cases for the edge nodes; the first and last node - //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX - if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; - if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; - - int parentNodeIndex = (isLeftHigherCommonPrefix) ? 
leftSplitIndex : rightSplitIndex; - out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex; - - int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - int isLeaf = 1; - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex); -} - -__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths, - __global int2* out_childNodes, - __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex, - int numInternalNodes) -{ - int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - b3Int64 nodePrefix = commonPrefixes[internalNodeIndex]; - int nodePrefixLength = commonPrefixLengths[internalNodeIndex]; - -//#define USE_LINEAR_SEARCH -#ifdef USE_LINEAR_SEARCH - int leftIndex = -1; - int rightIndex = -1; - - //Find nearest element to left with a lower common prefix - for(int i = internalNodeIndex - 1; i >= 0; --i) - { - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - leftIndex = i; - break; - } - } - - //Find nearest element to right with a lower common prefix - for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i) - { - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - rightIndex = i; - break; - } - } - -#else //Use binary search - - //Find nearest element to left with a lower common prefix - int leftIndex = -1; - { - int lower = 0; - int upper = 
internalNodeIndex - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int right = mid + 1; - if(right < internalNodeIndex) - { - b3Int64 rightPrefix = commonPrefixes[right]; - int rightPrefixLength = commonPrefixLengths[right]; - - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - lower = right; - leftIndex = right; - } - else - { - leftIndex = mid; - break; - } - } - else - { - leftIndex = mid; - break; - } - } - else upper = mid - 1; - } - } - - //Find nearest element to right with a lower common prefix - int rightIndex = -1; - { - int lower = internalNodeIndex + 1; - int upper = numInternalNodes - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int left = mid - 1; - if(left > internalNodeIndex) - { - b3Int64 leftPrefix = commonPrefixes[left]; - int leftPrefixLength = commonPrefixLengths[left]; - - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - upper = left; - rightIndex = left; - } - else - { - rightIndex = mid; - break; - } - } - else - { - rightIndex = mid; - break; - } - } - else lower = mid + 1; - } - } -#endif - - //Select parent - { - int leftPrefixLength = (leftIndex != -1) ? 
commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength); - - if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false; - else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true; - - int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex; - - int isRootNode = (leftIndex == -1 && rightIndex == -1); - out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER; - - int isLeaf = 0; - if(!isRootNode) - { - int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } - else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } -} - -__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes, - __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes) -{ - if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0); - - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - // - int distanceFromRoot = 0; - { - int parentIndex = internalNodeParentNodes[internalNodeIndex]; - while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER) - { - parentIndex = internalNodeParentNodes[parentIndex]; - ++distanceFromRoot; - } - } - out_distanceFromRoot[internalNodeIndex] = distanceFromRoot; - - // - __local int localMaxDistanceFromRoot; - if( 
get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - atomic_max(&localMaxDistanceFromRoot, distanceFromRoot); - barrier(CLK_LOCAL_MEM_FENCE); - - if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot); -} - -__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices, - __global int2* childNodes, - __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs, - int maxDistanceFromRoot, int processedDistance, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int distance = distanceFromRoot[internalNodeIndex]; - - if(distance == processedDistance) - { - int leftChildIndex = childNodes[internalNodeIndex].x; - int rightChildIndex = childNodes[internalNodeIndex].y; - - int isLeftChildLeaf = isLeafNode(leftChildIndex); - int isRightChildLeaf = isLeafNode(rightChildIndex); - - leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex); - rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex); - - //leftRigidIndex/rightRigidIndex is not used if internal node - int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1; - int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1; - - b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex]; - b3AabbCL rightChildAabb = (isRightChildLeaf) ? 
leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min); - mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max); - internalNodeAabbs[internalNodeIndex] = mergedAabb; - } -} - -__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int numLeafNodes = numInternalNodes + 1; - - int2 childNodes = internalNodeChildNodes[internalNodeIndex]; - - int2 leafIndexRange; //x == min leaf index, y == max leaf index - - //Find lowest leaf index covered by this internal node - { - int lowestIndex = childNodes.x; //childNodes.x == Left child - while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x; - leafIndexRange.x = lowestIndex; - } - - //Find highest leaf index covered by this internal node - { - int highestIndex = childNodes.y; //childNodes.y == Right child - while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y; - leafIndexRange.y = highestIndex; - } - - // - out_leafIndexRanges[internalNodeIndex] = leafIndexRange; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h deleted file mode 100644 index c02877dde9..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ /dev/null @@ -1,728 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* parallelLinearBvhCL = - "/*\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will 
the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Initial Author Jackson Lee, 2014\n" - "typedef float b3Scalar;\n" - "typedef float4 b3Vector3;\n" - "#define b3Max max\n" - "#define b3Min min\n" - "#define b3Sqrt sqrt\n" - "typedef struct\n" - "{\n" - " unsigned int m_key;\n" - " unsigned int m_value;\n" - "} SortDataCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} b3AabbCL;\n" - "unsigned int interleaveBits(unsigned int x)\n" - "{\n" - " //........ ........ ......12 3456789A //x\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n" - " \n" - " //......12 3456789A ......12 3456789A //x ^ (x << 16)\n" - " //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n" - " //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " \n" - " //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n" - " //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n" - " //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n" - " \n" - " //..12..12 ....3456 3456.... 
789A789A //x ^ (x << 4)\n" - " //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n" - " //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n" - " \n" - " //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n" - " //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " //........ ........ ......11 11111111 //0x000003FF\n" - " x &= 0x000003FF; //Clear all bits above bit 10\n" - " \n" - " x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " x = (x ^ (x << 8)) & 0x0300F00F;\n" - " x = (x ^ (x << 4)) & 0x030C30C3;\n" - " x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " return x;\n" - "}\n" - "unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n" - "{\n" - " return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n" - "}\n" - "__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n" - "{\n" - " int separatedAabbIndex = get_global_id(0);\n" - " if(separatedAabbIndex >= numAabbsToSeparate) return;\n" - " int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n" - " out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n" - "}\n" - "//Should replace with an optimized parallel reduction\n" - "__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n" - "{\n" - " //Each time this kernel is added to the command queue, \n" - " //the number of AABBs needing to be merged is halved\n" - " //\n" - " //Example with 159 AABBs:\n" - " // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n" - " // numMergedAabbs == 159 - 80 == 79\n" - " //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n" - " \n" - " int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n" - " int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n" - " \n" - " int aabbIndex = 
get_global_id(0);\n" - " if(aabbIndex >= numMergedAabbs) return;\n" - " \n" - " int otherAabbIndex = aabbIndex + numRemainingAabbs;\n" - " \n" - " b3AabbCL aabb = out_mergedAabb[aabbIndex];\n" - " b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n" - " mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n" - " out_mergedAabb[aabbIndex] = mergedAabb;\n" - "}\n" - "__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n" - " __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n" - "{\n" - " int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n" - " if(leafNodeIndex >= numAabbs) return;\n" - " \n" - " b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n" - " b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n" - " b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n" - " \n" - " b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n" - " b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n" - " b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n" - " \n" - " //Quantize into integer coordinates\n" - " //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n" - " b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n" - " \n" - " int4 discretePosition;\n" - " discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n" - " discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n" - " discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? 
gridPosition.z : floor(gridPosition.z) );\n" - " \n" - " //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n" - " discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n" - " discretePosition += 512;\n" - " \n" - " //Interleave bits(assign a morton code, also known as a z-curve)\n" - " unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n" - " \n" - " //\n" - " SortDataCL mortonCodeIndexPair;\n" - " mortonCodeIndexPair.m_key = mortonCode;\n" - " mortonCodeIndexPair.m_value = leafNodeIndex;\n" - " \n" - " out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n" - "}\n" - "#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n" - "//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n" - "//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n" - "//In both cases, the bit should be cleared to access the actual node index.\n" - "int isLeafNode(int index) { return (index >> 31 == 0); }\n" - "int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n" - "int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n" - "//From sap.cl\n" - "#define NEW_PAIR_MARKER -1\n" - "bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? 
false : overlap;\n" - " return overlap;\n" - "}\n" - "//From sap.cl\n" - "__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " \n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numQueryAabbs)\n" - "{\n" - " //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n" - " //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n" - " int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(queryBvhNodeIndex >= numQueryAabbs) return;\n" - " \n" - " int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n" - " b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n" - " \n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //Optimization - if the BVH is structured as a binary radix tree, then\n" - " //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n" - " //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n" - " {\n" - " int highestLeafIndex = (isLeaf) ? 
bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" - " if(highestLeafIndex <= queryBvhNodeIndex) continue;\n" - " }\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int4 pair;\n" - " pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n" - " pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " \n" - " }\n" - "}\n" - "//From rayCastKernels.cl\n" - "typedef struct\n" - "{\n" - " float4 m_from;\n" - " float4 m_to;\n" - "} b3RayInfo;\n" - "//From rayCastKernels.cl\n" - "b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" - "{\n" - " b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n" - " return normalize(normal); //OpenCL normalize == vector4 normalize\n" - "}\n" - "b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" - "b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" - "int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" - "{\n" - " //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n" - " //t_min is the point of intersection with 
the closer plane, t_max is the point of intersection with the farther plane.\n" - " //\n" - " //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n" - " //and min.x will be the far plane; otherwise, it is reversed.\n" - " //\n" - " //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n" - " //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n" - " \n" - " int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)\n" - " \n" - " //When using vector types, the select() function checks the most signficant bit, \n" - " //but isless() sets the least significant bit.\n" - " isNegative <<= 31;\n" - " //select(b, a, condition) == condition ? a : b\n" - " //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n" - " b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " \n" - " b3Scalar t_min_final = 0.0f;\n" - " b3Scalar t_max_final = rayLength;\n" - " \n" - " //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" - " //Behavior of min()/max() with NaNs is undefined. 
(See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" - " //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n" - " t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" - " t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" - " \n" - " return (t_min_final <= t_max_final);\n" - "}\n" - "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " \n" - " __global b3RayInfo* rays,\n" - " \n" - " __global int* out_numRayRigidPairs, \n" - " __global int2* out_rayRigidPairs,\n" - " int maxRayRigidPairs, int numRays)\n" - "{\n" - " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " //\n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " //\n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? 
rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n" - "{\n" - " int smallAabbIndex = get_global_id(0);\n" - " if(smallAabbIndex >= numSmallAabbRigids) return;\n" - " \n" - " b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL largeAabb = largeAabbs[i];\n" - " if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n" - " {\n" - " int4 pair;\n" - " pair.x = largeAabb.m_minIndices[3];\n" - " pair.y = smallAabb.m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n" - " __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n" - " int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n" - "{\n" 
- " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL rigidAabb = largeRigidAabbs[i];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabb.m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " }\n" - "}\n" - "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n" - "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n" - "//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n" - "//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n" - "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n" - "#define B3_PLBVH_ROOT_NODE_MARKER -1\n" - "#define b3Int64 long\n" - "int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n" - "b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n" - "{\n" - " //This function only needs to return (i & j) in order for the algorithm to work,\n" - " //but it may help with debugging to mask out the lower bits.\n" - " b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n" - " b3Int64 sharedBits = i & j;\n" - " b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n" - " \n" - " 
return sharedBits & bitmask;\n" - "}\n" - "//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n" - "int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n" - "{\n" - " return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n" - "}\n" - "__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global b3Int64* out_commonPrefixes,\n" - " __global int* out_commonPrefixLengths,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if (internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n" - " //and the number of internal nodes is always numLeafNodes - 1\n" - " int leftLeafIndex = internalNodeIndex;\n" - " int rightLeafIndex = internalNodeIndex + 1;\n" - " \n" - " int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n" - " int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n" - " \n" - " //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n" - " //Append the index of each leaf node to each morton code so that there are no duplicates.\n" - " //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n" - " //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n" - " //\n" - " //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n" - " b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n" - " b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n" - " \n" - " out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - " out_commonPrefixLengths[internalNodeIndex] = 
computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - "}\n" - "__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n" - " __global int2* out_childNodes, int numLeafNodes)\n" - "{\n" - " int leafNodeIndex = get_global_id(0);\n" - " if (leafNodeIndex >= numLeafNodes) return;\n" - " \n" - " int numInternalNodes = numLeafNodes - 1;\n" - " \n" - " int leftSplitIndex = leafNodeIndex - 1;\n" - " int rightSplitIndex = leafNodeIndex;\n" - " \n" - " int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n" - " //Leaf nodes are considered as having the highest common prefix\n" - " int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" - " \n" - " //Handle cases for the edge nodes; the first and last node\n" - " //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n" - " if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" - " if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherCommonPrefix) ? 
leftSplitIndex : rightSplitIndex;\n" - " out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n" - " \n" - " int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " int isLeaf = 1;\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n" - "}\n" - "__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n" - " __global int2* out_childNodes,\n" - " __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n" - " int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n" - " \n" - "//#define USE_LINEAR_SEARCH\n" - "#ifdef USE_LINEAR_SEARCH\n" - " int leftIndex = -1;\n" - " int rightIndex = -1;\n" - " \n" - " //Find nearest element to left with a lower common prefix\n" - " for(int i = internalNodeIndex - 1; i >= 0; --i)\n" - " {\n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " leftIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n" - " {\n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " 
if(nodeRightSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " rightIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - "#else //Use binary search\n" - " //Find nearest element to left with a lower common prefix\n" - " int leftIndex = -1;\n" - " {\n" - " int lower = 0;\n" - " int upper = internalNodeIndex - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int right = mid + 1;\n" - " if(right < internalNodeIndex)\n" - " {\n" - " b3Int64 rightPrefix = commonPrefixes[right];\n" - " int rightPrefixLength = commonPrefixLengths[right];\n" - " \n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n" - " if(nodeRightSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " lower = right;\n" - " leftIndex = right;\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else upper = mid - 1;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " int rightIndex = -1;\n" - " {\n" - " int lower = internalNodeIndex + 1;\n" - " int upper = numInternalNodes - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int left = mid - 1;\n" - " if(left > internalNodeIndex)\n" - " {\n" - 
" b3Int64 leftPrefix = commonPrefixes[left];\n" - " int leftPrefixLength = commonPrefixLengths[left];\n" - " \n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " upper = left;\n" - " rightIndex = left;\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else lower = mid + 1;\n" - " }\n" - " }\n" - "#endif\n" - " \n" - " //Select parent\n" - " {\n" - " int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n" - " \n" - " if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n" - " else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n" - " \n" - " int isRootNode = (leftIndex == -1 && rightIndex == -1);\n" - " out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? 
parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n" - " \n" - " int isLeaf = 0;\n" - " if(!isRootNode)\n" - " {\n" - " int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - " else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - "}\n" - "__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n" - " __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n" - "{\n" - " if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //\n" - " int distanceFromRoot = 0;\n" - " {\n" - " int parentIndex = internalNodeParentNodes[internalNodeIndex];\n" - " while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n" - " {\n" - " parentIndex = internalNodeParentNodes[parentIndex];\n" - " ++distanceFromRoot;\n" - " }\n" - " }\n" - " out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n" - " \n" - " //\n" - " __local int localMaxDistanceFromRoot;\n" - " if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n" - "}\n" - "__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int2* 
childNodes,\n" - " __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n" - " int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int distance = distanceFromRoot[internalNodeIndex];\n" - " \n" - " if(distance == processedDistance)\n" - " {\n" - " int leftChildIndex = childNodes[internalNodeIndex].x;\n" - " int rightChildIndex = childNodes[internalNodeIndex].y;\n" - " \n" - " int isLeftChildLeaf = isLeafNode(leftChildIndex);\n" - " int isRightChildLeaf = isLeafNode(rightChildIndex);\n" - " \n" - " leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n" - " rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n" - " \n" - " //leftRigidIndex/rightRigidIndex is not used if internal node\n" - " int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n" - " int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n" - " \n" - " b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n" - " b3AabbCL rightChildAabb = (isRightChildLeaf) ? 
leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n" - " mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n" - " internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" - " }\n" - "}\n" - "__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int numLeafNodes = numInternalNodes + 1;\n" - " \n" - " int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n" - " \n" - " int2 leafIndexRange; //x == min leaf index, y == max leaf index\n" - " \n" - " //Find lowest leaf index covered by this internal node\n" - " {\n" - " int lowestIndex = childNodes.x; //childNodes.x == Left child\n" - " while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n" - " leafIndexRange.x = lowestIndex;\n" - " }\n" - " \n" - " //Find highest leaf index covered by this internal node\n" - " {\n" - " int highestIndex = childNodes.y; //childNodes.y == Right child\n" - " while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n" - " leafIndexRange.y = highestIndex;\n" - " }\n" - " \n" - " //\n" - " out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl deleted file mode 100644 index 93f77a6433..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ /dev/null @@ -1,389 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define NEW_PAIR_MARKER -1 - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -/// conservative test for overlap between two aabbs -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2); -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? 
false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - - -__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numUnsortedAabbs) - return; - - int j = get_global_id(1); - if (j>=numUnSortedAabbs2) - return; - - - __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]]; - __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]]; - - if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2)) - { - int4 myPair; - - int xIndex = unsortedAabbPtr[0].m_minIndices[3]; - int yIndex = unsortedAabbPtr2[0].m_minIndices[3]; - if (xIndex>yIndex) - { - int tmp = xIndex; - xIndex=yIndex; - yIndex=tmp; - } - - myPair.x = xIndex; - myPair.y = yIndex; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } -} - - - -__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - 
int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - -__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - break; - } - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - - - - -__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - 
atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); -} - - -__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - __local btAabbCL localAabbs[128];// = aabbs[i]; - - btAabbCL myAabb; - - myAabb = (i<numObjects)? aabbs[i]:aabbs[0]; - float testValue = myAabb.m_maxElems[axis]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - int localCount=0; - int block=0; - localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = (i+block+64)<numObjects? 
aabbs[i+block+64]: aabbs[0]; - - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1])) - { - int4 myPair; - myPair.x = myAabb.m_minIndices[3]; - myPair.y = localAabbs[localCount+localId+1].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - localCount++; - if (localCount==64) - { - localCount = 0; - block+=64; - localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = ((i+64+block)<numObjects) ? 
aabbs[i+block+64] : aabbs[0]; - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); - -} - - - - -//http://stereopsis.com/radix.html -unsigned int FloatFlip(float fl); -unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -} -float IFloatFlip(unsigned int f); -float IFloatFlip(unsigned int f) -{ - unsigned int mask = ((f >> 31) - 1) | 0x80000000; - unsigned int fl = f ^ mask; - return *(float*)&fl; -} - - - - -__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - int src = destAabbs[i].m_maxIndices[3]; - destAabbs[i] = allAabbs[src]; - destAabbs[i].m_maxIndices[3] = src; -} - - -__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - - sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]); - sortData[i].y = i; - -} - - -__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]]; -} - - - -__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs) -{ - int i = get_global_id(0); - if (i>=numAabbs) - return; - - btAabbCL smallAabb = allAabbs[smallAabbMapping[i]]; - - float4 s; - s = (smallAabb.m_max+smallAabb.m_min)*0.5f; - sum[i]=s; - sum2[i]=s*s; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h 
b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h deleted file mode 100644 index d6999b94cb..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ /dev/null @@ -1,341 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* sapCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#define NEW_PAIR_MARKER -1\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "/// conservative test for overlap between two aabbs\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? 
false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? 
false : overlap;\n" - " return overlap;\n" - "}\n" - "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numUnsortedAabbs)\n" - " return;\n" - " int j = get_global_id(1);\n" - " if (j>=numUnSortedAabbs2)\n" - " return;\n" - " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" - " __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" - " if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" - " {\n" - " int4 myPair;\n" - " \n" - " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" - " int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" - " if (xIndex>yIndex)\n" - " {\n" - " int tmp = xIndex;\n" - " xIndex=yIndex;\n" - " yIndex=tmp;\n" - " }\n" - " \n" - " myPair.x = xIndex;\n" - " myPair.y = yIndex;\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " for (int j=i+1;j<numObjects;j++)\n" - " {\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " 
{\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " for (int j=i+1;j<numObjects;j++)\n" - " {\n" - " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" - " {\n" - " break;\n" - " }\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int localId = get_local_id(0);\n" - " __local int numActiveWgItems[1];\n" - " __local int breakRequest[1];\n" - " if (localId==0)\n" - " {\n" - " numActiveWgItems[0] = 0;\n" - " breakRequest[0] = 0;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " atomic_inc(numActiveWgItems);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " int localBreak = 0;\n" - " int j=i+1;\n" - " do\n" - " {\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j<numObjects)\n" - " {\n" - " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" - " {\n" - " if (!localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j>=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " 
}\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - " j++;\n" - " } while (breakRequest[0]<numActiveWgItems[0]);\n" - "}\n" - "__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int localId = get_local_id(0);\n" - " __local int numActiveWgItems[1];\n" - " __local int breakRequest[1];\n" - " __local btAabbCL localAabbs[128];// = aabbs[i];\n" - " \n" - " btAabbCL myAabb;\n" - " \n" - " myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n" - " float testValue = myAabb.m_maxElems[axis];\n" - " \n" - " if (localId==0)\n" - " {\n" - " numActiveWgItems[0] = 0;\n" - " breakRequest[0] = 0;\n" - " }\n" - " int localCount=0;\n" - " int block=0;\n" - " localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n" - " localAabbs[localId+64] = (i+block+64)<numObjects? 
aabbs[i+block+64]: aabbs[0];\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " atomic_inc(numActiveWgItems);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " int localBreak = 0;\n" - " \n" - " int j=i+1;\n" - " do\n" - " {\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j<numObjects)\n" - " {\n" - " if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n" - " {\n" - " if (!localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j>=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = myAabb.m_minIndices[3];\n" - " myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " localCount++;\n" - " if (localCount==64)\n" - " {\n" - " localCount = 0;\n" - " block+=64; \n" - " localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n" - " localAabbs[localId+64] = ((i+64+block)<numObjects) ? 
aabbs[i+block+64] : aabbs[0];\n" - " }\n" - " j++;\n" - " \n" - " } while (breakRequest[0]<numActiveWgItems[0]);\n" - " \n" - "}\n" - "//http://stereopsis.com/radix.html\n" - "unsigned int FloatFlip(float fl);\n" - "unsigned int FloatFlip(float fl)\n" - "{\n" - " unsigned int f = *(unsigned int*)&fl;\n" - " unsigned int mask = -(int)(f >> 31) | 0x80000000;\n" - " return f ^ mask;\n" - "}\n" - "float IFloatFlip(unsigned int f);\n" - "float IFloatFlip(unsigned int f)\n" - "{\n" - " unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" - " unsigned int fl = f ^ mask;\n" - " return *(float*)&fl;\n" - "}\n" - "__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " int src = destAabbs[i].m_maxIndices[3];\n" - " destAabbs[i] = allAabbs[src];\n" - " destAabbs[i].m_maxIndices[3] = src;\n" - "}\n" - "__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " \n" - " sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" - " sortData[i].y = i;\n" - " \n" - "}\n" - "__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" - "}\n" - "__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numAabbs)\n" - " return;\n" - " \n" - " btAabbCL smallAabb = 
allAabbs[smallAabbMapping[i]];\n" - " \n" - " float4 s;\n" - " s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" - " sum[i]=s;\n" - " sum2[i]=s*s; \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h deleted file mode 100644 index 6146538263..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLInclude.h +++ /dev/null @@ -1,51 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#ifndef B3_OPENCL_INCLUDE_H -#define B3_OPENCL_INCLUDE_H - -#ifdef B3_USE_CLEW -#include "clew/clew.h" -#else - -#ifdef __APPLE__ -#ifdef USE_MINICL -#include <MiniCL/cl.h> -#else -#include <OpenCL/cl.h> -#include <OpenCL/cl_ext.h> //clLogMessagesToStderrAPPLE -#endif -#else -#ifdef USE_MINICL -#include <MiniCL/cl.h> -#else -#include <CL/cl.h> -#ifdef _WIN32 -#include "CL/cl_gl.h" -#endif //_WIN32 -#endif -#endif //__APPLE__ -#endif //B3_USE_CLEW - -#include <assert.h> -#include <stdio.h> -#define oclCHECKERROR(a, b) \ - if ((a) != (b)) \ - { \ - printf("OCL Error : %d\n", (a)); \ - assert((a) == (b)); \ - } - -#endif //B3_OPENCL_INCLUDE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp deleted file mode 100644 index fe54ea5ec9..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.cpp +++ /dev/null @@ -1,963 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -//Original author: Roman Ponomarev -//Mostly Reimplemented by Erwin Coumans - -bool gDebugForceLoadingFromSource = false; -bool gDebugSkipLoadingBinary = false; - -#include "Bullet3Common/b3Logging.h" - -#include <string.h> - -#ifdef _WIN32 -#pragma warning(disable : 4996) -#endif -#include "b3OpenCLUtils.h" -//#include "b3OpenCLInclude.h" - -#include <stdio.h> -#include <stdlib.h> - -#define B3_MAX_CL_DEVICES 16 //who needs 16 devices? - -#ifdef _WIN32 -#include <windows.h> -#endif - -#include <assert.h> -#define b3Assert assert -#ifndef _WIN32 -#include <sys/stat.h> - -#endif - -static const char* sCachedBinaryPath = "cache"; - -//Set the preferred platform vendor using the OpenCL SDK -static const char* spPlatformVendor = -#if defined(CL_PLATFORM_MINI_CL) - "MiniCL, SCEA"; -#elif defined(CL_PLATFORM_AMD) - "Advanced Micro Devices, Inc."; -#elif defined(CL_PLATFORM_NVIDIA) - "NVIDIA Corporation"; -#elif defined(CL_PLATFORM_INTEL) - "Intel(R) Corporation"; -#elif defined(B3_USE_CLEW) - "clew (OpenCL Extension Wrangler library)"; -#else - "Unknown Vendor"; -#endif - -#ifndef CL_PLATFORM_MINI_CL -#ifdef _WIN32 -#ifndef B3_USE_CLEW -#include "CL/cl_gl.h" -#endif //B3_USE_CLEW -#endif //_WIN32 -#endif - -void MyFatalBreakAPPLE(const char* errstr, - const void* private_info, - size_t cb, - void* user_data) -{ - const char* patloc = strstr(errstr, "Warning"); - //find out if it is a warning or error, exit if error - - if (patloc) - { - b3Warning("Warning: %s\n", errstr); - } - else - { - b3Error("Error: %s\n", errstr); - b3Assert(0); - } -} - -#ifdef B3_USE_CLEW - -int b3OpenCLUtils_clewInit() -{ - int result = -1; - -#ifdef _WIN32 - const char* cl = "OpenCL.dll"; -#elif defined __APPLE__ - const char* cl = "/System/Library/Frameworks/OpenCL.framework/Versions/Current/OpenCL"; -#else //presumable Linux? 
\ - //linux (tested on Ubuntu 12.10 with Catalyst 13.4 beta drivers, not that there is no symbolic link from libOpenCL.so - const char* cl = "libOpenCL.so.1"; - result = clewInit(cl); - if (result != CLEW_SUCCESS) - { - cl = "libOpenCL.so"; - } - else - { - clewExit(); - } -#endif - result = clewInit(cl); - if (result != CLEW_SUCCESS) - { - b3Error("clewInit failed with error code %d\n", result); - } - else - { - b3Printf("clewInit succesfull using %s\n", cl); - } - return result; -} -#endif - -int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id pPlatforms[10] = {0}; - - cl_uint numPlatforms = 0; - cl_int ciErrNum = clGetPlatformIDs(10, pPlatforms, &numPlatforms); - //cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - } - return numPlatforms; -} - -const char* b3OpenCLUtils_getSdkVendorName() -{ - return spPlatformVendor; -} - -void b3OpenCLUtils_setCachePath(const char* path) -{ - sCachedBinaryPath = path; -} - -cl_platform_id b3OpenCLUtils_getPlatform(int platformIndex0, cl_int* pErrNum) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_platform_id platform = 0; - unsigned int platformIndex = (unsigned int)platformIndex0; - cl_uint numPlatforms; - cl_int ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - - if (platformIndex < numPlatforms) - { - cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - return platform; - } - - platform = platforms[platformIndex]; - - free(platforms); - } - - return platform; -} - -void b3OpenCLUtils::getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo) -{ - b3Assert(platform); - cl_int ciErrNum; - ciErrNum = clGetPlatformInfo(platform, 
CL_PLATFORM_VENDOR, B3_MAX_STRING_LENGTH, platformInfo->m_platformVendor, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_NAME, B3_MAX_STRING_LENGTH, platformInfo->m_platformName, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); - ciErrNum = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, B3_MAX_STRING_LENGTH, platformInfo->m_platformVersion, NULL); - oclCHECKERROR(ciErrNum, CL_SUCCESS); -} - -void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform) -{ - b3OpenCLPlatformInfo platformInfo; - b3OpenCLUtils::getPlatformInfo(platform, &platformInfo); - b3Printf("Platform info:\n"); - b3Printf(" CL_PLATFORM_VENDOR: \t\t\t%s\n", platformInfo.m_platformVendor); - b3Printf(" CL_PLATFORM_NAME: \t\t\t%s\n", platformInfo.m_platformName); - b3Printf(" CL_PLATFORM_VERSION: \t\t\t%s\n", platformInfo.m_platformVersion); -} - -cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex) -{ - cl_context retContext = 0; - cl_int ciErrNum = 0; - cl_uint num_entries; - cl_device_id devices[B3_MAX_CL_DEVICES]; - cl_uint num_devices; - cl_context_properties* cprops; - - /* - * If we could find our platform, use it. Otherwise pass a NULL and get whatever the - * implementation thinks we should be using. 
- */ - cl_context_properties cps[7] = {0, 0, 0, 0, 0, 0, 0}; - cps[0] = CL_CONTEXT_PLATFORM; - cps[1] = (cl_context_properties)platform; -#ifdef _WIN32 -#ifndef B3_USE_CLEW - if (pGLContext && pGLDC) - { - cps[2] = CL_GL_CONTEXT_KHR; - cps[3] = (cl_context_properties)pGLContext; - cps[4] = CL_WGL_HDC_KHR; - cps[5] = (cl_context_properties)pGLDC; - } -#endif //B3_USE_CLEW -#endif //_WIN32 - num_entries = B3_MAX_CL_DEVICES; - - num_devices = -1; - - ciErrNum = clGetDeviceIDs( - platform, - deviceType, - num_entries, - devices, - &num_devices); - - if (ciErrNum < 0) - { - b3Printf("clGetDeviceIDs returned %d\n", ciErrNum); - return 0; - } - cprops = (NULL == platform) ? NULL : cps; - - if (!num_devices) - return 0; - - if (pGLContext) - { - //search for the GPU that relates to the OpenCL context - unsigned int i; - for (i = 0; i < num_devices; i++) - { - retContext = clCreateContext(cprops, 1, &devices[i], NULL, NULL, &ciErrNum); - if (ciErrNum == CL_SUCCESS) - break; - } - } - else - { - if (preferredDeviceIndex >= 0 && (unsigned int)preferredDeviceIndex < num_devices) - { - //create a context of the preferred device index - retContext = clCreateContext(cprops, 1, &devices[preferredDeviceIndex], NULL, NULL, &ciErrNum); - } - else - { - //create a context of all devices -#if defined(__APPLE__) - retContext = clCreateContext(cprops, num_devices, devices, MyFatalBreakAPPLE, NULL, &ciErrNum); -#else - b3Printf("numDevices=%d\n", num_devices); - - retContext = clCreateContext(cprops, num_devices, devices, NULL, NULL, &ciErrNum); -#endif - } - } - if (pErrNum != NULL) - { - *pErrNum = ciErrNum; - }; - - return retContext; -} - -cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLContext, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* retPlatformId) -{ -#ifdef B3_USE_CLEW - b3OpenCLUtils_clewInit(); -#endif - - cl_uint numPlatforms; - cl_context retContext = 0; - unsigned int i; - - cl_int 
ciErrNum = clGetPlatformIDs(0, NULL, &numPlatforms); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - if (numPlatforms > 0) - { - cl_platform_id* platforms = (cl_platform_id*)malloc(sizeof(cl_platform_id) * numPlatforms); - ciErrNum = clGetPlatformIDs(numPlatforms, platforms, NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) - *pErrNum = ciErrNum; - free(platforms); - return NULL; - } - - for (i = 0; i < numPlatforms; ++i) - { - char pbuf[128]; - ciErrNum = clGetPlatformInfo(platforms[i], - CL_PLATFORM_VENDOR, - sizeof(pbuf), - pbuf, - NULL); - if (ciErrNum != CL_SUCCESS) - { - if (pErrNum != NULL) *pErrNum = ciErrNum; - return NULL; - } - - if (preferredPlatformIndex >= 0 && i == preferredPlatformIndex) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - break; - } - else - { - if (!strcmp(pbuf, spPlatformVendor)) - { - cl_platform_id tmpPlatform = platforms[0]; - platforms[0] = platforms[i]; - platforms[i] = tmpPlatform; - } - } - } - - for (i = 0; i < numPlatforms; ++i) - { - cl_platform_id platform = platforms[i]; - assert(platform); - - retContext = b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLContext, pGLDC, preferredDeviceIndex, preferredPlatformIndex); - - if (retContext) - { - // printf("OpenCL platform details:\n"); - b3OpenCLPlatformInfo platformInfo; - - b3OpenCLUtils::getPlatformInfo(platform, &platformInfo); - - if (retPlatformId) - *retPlatformId = platform; - - break; - } - } - - free(platforms); - } - return retContext; -} - -////////////////////////////////////////////////////////////////////////////// -//! Gets the id of the nth device from the context -//! -//! @return the id or -1 when out of range -//! @param cxMainContext OpenCL context -//! 
@param device_idx index of the device of interest -////////////////////////////////////////////////////////////////////////////// -cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int deviceIndex) -{ - assert(cxMainContext); - - size_t szParmDataBytes; - cl_device_id* cdDevices; - cl_device_id device; - - // get the list of devices associated with context - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParmDataBytes); - - if (szParmDataBytes / sizeof(cl_device_id) < (unsigned int)deviceIndex) - { - return (cl_device_id)-1; - } - - cdDevices = (cl_device_id*)malloc(szParmDataBytes); - - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, szParmDataBytes, cdDevices, NULL); - - device = cdDevices[deviceIndex]; - free(cdDevices); - - return device; -} - -int b3OpenCLUtils_getNumDevices(cl_context cxMainContext) -{ - size_t szParamDataBytes; - int device_count; - clGetContextInfo(cxMainContext, CL_CONTEXT_DEVICES, 0, NULL, &szParamDataBytes); - device_count = (int)szParamDataBytes / sizeof(cl_device_id); - return device_count; -} - -void b3OpenCLUtils::getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info) -{ - // CL_DEVICE_NAME - clGetDeviceInfo(device, CL_DEVICE_NAME, B3_MAX_STRING_LENGTH, &info->m_deviceName, NULL); - - // CL_DEVICE_VENDOR - clGetDeviceInfo(device, CL_DEVICE_VENDOR, B3_MAX_STRING_LENGTH, &info->m_deviceVendor, NULL); - - // CL_DRIVER_VERSION - clGetDeviceInfo(device, CL_DRIVER_VERSION, B3_MAX_STRING_LENGTH, &info->m_driverVersion, NULL); - - // CL_DEVICE_INFO - clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(cl_device_type), &info->m_deviceType, NULL); - - // CL_DEVICE_MAX_COMPUTE_UNITS - clGetDeviceInfo(device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(info->m_computeUnits), &info->m_computeUnits, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(info->m_workitemDims), &info->m_workitemDims, NULL); - - // CL_DEVICE_MAX_WORK_ITEM_SIZES - 
clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(info->m_workItemSize), &info->m_workItemSize, NULL); - - // CL_DEVICE_MAX_WORK_GROUP_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(info->m_workgroupSize), &info->m_workgroupSize, NULL); - - // CL_DEVICE_MAX_CLOCK_FREQUENCY - clGetDeviceInfo(device, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(info->m_clockFrequency), &info->m_clockFrequency, NULL); - - // CL_DEVICE_ADDRESS_BITS - clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(info->m_addressBits), &info->m_addressBits, NULL); - - // CL_DEVICE_MAX_MEM_ALLOC_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(info->m_maxMemAllocSize), &info->m_maxMemAllocSize, NULL); - - // CL_DEVICE_GLOBAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(info->m_globalMemSize), &info->m_globalMemSize, NULL); - - // CL_DEVICE_ERROR_CORRECTION_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_ERROR_CORRECTION_SUPPORT, sizeof(info->m_errorCorrectionSupport), &info->m_errorCorrectionSupport, NULL); - - // CL_DEVICE_LOCAL_MEM_TYPE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_TYPE, sizeof(info->m_localMemType), &info->m_localMemType, NULL); - - // CL_DEVICE_LOCAL_MEM_SIZE - clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(info->m_localMemSize), &info->m_localMemSize, NULL); - - // CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE - clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(info->m_constantBufferSize), &info->m_constantBufferSize, NULL); - - // CL_DEVICE_QUEUE_PROPERTIES - clGetDeviceInfo(device, CL_DEVICE_QUEUE_PROPERTIES, sizeof(info->m_queueProperties), &info->m_queueProperties, NULL); - - // CL_DEVICE_IMAGE_SUPPORT - clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(info->m_imageSupport), &info->m_imageSupport, NULL); - - // CL_DEVICE_MAX_READ_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof(info->m_maxReadImageArgs), &info->m_maxReadImageArgs, NULL); - - // 
CL_DEVICE_MAX_WRITE_IMAGE_ARGS - clGetDeviceInfo(device, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof(info->m_maxWriteImageArgs), &info->m_maxWriteImageArgs, NULL); - - // CL_DEVICE_IMAGE2D_MAX_WIDTH, CL_DEVICE_IMAGE2D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_WIDTH, CL_DEVICE_IMAGE3D_MAX_HEIGHT, CL_DEVICE_IMAGE3D_MAX_DEPTH - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof(size_t), &info->m_image2dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof(size_t), &info->m_image2dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(size_t), &info->m_image3dMaxWidth, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(size_t), &info->m_image3dMaxHeight, NULL); - clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(size_t), &info->m_image3dMaxDepth, NULL); - - // CL_DEVICE_EXTENSIONS: get device extensions, and if any then parse & log the string onto separate lines - clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, B3_MAX_STRING_LENGTH, &info->m_deviceExtensions, NULL); - - // CL_DEVICE_PREFERRED_VECTOR_WIDTH_<type> - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(cl_uint), &info->m_vecWidthChar, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(cl_uint), &info->m_vecWidthShort, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), &info->m_vecWidthInt, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(cl_uint), &info->m_vecWidthLong, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(cl_uint), &info->m_vecWidthFloat, NULL); - clGetDeviceInfo(device, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(cl_uint), &info->m_vecWidthDouble, NULL); -} - -void b3OpenCLUtils_printDeviceInfo(cl_device_id device) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device, &info); - b3Printf("Device Info:\n"); - b3Printf(" CL_DEVICE_NAME: \t\t\t%s\n", 
info.m_deviceName); - b3Printf(" CL_DEVICE_VENDOR: \t\t\t%s\n", info.m_deviceVendor); - b3Printf(" CL_DRIVER_VERSION: \t\t\t%s\n", info.m_driverVersion); - - if (info.m_deviceType & CL_DEVICE_TYPE_CPU) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_CPU"); - if (info.m_deviceType & CL_DEVICE_TYPE_GPU) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_GPU"); - if (info.m_deviceType & CL_DEVICE_TYPE_ACCELERATOR) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_ACCELERATOR"); - if (info.m_deviceType & CL_DEVICE_TYPE_DEFAULT) - b3Printf(" CL_DEVICE_TYPE:\t\t\t%s\n", "CL_DEVICE_TYPE_DEFAULT"); - - b3Printf(" CL_DEVICE_MAX_COMPUTE_UNITS:\t\t%u\n", info.m_computeUnits); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS:\t%u\n", info.m_workitemDims); - b3Printf(" CL_DEVICE_MAX_WORK_ITEM_SIZES:\t%u / %u / %u \n", info.m_workItemSize[0], info.m_workItemSize[1], info.m_workItemSize[2]); - b3Printf(" CL_DEVICE_MAX_WORK_GROUP_SIZE:\t%u\n", info.m_workgroupSize); - b3Printf(" CL_DEVICE_MAX_CLOCK_FREQUENCY:\t%u MHz\n", info.m_clockFrequency); - b3Printf(" CL_DEVICE_ADDRESS_BITS:\t\t%u\n", info.m_addressBits); - b3Printf(" CL_DEVICE_MAX_MEM_ALLOC_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_maxMemAllocSize / (1024 * 1024))); - b3Printf(" CL_DEVICE_GLOBAL_MEM_SIZE:\t\t%u MByte\n", (unsigned int)(info.m_globalMemSize / (1024 * 1024))); - b3Printf(" CL_DEVICE_ERROR_CORRECTION_SUPPORT:\t%s\n", info.m_errorCorrectionSupport == CL_TRUE ? "yes" : "no"); - b3Printf(" CL_DEVICE_LOCAL_MEM_TYPE:\t\t%s\n", info.m_localMemType == 1 ? 
"local" : "global"); - b3Printf(" CL_DEVICE_LOCAL_MEM_SIZE:\t\t%u KByte\n", (unsigned int)(info.m_localMemSize / 1024)); - b3Printf(" CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE:\t%u KByte\n", (unsigned int)(info.m_constantBufferSize / 1024)); - if (info.m_queueProperties & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE"); - if (info.m_queueProperties & CL_QUEUE_PROFILING_ENABLE) - b3Printf(" CL_DEVICE_QUEUE_PROPERTIES:\t\t%s\n", "CL_QUEUE_PROFILING_ENABLE"); - - b3Printf(" CL_DEVICE_IMAGE_SUPPORT:\t\t%u\n", info.m_imageSupport); - - b3Printf(" CL_DEVICE_MAX_READ_IMAGE_ARGS:\t%u\n", info.m_maxReadImageArgs); - b3Printf(" CL_DEVICE_MAX_WRITE_IMAGE_ARGS:\t%u\n", info.m_maxWriteImageArgs); - b3Printf("\n CL_DEVICE_IMAGE <dim>"); - b3Printf("\t\t\t2D_MAX_WIDTH\t %u\n", info.m_image2dMaxWidth); - b3Printf("\t\t\t\t\t2D_MAX_HEIGHT\t %u\n", info.m_image2dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_WIDTH\t %u\n", info.m_image3dMaxWidth); - b3Printf("\t\t\t\t\t3D_MAX_HEIGHT\t %u\n", info.m_image3dMaxHeight); - b3Printf("\t\t\t\t\t3D_MAX_DEPTH\t %u\n", info.m_image3dMaxDepth); - if (*info.m_deviceExtensions != 0) - { - b3Printf("\n CL_DEVICE_EXTENSIONS:%s\n", info.m_deviceExtensions); - } - else - { - b3Printf(" CL_DEVICE_EXTENSIONS: None\n"); - } - b3Printf(" CL_DEVICE_PREFERRED_VECTOR_WIDTH_<t>\t"); - b3Printf("CHAR %u, SHORT %u, INT %u,LONG %u, FLOAT %u, DOUBLE %u\n\n\n", - info.m_vecWidthChar, info.m_vecWidthShort, info.m_vecWidthInt, info.m_vecWidthLong, info.m_vecWidthFloat, info.m_vecWidthDouble); -} - -static const char* strip2(const char* name, const char* pattern) -{ - size_t const patlen = strlen(pattern); - size_t patcnt = 0; - const char* oriptr; - const char* patloc; - // find how many times the pattern occurs in the original string - for (oriptr = name; (patloc = strstr(oriptr, pattern)); oriptr = patloc + patlen) - { - patcnt++; - } - return oriptr; -} - -cl_program 
b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSourceOrg, cl_int* pErrNum, const char* additionalMacrosArg, const char* clFileNameForCaching, bool disableBinaryCaching) -{ - const char* additionalMacros = additionalMacrosArg ? additionalMacrosArg : ""; - - if (disableBinaryCaching) - { - //kernelSourceOrg = 0; - } - - cl_program m_cpProgram = 0; - cl_int status; - - char binaryFileName[B3_MAX_STRING_LENGTH]; - - char deviceName[256]; - char driverVersion[256]; - const char* strippedName; - int fileUpToDate = 0; -#ifdef _WIN32 - int binaryFileValid = 0; -#endif - if (!disableBinaryCaching && clFileNameForCaching) - { - clGetDeviceInfo(device, CL_DEVICE_NAME, 256, &deviceName, NULL); - clGetDeviceInfo(device, CL_DRIVER_VERSION, 256, &driverVersion, NULL); - - strippedName = strip2(clFileNameForCaching, "\\"); - strippedName = strip2(strippedName, "/"); - -#ifdef _MSC_VER - sprintf_s(binaryFileName, B3_MAX_STRING_LENGTH, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion); -#else - sprintf(binaryFileName, "%s/%s.%s.%s.bin", sCachedBinaryPath, strippedName, deviceName, driverVersion); -#endif - } - if (clFileNameForCaching && !(disableBinaryCaching || gDebugSkipLoadingBinary || gDebugForceLoadingFromSource)) - { -#ifdef _WIN32 - char* bla = 0; - - //printf("searching for %s\n", binaryFileName); - - FILETIME modtimeBinary; - CreateDirectoryA(sCachedBinaryPath, 0); - { - HANDLE binaryFileHandle = CreateFileA(binaryFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - if (binaryFileHandle == INVALID_HANDLE_VALUE) - { - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nCached file not found %s\n", binaryFileName); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nCached file path not found %s\n", binaryFileName); - break; - } - default: - { - b3Warning("\nFailed reading cached file with 
errorCode = %d\n", errorCode); - } - } - } - else - { - if (GetFileTime(binaryFileHandle, NULL, NULL, &modtimeBinary) == 0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } - else - { - binaryFileValid = 1; - } - CloseHandle(binaryFileHandle); - } - - if (binaryFileValid) - { - HANDLE srcFileHandle = CreateFileA(clFileNameForCaching, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - - if (srcFileHandle == INVALID_HANDLE_VALUE) - { - const char* prefix[] = {"./", "../", "../../", "../../../", "../../../../"}; - for (int i = 0; (srcFileHandle == INVALID_HANDLE_VALUE) && i < 5; i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching); - srcFileHandle = CreateFileA(relativeFileName, GENERIC_READ, 0, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0); - } - } - - if (srcFileHandle != INVALID_HANDLE_VALUE) - { - FILETIME modtimeSrc; - if (GetFileTime(srcFileHandle, NULL, NULL, &modtimeSrc) == 0) - { - DWORD errorCode; - errorCode = GetLastError(); - b3Warning("\nGetFileTime errorCode = %d\n", errorCode); - } - if ((modtimeSrc.dwHighDateTime < modtimeBinary.dwHighDateTime) || ((modtimeSrc.dwHighDateTime == modtimeBinary.dwHighDateTime) && (modtimeSrc.dwLowDateTime <= modtimeBinary.dwLowDateTime))) - { - fileUpToDate = 1; - } - else - { - b3Warning("\nCached binary file out-of-date (%s)\n", binaryFileName); - } - CloseHandle(srcFileHandle); - } - else - { -#ifdef _DEBUG - DWORD errorCode; - errorCode = GetLastError(); - switch (errorCode) - { - case ERROR_FILE_NOT_FOUND: - { - b3Warning("\nSrc file not found %s\n", clFileNameForCaching); - break; - } - case ERROR_PATH_NOT_FOUND: - { - b3Warning("\nSrc path not found %s\n", clFileNameForCaching); - break; - } - default: - { - b3Warning("\nnSrc file reading errorCode = %d\n", errorCode); - } - } - - //we should make sure the src file exists so we can verify the timestamp with binary - // assert(0); - 
b3Warning("Warning: cannot find OpenCL kernel %s to verify timestamp of binary cached kernel %s\n", clFileNameForCaching, binaryFileName); - fileUpToDate = true; -#else - //if we cannot find the source, assume it is OK in release builds - fileUpToDate = true; -#endif - } - } - } - -#else - fileUpToDate = true; - if (mkdir(sCachedBinaryPath, 0777) == -1) - { - } - else - { - b3Printf("Succesfully created cache directory: %s\n", sCachedBinaryPath); - } -#endif //_WIN32 - } - - if (fileUpToDate) - { -#ifdef _MSC_VER - FILE* file; - if (fopen_s(&file, binaryFileName, "rb") != 0) - file = 0; -#else - FILE* file = fopen(binaryFileName, "rb"); -#endif - - if (file) - { - size_t binarySize = 0; - char* binary = 0; - - fseek(file, 0L, SEEK_END); - binarySize = ftell(file); - rewind(file); - binary = (char*)malloc(sizeof(char) * binarySize); - int bytesRead; - bytesRead = fread(binary, sizeof(char), binarySize, file); - fclose(file); - - m_cpProgram = clCreateProgramWithBinary(clContext, 1, &device, &binarySize, (const unsigned char**)&binary, 0, &status); - b3Assert(status == CL_SUCCESS); - status = clBuildProgram(m_cpProgram, 1, &device, additionalMacros, 0, 0); - b3Assert(status == CL_SUCCESS); - - if (status != CL_SUCCESS) - { - char* build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - build_log[ret_val_size] = '\0'; - b3Error("%s\n", build_log); - free(build_log); - b3Assert(0); - m_cpProgram = 0; - - b3Warning("clBuildProgram reported failure on cached binary: %s\n", binaryFileName); - } - else - { - b3Printf("clBuildProgram successfully compiled cached binary: %s\n", binaryFileName); - } - free(binary); - } - else - { - b3Warning("Cannot open cached binary: %s\n", binaryFileName); - } - } - - if (!m_cpProgram) - { - cl_int 
localErrNum; - char* compileFlags; - int flagsize; - - const char* kernelSource = kernelSourceOrg; - - if (!kernelSourceOrg || gDebugForceLoadingFromSource) - { - if (clFileNameForCaching) - { - FILE* file = fopen(clFileNameForCaching, "rb"); - //in many cases the relative path is a few levels up the directory hierarchy, so try it - if (!file) - { - const char* prefix[] = {"../", "../../", "../../../", "../../../../"}; - for (int i = 0; !file && i < 3; i++) - { - char relativeFileName[1024]; - sprintf(relativeFileName, "%s%s", prefix[i], clFileNameForCaching); - file = fopen(relativeFileName, "rb"); - } - } - - if (file) - { - char* kernelSrc = 0; - fseek(file, 0L, SEEK_END); - int kernelSize = ftell(file); - rewind(file); - kernelSrc = (char*)malloc(kernelSize + 1); - int readBytes; - readBytes = fread((void*)kernelSrc, 1, kernelSize, file); - kernelSrc[kernelSize] = 0; - fclose(file); - kernelSource = kernelSrc; - } - } - } - - size_t program_length = kernelSource ? strlen(kernelSource) : 0; -#ifdef MAC //or __APPLE__? 
- char* flags = "-cl-mad-enable -DMAC "; -#else - const char* flags = ""; -#endif - - m_cpProgram = clCreateProgramWithSource(clContext, 1, (const char**)&kernelSource, &program_length, &localErrNum); - if (localErrNum != CL_SUCCESS) - { - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - // Build the program with 'mad' Optimization option - - flagsize = sizeof(char) * (strlen(additionalMacros) + strlen(flags) + 5); - compileFlags = (char*)malloc(flagsize); -#ifdef _MSC_VER - sprintf_s(compileFlags, flagsize, "%s %s", flags, additionalMacros); -#else - sprintf(compileFlags, "%s %s", flags, additionalMacros); -#endif - localErrNum = clBuildProgram(m_cpProgram, 1, &device, compileFlags, NULL, NULL); - if (localErrNum != CL_SUCCESS) - { - char* build_log; - size_t ret_val_size; - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &ret_val_size); - build_log = (char*)malloc(sizeof(char) * (ret_val_size + 1)); - clGetProgramBuildInfo(m_cpProgram, device, CL_PROGRAM_BUILD_LOG, ret_val_size, build_log, NULL); - - // to be carefully, terminate with \0 - // there's no information in the reference whether the string is 0 terminated or not - build_log[ret_val_size] = '\0'; - - b3Error("Error in clBuildProgram, Line %u in file %s, Log: \n%s\n !!!\n\n", __LINE__, __FILE__, build_log); - free(build_log); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - if (!disableBinaryCaching && clFileNameForCaching) - { // write to binary - - cl_uint numAssociatedDevices; - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &numAssociatedDevices, 0); - b3Assert(status == CL_SUCCESS); - if (numAssociatedDevices == 1) - { - size_t binarySize; - char* binary; - - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &binarySize, 0); - b3Assert(status == CL_SUCCESS); - - binary = (char*)malloc(sizeof(char) * binarySize); - - status = clGetProgramInfo(m_cpProgram, CL_PROGRAM_BINARIES, sizeof(char*), 
&binary, 0); - b3Assert(status == CL_SUCCESS); - - { - FILE* file = 0; -#ifdef _MSC_VER - if (fopen_s(&file, binaryFileName, "wb") != 0) - file = 0; -#else - file = fopen(binaryFileName, "wb"); -#endif - if (file) - { - fwrite(binary, sizeof(char), binarySize, file); - fclose(file); - } - else - { - b3Warning("cannot write file %s\n", binaryFileName); - } - } - - free(binary); - } - } - - free(compileFlags); - } - return m_cpProgram; -} - -cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros) -{ - cl_kernel kernel; - cl_int localErrNum; - - cl_program m_cpProgram = prog; - - b3Printf("compiling kernel %s ", kernelName); - - if (!m_cpProgram) - { - m_cpProgram = b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, 0, false); - } - - // Create the kernel - kernel = clCreateKernel(m_cpProgram, kernelName, &localErrNum); - if (localErrNum != CL_SUCCESS) - { - b3Error("Error in clCreateKernel, Line %u in file %s, cannot find kernel function %s !!!\n\n", __LINE__, __FILE__, kernelName); - assert(0); - if (pErrNum) - *pErrNum = localErrNum; - return 0; - } - - if (!prog && m_cpProgram) - { - clReleaseProgram(m_cpProgram); - } - b3Printf("ready. \n"); - - if (pErrNum) - *pErrNum = CL_SUCCESS; - return kernel; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h b/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h deleted file mode 100644 index 6c82eed2a6..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Initialize/b3OpenCLUtils.h +++ /dev/null @@ -1,190 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library, http://bulletphysics.org -Copyright (C) 2006 - 2011 Sony Computer Entertainment Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -//original author: Roman Ponomarev -//cleanup by Erwin Coumans - -#ifndef B3_OPENCL_UTILS_H -#define B3_OPENCL_UTILS_H - -#include "b3OpenCLInclude.h" - -#ifdef __cplusplus -extern "C" -{ -#endif - - ///C API for OpenCL utilities: convenience functions, see below for C++ API - - /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code - /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. 
- cl_context b3OpenCLUtils_createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex, cl_platform_id* platformId); - - int b3OpenCLUtils_getNumDevices(cl_context cxMainContext); - - cl_device_id b3OpenCLUtils_getDevice(cl_context cxMainContext, int nr); - - void b3OpenCLUtils_printDeviceInfo(cl_device_id device); - - cl_kernel b3OpenCLUtils_compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum, cl_program prog, const char* additionalMacros); - - //optional - cl_program b3OpenCLUtils_compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum, const char* additionalMacros, const char* srcFileNameForCaching, bool disableBinaryCaching); - - //the following optional APIs provide access using specific platform information - int b3OpenCLUtils_getNumPlatforms(cl_int* pErrNum); - - ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) - cl_platform_id b3OpenCLUtils_getPlatform(int nr, cl_int* pErrNum); - - void b3OpenCLUtils_printPlatformInfo(cl_platform_id platform); - - const char* b3OpenCLUtils_getSdkVendorName(); - - ///set the path (directory/folder) where the compiled OpenCL kernel are stored - void b3OpenCLUtils_setCachePath(const char* path); - - cl_context b3OpenCLUtils_createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx, void* pGLDC, int preferredDeviceIndex, int preferredPlatformIndex); - -#ifdef __cplusplus -} - -#define B3_MAX_STRING_LENGTH 1024 - -typedef struct -{ - char m_deviceName[B3_MAX_STRING_LENGTH]; - char m_deviceVendor[B3_MAX_STRING_LENGTH]; - char m_driverVersion[B3_MAX_STRING_LENGTH]; - char m_deviceExtensions[B3_MAX_STRING_LENGTH]; - - cl_device_type m_deviceType; - cl_uint m_computeUnits; - size_t m_workitemDims; - size_t m_workItemSize[3]; - size_t 
m_image2dMaxWidth; - size_t m_image2dMaxHeight; - size_t m_image3dMaxWidth; - size_t m_image3dMaxHeight; - size_t m_image3dMaxDepth; - size_t m_workgroupSize; - cl_uint m_clockFrequency; - cl_ulong m_constantBufferSize; - cl_ulong m_localMemSize; - cl_ulong m_globalMemSize; - cl_bool m_errorCorrectionSupport; - cl_device_local_mem_type m_localMemType; - cl_uint m_maxReadImageArgs; - cl_uint m_maxWriteImageArgs; - - cl_uint m_addressBits; - cl_ulong m_maxMemAllocSize; - cl_command_queue_properties m_queueProperties; - cl_bool m_imageSupport; - cl_uint m_vecWidthChar; - cl_uint m_vecWidthShort; - cl_uint m_vecWidthInt; - cl_uint m_vecWidthLong; - cl_uint m_vecWidthFloat; - cl_uint m_vecWidthDouble; - -} b3OpenCLDeviceInfo; - -struct b3OpenCLPlatformInfo -{ - char m_platformVendor[B3_MAX_STRING_LENGTH]; - char m_platformName[B3_MAX_STRING_LENGTH]; - char m_platformVersion[B3_MAX_STRING_LENGTH]; - - b3OpenCLPlatformInfo() - { - m_platformVendor[0] = 0; - m_platformName[0] = 0; - m_platformVersion[0] = 0; - } -}; - -///C++ API for OpenCL utilities: convenience functions -struct b3OpenCLUtils -{ - /// CL Context optionally takes a GL context. This is a generic type because we don't really want this code - /// to have to understand GL types. It is a HGLRC in _WIN32 or a GLXContext otherwise. 
- static inline cl_context createContextFromType(cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex = -1, cl_platform_id* platformId = 0) - { - return b3OpenCLUtils_createContextFromType(deviceType, pErrNum, pGLCtx, pGLDC, preferredDeviceIndex, preferredPlatformIndex, platformId); - } - - static inline int getNumDevices(cl_context cxMainContext) - { - return b3OpenCLUtils_getNumDevices(cxMainContext); - } - static inline cl_device_id getDevice(cl_context cxMainContext, int nr) - { - return b3OpenCLUtils_getDevice(cxMainContext, nr); - } - - static void getDeviceInfo(cl_device_id device, b3OpenCLDeviceInfo* info); - - static inline void printDeviceInfo(cl_device_id device) - { - b3OpenCLUtils_printDeviceInfo(device); - } - - static inline cl_kernel compileCLKernelFromString(cl_context clContext, cl_device_id device, const char* kernelSource, const char* kernelName, cl_int* pErrNum = 0, cl_program prog = 0, const char* additionalMacros = "") - { - return b3OpenCLUtils_compileCLKernelFromString(clContext, device, kernelSource, kernelName, pErrNum, prog, additionalMacros); - } - - //optional - static inline cl_program compileCLProgramFromString(cl_context clContext, cl_device_id device, const char* kernelSource, cl_int* pErrNum = 0, const char* additionalMacros = "", const char* srcFileNameForCaching = 0, bool disableBinaryCaching = false) - { - return b3OpenCLUtils_compileCLProgramFromString(clContext, device, kernelSource, pErrNum, additionalMacros, srcFileNameForCaching, disableBinaryCaching); - } - - //the following optional APIs provide access using specific platform information - static inline int getNumPlatforms(cl_int* pErrNum = 0) - { - return b3OpenCLUtils_getNumPlatforms(pErrNum); - } - ///get the nr'th platform, where nr is in the range [0..getNumPlatforms) - static inline cl_platform_id getPlatform(int nr, cl_int* pErrNum = 0) - { - return b3OpenCLUtils_getPlatform(nr, 
pErrNum); - } - - static void getPlatformInfo(cl_platform_id platform, b3OpenCLPlatformInfo* platformInfo); - - static inline void printPlatformInfo(cl_platform_id platform) - { - b3OpenCLUtils_printPlatformInfo(platform); - } - - static inline const char* getSdkVendorName() - { - return b3OpenCLUtils_getSdkVendorName(); - } - static inline cl_context createContextFromPlatform(cl_platform_id platform, cl_device_type deviceType, cl_int* pErrNum, void* pGLCtx = 0, void* pGLDC = 0, int preferredDeviceIndex = -1, int preferredPlatformIndex = -1) - { - return b3OpenCLUtils_createContextFromPlatform(platform, deviceType, pErrNum, pGLCtx, pGLDC, preferredDeviceIndex, preferredPlatformIndex); - } - static void setCachePath(const char* path) - { - b3OpenCLUtils_setCachePath(path); - } -}; - -#endif //__cplusplus - -#endif // B3_OPENCL_UTILS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h deleted file mode 100644 index 27835bb747..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef B3_BVH_INFO_H -#define B3_BVH_INFO_H - -#include "Bullet3Common/b3Vector3.h" - -struct b3BvhInfo -{ - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - b3Vector3 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; -}; - -#endif //B3_BVH_INFO_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp deleted file mode 100644 index 4db717f8c3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.cpp +++ /dev/null @@ -1,253 +0,0 @@ - -#if 0 -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3ContactCache.h" -#include "Bullet3Common/b3Transform.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -b3Scalar gContactBreakingThreshold = b3Scalar(0.02); - -///gContactCalcArea3Points will approximate the convex hull area using 3 points -///when setting it to false, it will use 4 points to compute the area: it is more accurate but slower -bool gContactCalcArea3Points = true; - - - - -static inline b3Scalar calcArea4Points(const b3Vector3 &p0,const b3Vector3 &p1,const b3Vector3 &p2,const b3Vector3 &p3) -{ - // It calculates possible 3 area constructed from random 4 points and returns the biggest one. 
- - b3Vector3 a[3],b[3]; - a[0] = p0 - p1; - a[1] = p0 - p2; - a[2] = p0 - p3; - b[0] = p2 - p3; - b[1] = p1 - p3; - b[2] = p1 - p2; - - //todo: Following 3 cross production can be easily optimized by SIMD. - b3Vector3 tmp0 = a[0].cross(b[0]); - b3Vector3 tmp1 = a[1].cross(b[1]); - b3Vector3 tmp2 = a[2].cross(b[2]); - - return b3Max(b3Max(tmp0.length2(),tmp1.length2()),tmp2.length2()); -} -#if 0 - -//using localPointA for all points -int b3ContactCache::sortCachedPoints(const b3Vector3& pt) -{ - //calculate 4 possible cases areas, and take biggest area - //also need to keep 'deepest' - - int maxPenetrationIndex = -1; -#define KEEP_DEEPEST_POINT 1 -#ifdef KEEP_DEEPEST_POINT - b3Scalar maxPenetration = pt.getDistance(); - for (int i=0;i<4;i++) - { - if (m_pointCache[i].getDistance() < maxPenetration) - { - maxPenetrationIndex = i; - maxPenetration = m_pointCache[i].getDistance(); - } - } -#endif //KEEP_DEEPEST_POINT - - b3Scalar res0(b3Scalar(0.)),res1(b3Scalar(0.)),res2(b3Scalar(0.)),res3(b3Scalar(0.)); - - if (gContactCalcArea3Points) - { - if (maxPenetrationIndex != 0) - { - b3Vector3 a0 = pt.m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 b0 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a0.cross(b0); - res0 = cross.length2(); - } - if (maxPenetrationIndex != 1) - { - b3Vector3 a1 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b1 = m_pointCache[3].m_localPointA-m_pointCache[2].m_localPointA; - b3Vector3 cross = a1.cross(b1); - res1 = cross.length2(); - } - - if (maxPenetrationIndex != 2) - { - b3Vector3 a2 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b2 = m_pointCache[3].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = a2.cross(b2); - res2 = cross.length2(); - } - - if (maxPenetrationIndex != 3) - { - b3Vector3 a3 = pt.m_localPointA-m_pointCache[0].m_localPointA; - b3Vector3 b3 = m_pointCache[2].m_localPointA-m_pointCache[1].m_localPointA; - b3Vector3 cross = 
a3.cross(b3); - res3 = cross.length2(); - } - } - else - { - if(maxPenetrationIndex != 0) { - res0 = calcArea4Points(pt.m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 1) { - res1 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[2].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 2) { - res2 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[3].m_localPointA); - } - - if(maxPenetrationIndex != 3) { - res3 = calcArea4Points(pt.m_localPointA,m_pointCache[0].m_localPointA,m_pointCache[1].m_localPointA,m_pointCache[2].m_localPointA); - } - } - b3Vector4 maxvec(res0,res1,res2,res3); - int biggestarea = maxvec.closestAxis4(); - return biggestarea; - -} - - -int b3ContactCache::getCacheEntry(const b3Vector3& newPoint) const -{ - b3Scalar shortestDist = getContactBreakingThreshold() * getContactBreakingThreshold(); - int size = getNumContacts(); - int nearestPoint = -1; - for( int i = 0; i < size; i++ ) - { - const b3Vector3 &mp = m_pointCache[i]; - - b3Vector3 diffA = mp.m_localPointA- newPoint.m_localPointA; - const b3Scalar distToManiPoint = diffA.dot(diffA); - if( distToManiPoint < shortestDist ) - { - shortestDist = distToManiPoint; - nearestPoint = i; - } - } - return nearestPoint; -} - -int b3ContactCache::addManifoldPoint(const b3Vector3& newPoint) -{ - b3Assert(validContactDistance(newPoint)); - - int insertIndex = getNumContacts(); - if (insertIndex == MANIFOLD_CACHE_SIZE) - { -#if MANIFOLD_CACHE_SIZE >= 4 - //sort cache so best points come first, based on area - insertIndex = sortCachedPoints(newPoint); -#else - insertIndex = 0; -#endif - clearUserCache(m_pointCache[insertIndex]); - - } else - { - m_cachedPoints++; - - - } - if (insertIndex<0) - insertIndex=0; - - //b3Assert(m_pointCache[insertIndex].m_userPersistentData==0); - m_pointCache[insertIndex] = 
newPoint; - return insertIndex; -} - -#endif - -bool b3ContactCache::validContactDistance(const b3Vector3& pt) -{ - return pt.w <= gContactBreakingThreshold; -} - -void b3ContactCache::removeContactPoint(struct b3Contact4Data& newContactCache,int i) -{ - int numContacts = b3Contact4Data_getNumPoints(&newContactCache); - if (i!=(numContacts-1)) - { - b3Swap(newContactCache.m_localPosA[i],newContactCache.m_localPosA[numContacts-1]); - b3Swap(newContactCache.m_localPosB[i],newContactCache.m_localPosB[numContacts-1]); - b3Swap(newContactCache.m_worldPosB[i],newContactCache.m_worldPosB[numContacts-1]); - } - b3Contact4Data_setNumPoints(&newContactCache,numContacts-1); - -} - - -void b3ContactCache::refreshContactPoints(const b3Transform& trA,const b3Transform& trB, struct b3Contact4Data& contacts) -{ - - int numContacts = b3Contact4Data_getNumPoints(&contacts); - - - int i; - /// first refresh worldspace positions and distance - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - contacts.m_worldPosB[i] = worldPosB; - float distance = (worldPosA - worldPosB).dot(contacts.m_worldNormalOnB); - contacts.m_worldPosB[i].w = distance; - } - - /// then - b3Scalar distance2d; - b3Vector3 projectedDifference,projectedPoint; - for (i=numContacts-1;i>=0;i--) - { - b3Vector3 worldPosA = trA( contacts.m_localPosA[i]); - b3Vector3 worldPosB = trB( contacts.m_localPosB[i]); - b3Vector3&pt = contacts.m_worldPosB[i]; - //contact becomes invalid when signed distance exceeds margin (projected on contactnormal direction) - if (!validContactDistance(pt)) - { - removeContactPoint(contacts,i); - } else - { - //contact also becomes invalid when relative movement orthogonal to normal exceeds margin - projectedPoint = worldPosA - contacts.m_worldNormalOnB * contacts.m_worldPosB[i].w; - projectedDifference = contacts.m_worldPosB[i] - projectedPoint; - distance2d = 
projectedDifference.dot(projectedDifference); - if (distance2d > gContactBreakingThreshold*gContactBreakingThreshold ) - { - removeContactPoint(contacts,i); - } else - { - ////contact point processed callback - //if (gContactProcessedCallback) - // (*gContactProcessedCallback)(manifoldPoint,(void*)m_body0,(void*)m_body1); - } - } - } - - -} - -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h deleted file mode 100644 index a15fd0b2a9..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h +++ /dev/null @@ -1,62 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#ifndef B3_CONTACT_CACHE_H -#define B3_CONTACT_CACHE_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedAllocator.h" - -///maximum contact breaking and merging threshold -extern b3Scalar gContactBreakingThreshold; - -#define MANIFOLD_CACHE_SIZE 4 - -///b3ContactCache is a contact point cache, it stays persistent as long as objects are overlapping in the broadphase. -///Those contact points are created by the collision narrow phase. -///The cache can be empty, or hold 1,2,3 or 4 points. Some collision algorithms (GJK) might only add one point at a time. -///updates/refreshes old contact points, and throw them away if necessary (distance becomes too large) -///reduces the cache to 4 points, when more then 4 points are added, using following rules: -///the contact point with deepest penetration is always kept, and it tries to maximuze the area covered by the points -///note that some pairs of objects might have more then one contact manifold. 
-B3_ATTRIBUTE_ALIGNED16(class) -b3ContactCache -{ - /// sort cached points so most isolated points come first - int sortCachedPoints(const b3Vector3& pt); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int addManifoldPoint(const b3Vector3& newPoint); - - /*void replaceContactPoint(const b3Vector3& newPoint,int insertIndex) - { - b3Assert(validContactDistance(newPoint)); - m_pointCache[insertIndex] = newPoint; - } - */ - - static bool validContactDistance(const b3Vector3& pt); - - /// calculated new worldspace coordinates and depth, and reject points that exceed the collision margin - static void refreshContactPoints(const b3Transform& trA, const b3Transform& trB, struct b3Contact4Data& newContactCache); - - static void removeContactPoint(struct b3Contact4Data & newContactCache, int i); -}; - -#endif //B3_CONTACT_CACHE_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp deleted file mode 100644 index 54a104c5c8..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.cpp +++ /dev/null @@ -1,4408 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -bool findSeparatingAxisOnGpu = true; -bool splitSearchSepAxisConcave = false; -bool splitSearchSepAxisConvex = true; -bool useMprGpu = true; //use mpr for edge-edge (+contact point) or sat. Needs testing on main OpenCL platforms, before enabling... -bool bvhTraversalKernelGPU = true; -bool findConcaveSeparatingAxisKernelGPU = true; -bool clipConcaveFacesAndFindContactsCPU = false; //false;//true; -bool clipConvexFacesAndFindContactsCPU = false; //false;//true; -bool reduceConcaveContactsOnGPU = true; //false; -bool reduceConvexContactsOnGPU = true; //false; -bool findConvexClippingFacesGPU = true; -bool useGjk = false; ///option for CPU/host testing, when findSeparatingAxisOnGpu = false -bool useGjkContacts = false; //////option for CPU/host testing when findSeparatingAxisOnGpu = false - -static int myframecount = 0; ///for testing - -///This file was written by Erwin Coumans -///Separating axis rest based on work from Pierre Terdiman, see -///And contact clipping based on work from Simon Hobbs - -//#define B3_DEBUG_SAT_FACE - -//#define CHECK_ON_HOST - -#ifdef CHECK_ON_HOST -//#define PERSISTENT_CONTACTS_HOST -#endif - -int b3g_actualSATPairTests = 0; - -#include "b3ConvexHullContact.h" -#include <string.h> //memcpy -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3ContactCache.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -typedef b3AlignedObjectArray<b3Vector3> b3VertexArray; - -#include <float.h> //for FLT_MAX -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -//#include "AdlQuaternion.h" - -#include "kernels/satKernels.h" -#include 
"kernels/mprKernels.h" - -#include "kernels/satConcaveKernels.h" - -#include "kernels/satClipHullContacts.h" -#include "kernels/bvhTraversal.h" -#include "kernels/primitiveContacts.h" - -#include "Bullet3Geometry/b3AabbUtil.h" - -#define BT_NARROWPHASE_SAT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl" -#define BT_NARROWPHASE_SAT_CONCAVE_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl" - -#define BT_NARROWPHASE_MPR_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl" - -#define BT_NARROWPHASE_CLIPHULL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl" -#define BT_NARROWPHASE_BVH_TRAVERSAL_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl" -#define BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH "src/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl" - -#ifndef __global -#define __global -#endif - -#ifndef __kernel -#define __kernel -#endif - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhTraversal.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3FindConcaveSatAxis.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ClipFaces.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3NewContactReduction.h" - -#define dot3F4 b3Dot - -GpuSatCollision::GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q) - : m_context(ctx), - m_device(device), - m_queue(q), - - m_findSeparatingAxisKernel(0), - m_findSeparatingAxisVertexFaceKernel(0), - m_findSeparatingAxisEdgeEdgeKernel(0), - m_unitSphereDirections(m_context, m_queue), - - m_totalContactsOut(m_context, m_queue), - m_sepNormals(m_context, m_queue), - m_dmins(m_context, m_queue), - - m_hasSeparatingNormals(m_context, m_queue), - m_concaveSepNormals(m_context, m_queue), - m_concaveHasSeparatingNormals(m_context, m_queue), - m_numConcavePairsOut(m_context, m_queue), - - m_gpuCompoundPairs(m_context, m_queue), - - m_gpuCompoundSepNormals(m_context, m_queue), - 
m_gpuHasCompoundSepNormals(m_context, m_queue), - - m_numCompoundPairsOut(m_context, m_queue) -{ - m_totalContactsOut.push_back(0); - - cl_int errNum = 0; - - if (1) - { - const char* mprSrc = mprKernelsCL; - - const char* srcConcave = satConcaveKernelsCL; - char flags[1024] = {0}; - //#ifdef CL_PLATFORM_INTEL - // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/sat.cl"); - //#endif - m_mprPenetrationKernel = 0; - m_findSeparatingAxisUnitSphereKernel = 0; - - if (useMprGpu) - { - cl_program mprProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, mprSrc, &errNum, flags, BT_NARROWPHASE_MPR_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_mprPenetrationKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "mprPenetrationKernel", &errNum, mprProg); - b3Assert(m_mprPenetrationKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisUnitSphereKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, mprSrc, "findSeparatingAxisUnitSphereKernel", &errNum, mprProg); - b3Assert(m_findSeparatingAxisUnitSphereKernel); - b3Assert(errNum == CL_SUCCESS); - - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - m_unitSphereDirections.resize(numDirections); - m_unitSphereDirections.copyFromHostPointer(unitSphere162, numDirections, 0, true); - } - - cl_program satProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, satKernelsCL, &errNum, flags, BT_NARROWPHASE_SAT_PATH); - b3Assert(errNum == CL_SUCCESS); - - cl_program satConcaveProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcConcave, &errNum, flags, BT_NARROWPHASE_SAT_CONCAVE_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_findSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisKernel); - b3Assert(errNum == CL_SUCCESS); - - 
m_findSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisVertexFaceKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findSeparatingAxisEdgeEdgeKernel", &errNum, satProg); - b3Assert(m_findSeparatingAxisVertexFaceKernel); - - m_findConcaveSeparatingAxisKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findConcaveSeparatingAxisKernel", &errNum, satProg); - b3Assert(m_findConcaveSeparatingAxisKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSeparatingAxisVertexFaceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisVertexFaceKernel", &errNum, satConcaveProg); - b3Assert(m_findConcaveSeparatingAxisVertexFaceKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSeparatingAxisEdgeEdgeKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcConcave, "findConcaveSeparatingAxisEdgeEdgeKernel", &errNum, satConcaveProg); - b3Assert(m_findConcaveSeparatingAxisEdgeEdgeKernel); - b3Assert(errNum == CL_SUCCESS); - - m_findCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "findCompoundPairsKernel", &errNum, satProg); - b3Assert(m_findCompoundPairsKernel); - b3Assert(errNum == CL_SUCCESS); - m_processCompoundPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, satKernelsCL, "processCompoundPairsKernel", &errNum, satProg); - b3Assert(m_processCompoundPairsKernel); - b3Assert(errNum == CL_SUCCESS); - } - - if (1) - { - const char* srcClip = satClipKernelsCL; - - char flags[1024] = {0}; - //#ifdef CL_PLATFORM_INTEL - // sprintf(flags,"-g -s \"%s\"","C:/develop/bullet3_experiments2/opencl/gpu_narrowphase/kernels/satClipHullContacts.cl"); - //#endif - - cl_program 
satClipContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcClip, &errNum, flags, BT_NARROWPHASE_CLIPHULL_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_clipHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipCompoundsHullHullKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipCompoundsHullHullKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_findClippingFacesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "findClippingFacesKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipFacesAndFindContacts = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipFacesAndFindContactsKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - m_clipHullHullConcaveConvexKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, "clipHullHullConcaveConvexKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - - // m_extractManifoldAndAddContactKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,srcClip, "extractManifoldAndAddContactKernel",&errNum,satClipContactsProg); - // b3Assert(errNum==CL_SUCCESS); - - m_newContactReductionKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcClip, - "newContactReductionKernel", &errNum, satClipContactsProg); - b3Assert(errNum == CL_SUCCESS); - } - else - { - m_clipHullHullKernel = 0; - m_clipCompoundsHullHullKernel = 0; - m_findClippingFacesKernel = 0; - m_newContactReductionKernel = 0; - m_clipFacesAndFindContacts = 0; - m_clipHullHullConcaveConvexKernel = 0; - // m_extractManifoldAndAddContactKernel = 0; - } - - if (1) - { - const char* srcBvh = bvhTraversalKernelCL; - cl_program bvhTraversalProg = 
b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, srcBvh, &errNum, "", BT_NARROWPHASE_BVH_TRAVERSAL_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_bvhTraversalKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, srcBvh, "bvhTraversalKernel", &errNum, bvhTraversalProg, ""); - b3Assert(errNum == CL_SUCCESS); - } - - { - const char* primitiveContactsSrc = primitiveContactsKernelsCL; - cl_program primitiveContactsProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, primitiveContactsSrc, &errNum, "", BT_NARROWPHASE_PRIMITIVE_CONTACT_PATH); - b3Assert(errNum == CL_SUCCESS); - - m_primitiveContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "primitiveContactsKernel", &errNum, primitiveContactsProg, ""); - b3Assert(errNum == CL_SUCCESS); - - m_findConcaveSphereContactsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "findConcaveSphereContactsKernel", &errNum, primitiveContactsProg); - b3Assert(errNum == CL_SUCCESS); - b3Assert(m_findConcaveSphereContactsKernel); - - m_processCompoundPairsPrimitivesKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, primitiveContactsSrc, "processCompoundPairsPrimitivesKernel", &errNum, primitiveContactsProg, ""); - b3Assert(errNum == CL_SUCCESS); - b3Assert(m_processCompoundPairsPrimitivesKernel); - } -} - -GpuSatCollision::~GpuSatCollision() -{ - if (m_findSeparatingAxisVertexFaceKernel) - clReleaseKernel(m_findSeparatingAxisVertexFaceKernel); - - if (m_findSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findSeparatingAxisEdgeEdgeKernel); - - if (m_findSeparatingAxisUnitSphereKernel) - clReleaseKernel(m_findSeparatingAxisUnitSphereKernel); - - if (m_mprPenetrationKernel) - clReleaseKernel(m_mprPenetrationKernel); - - if (m_findSeparatingAxisKernel) - clReleaseKernel(m_findSeparatingAxisKernel); - - if (m_findConcaveSeparatingAxisVertexFaceKernel) - 
clReleaseKernel(m_findConcaveSeparatingAxisVertexFaceKernel); - - if (m_findConcaveSeparatingAxisEdgeEdgeKernel) - clReleaseKernel(m_findConcaveSeparatingAxisEdgeEdgeKernel); - - if (m_findConcaveSeparatingAxisKernel) - clReleaseKernel(m_findConcaveSeparatingAxisKernel); - - if (m_findCompoundPairsKernel) - clReleaseKernel(m_findCompoundPairsKernel); - - if (m_processCompoundPairsKernel) - clReleaseKernel(m_processCompoundPairsKernel); - - if (m_findClippingFacesKernel) - clReleaseKernel(m_findClippingFacesKernel); - - if (m_clipFacesAndFindContacts) - clReleaseKernel(m_clipFacesAndFindContacts); - if (m_newContactReductionKernel) - clReleaseKernel(m_newContactReductionKernel); - if (m_primitiveContactsKernel) - clReleaseKernel(m_primitiveContactsKernel); - - if (m_findConcaveSphereContactsKernel) - clReleaseKernel(m_findConcaveSphereContactsKernel); - - if (m_processCompoundPairsPrimitivesKernel) - clReleaseKernel(m_processCompoundPairsPrimitivesKernel); - - if (m_clipHullHullKernel) - clReleaseKernel(m_clipHullHullKernel); - if (m_clipCompoundsHullHullKernel) - clReleaseKernel(m_clipCompoundsHullHullKernel); - - if (m_clipHullHullConcaveConvexKernel) - clReleaseKernel(m_clipHullHullConcaveConvexKernel); - // if (m_extractManifoldAndAddContactKernel) - // clReleaseKernel(m_extractManifoldAndAddContactKernel); - - if (m_bvhTraversalKernel) - clReleaseKernel(m_bvhTraversalKernel); -} - -struct MyTriangleCallback : public b3NodeOverlapCallback -{ - int m_bodyIndexA; - int m_bodyIndexB; - - virtual void processNode(int subPart, int triangleIndex) - { - printf("bodyIndexA %d, bodyIndexB %d\n", m_bodyIndexA, m_bodyIndexB); - printf("triangleIndex %d\n", triangleIndex); - } -}; - -#define float4 b3Vector3 -#define make_float4(x, y, z, w) b3MakeVector3(x, y, z, w) - -float signedDistanceFromPointToPlane(const float4& point, const float4& planeEqn, float4* closestPointOnFace) -{ - float4 n = planeEqn; - n[3] = 0.f; - float dist = dot3F4(n, point) + planeEqn[3]; - 
*closestPointOnFace = point - dist * n; - return dist; -} - -#define cross3(a, b) (a.cross(b)) -b3Vector3 transform(const b3Vector3* v, const b3Vector3* pos, const b3Quaternion* orn) -{ - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(*pos); - tr.setRotation(*orn); - b3Vector3 res = tr(*v); - return res; -} - -inline bool IsPointInPolygon(const float4& p, - const b3GpuFace* face, - const float4* baseVertex, - const int* convexIndices, - float4* out) -{ - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; - - float4 plane = b3MakeVector3(face->m_plane.x, face->m_plane.y, face->m_plane.z, 0.f); - - if (face->m_numIndices < 2) - return false; - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices - 1]]; - b = v0; - - for (unsigned i = 0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b - a; - ap = p - a; - v = cross3(ab, plane); - - if (b3Dot(ap, v) > 0.f) - { - float ab_m2 = b3Dot(ab, ab); - float rt = ab_m2 != 0.f ? 
b3Dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - -#define normalize3(a) (a.normalize()) - -int extractManifoldSequentialGlobal(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - float4 center = b3MakeVector3(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3(nearNormal, aVector); - float4 v = cross3(nearNormal, u); - u = normalize3(u); - v = normalize3(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - float4 r = p[ie] - center; - f = dot3F4(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = dot3F4(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -#define MAX_VERTS 1024 - -inline void project(const b3ConvexPolyhedronData& hull, const float4& pos, 
const b3Quaternion& orn, const float4& dir, const b3AlignedObjectArray<b3Vector3>& vertices, b3Scalar& min, b3Scalar& max) -{ - min = FLT_MAX; - max = -FLT_MAX; - int numVerts = hull.m_numVertices; - - const float4 localDir = b3QuatRotate(orn.inverse(), dir); - - b3Scalar offset = dot3F4(pos, dir); - - for (int i = 0; i < numVerts; i++) - { - //b3Vector3 pt = trans * vertices[m_vertexOffset+i]; - //b3Scalar dp = pt.dot(dir); - //b3Vector3 vertex = vertices[hull.m_vertexOffset+i]; - b3Scalar dp = dot3F4((float4&)vertices[hull.m_vertexOffset + i], localDir); - //b3Assert(dp==dpL); - if (dp < min) min = dp; - if (dp > max) max = dp; - } - if (min > max) - { - b3Scalar tmp = min; - min = max; - max = tmp; - } - min += offset; - max += offset; -} - -static bool TestSepAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA, - const float4& posB, const b3Quaternion& ornB, - const float4& sep_axis, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB, b3Scalar& depth) -{ - b3Scalar Min0, Max0; - b3Scalar Min1, Max1; - project(hullA, posA, ornA, sep_axis, verticesA, Min0, Max0); - project(hullB, posB, ornB, sep_axis, verticesB, Min1, Max1); - - if (Max0 < Min1 || Max1 < Min0) - return false; - - b3Scalar d0 = Max0 - Min1; - assert(d0 >= 0.0f); - b3Scalar d1 = Max1 - Min0; - assert(d1 >= 0.0f); - depth = d0 < d1 ? 
d0 : d1; - return true; -} - -inline bool IsAlmostZero(const b3Vector3& v) -{ - if (fabsf(v.x) > 1e-6 || fabsf(v.y) > 1e-6 || fabsf(v.z) > 1e-6) return false; - return true; -} - -static bool findSeparatingAxis(const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA1, - const b3Quaternion& ornA, - const float4& posB1, - const b3Quaternion& ornB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - b3Vector3& sep) -{ - B3_PROFILE("findSeparatingAxis"); - - b3g_actualSATPairTests++; - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - //#ifdef TEST_INTERNAL_OBJECTS - float4 c0local = (float4&)hullA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = (float4&)hullB.m_localCenter; - float4 c1 = transform(&c1local, &posB, &ornB); - const float4 deltaC2 = c0 - c1; - //#endif - - b3Scalar dmin = FLT_MAX; - int curPlaneTests = 0; - - int numFacesA = hullA.m_numFaces; - // Test normals from hullA - for (int i = 0; i < numFacesA; i++) - { - const float4& normal = (float4&)facesA[hullA.m_faceOffset + i].m_plane; - float4 faceANormalWS = b3QuatRotate(ornA, normal); - - if (dot3F4(deltaC2, faceANormalWS) < 0) - faceANormalWS *= -1.f; - - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, faceANormalWS, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, faceANormalWS, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = 
(b3Vector3&)faceANormalWS; - } - } - - int numFacesB = hullB.m_numFaces; - // Test normals from hullB - for (int i = 0; i < numFacesB; i++) - { - float4 normal = (float4&)facesB[hullB.m_faceOffset + i].m_plane; - float4 WorldNormal = b3QuatRotate(ornB, normal); - - if (dot3F4(deltaC2, WorldNormal) < 0) - { - WorldNormal *= -1.f; - } - curPlaneTests++; -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, WorldNormal, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar d; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, WorldNormal, verticesA, verticesB, d)) - return false; - - if (d < dmin) - { - dmin = d; - sep = (b3Vector3&)WorldNormal; - } - } - - int curEdgeEdge = 0; - // Test edges - for (int e0 = 0; e0 < hullA.m_numUniqueEdges; e0++) - { - const float4& edge0 = (float4&)uniqueEdgesA[hullA.m_uniqueEdgesOffset + e0]; - float4 edge0World = b3QuatRotate(ornA, (float4&)edge0); - - for (int e1 = 0; e1 < hullB.m_numUniqueEdges; e1++) - { - const b3Vector3 edge1 = uniqueEdgesB[hullB.m_uniqueEdgesOffset + e1]; - float4 edge1World = b3QuatRotate(ornB, (float4&)edge1); - - float4 crossje = cross3(edge0World, edge1World); - - curEdgeEdge++; - if (!IsAlmostZero((b3Vector3&)crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(deltaC2, crossje) < 0) - crossje *= -1.f; - -#ifdef TEST_INTERNAL_OBJECTS - gExpectedNbTests++; - if (gUseInternalObject && !TestInternalObjects(transA, transB, DeltaC2, Cross, hullA, hullB, dmin)) - continue; - gActualNbTests++; -#endif - - b3Scalar dist; - if (!TestSepAxis(hullA, hullB, posA, ornA, posB, ornB, crossje, verticesA, verticesB, dist)) - return false; - - if (dist < dmin) - { - dmin = dist; - sep = (b3Vector3&)crossje; - } - } - } - } - - if ((dot3F4(-deltaC2, (float4&)sep)) > 0.0f) - sep = -sep; - - return true; -} - -bool findSeparatingAxisEdgeEdge(__global const b3ConvexPolyhedronData* hullA, __global const 
b3ConvexPolyhedronData* hullB, - const b3Float4& posA1, - const b3Quat& ornA, - const b3Float4& posB1, - const b3Quat& ornB, - const b3Float4& DeltaC2, - __global const b3AlignedObjectArray<float4>& vertices, - __global const b3AlignedObjectArray<float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - float4* sep, - float* dmin) -{ - // int i = get_global_id(0); - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - //int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for (int e0 = 0; e0 < hullA->m_numUniqueEdges; e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset + e0]; - float4 edge0World = b3QuatRotate(ornA, edge0); - - for (int e1 = 0; e1 < hullB->m_numUniqueEdges; e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset + e1]; - float4 edge1World = b3QuatRotate(ornB, edge1); - - float4 crossje = cross3(edge0World, edge1World); - - curEdgeEdge++; - if (!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2, crossje) < 0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0, Max0; - float Min1, Max1; - project(*hullA, posA, ornA, crossje, vertices, Min0, Max0); - project(*hullB, posB, ornB, crossje, vertices, Min1, Max1); - - if (Max0 < Min1 || Max1 < Min0) - result = false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0 < d1 ? 
d0 : d1; - result = true; - } - - if (dist < *dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - } - - if ((dot3F4(-DeltaC2, *sep)) > 0.0f) - { - *sep = -(*sep); - } - return true; -} - -__inline float4 lerp3(const float4& a, const float4& b, float t) -{ - return b3MakeVector3(a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4& planeNormalWS, float planeEqWS, float4* ppVtxOut) -{ - int ve; - float ds, de; - int numVertsOut = 0; - if (numVertsIn < 2) - return 0; - - float4 firstVertex = pVtxIn[numVertsIn - 1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS, firstVertex) + planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex = pVtxIn[ve]; - - de = dot3F4(planeNormalWS, endVertex) + planeEqWS; - - if (ds < 0) - { - if (de < 0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - } - } - else - { - if (de < 0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex, (ds * 1.f / (ds - de))); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - -int clipFaceAgainstHull(const float4& separatingNormal, const b3ConvexPolyhedronData* hullA, - const float4& posA, const b3Quaternion& ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - //const float4* verticesB, const b3GpuFace* facesB, const int* indicesB, - float4* contactsOut, - 
int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA = -1; - { - float dmin = FLT_MAX; - for (int face = 0; face < hullA->m_numFaces; face++) - { - const float4 Normal = b3MakeVector3( - facesA[hullA->m_faceOffset + face].m_plane.x, - facesA[hullA->m_faceOffset + face].m_plane.y, - facesA[hullA->m_faceOffset + face].m_plane.z, 0.f); - const float4 faceANormalWS = b3QuatRotate(ornA, Normal); - - float d = dot3F4(faceANormalWS, separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA < 0) - return numContactsOut; - - b3GpuFace polyA = facesA[hullA->m_faceOffset + closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - // int numContacts = numWorldVertsB1; - int numVerticesA = polyA.m_numIndices; - for (int e0 = 0; e0 < numVerticesA; e0++) - { - const float4 a = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + e0]]; - const float4 b = verticesA[hullA->m_vertexOffset + indicesA[polyA.m_indexOffset + ((e0 + 1) % numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = b3QuatRotate(ornA, edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - float4 worldPlaneAnormal1 = b3QuatRotate(ornA, planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0, worldPlaneAnormal1); - float4 worldA1 = transform(&a, &posA, &ornA); - float planeEqWS1 = -dot3F4(worldA1, planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS = planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS, planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - // 
only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x, polyA.m_plane.y, polyA.m_plane.z, 0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = b3QuatRotate(ornA, localPlaneNormal); - float planeEqWS = localPlaneEq - dot3F4(planeNormalWS, posA); - for (int i = 0; i < numVertsIn; i++) - { - float depth = dot3F4(planeNormalWS, pVtxIn[i]) + planeEqWS; - if (depth <= minDist) - { - depth = minDist; - } - if (numContactsOut < contactCapacity) - { - if (depth <= maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = b3MakeVector3(pointInWorld.x, pointInWorld.y, pointInWorld.z, depth); - //printf("depth=%f\n",depth); - } - } - else - { - b3Error("exceeding contact capacity (%d,%df)\n", numContactsOut, contactCapacity); - } - } - } - - return numContactsOut; -} - -static int clipHullAgainstHull(const float4& separatingNormal, - const b3ConvexPolyhedronData& hullA, const b3ConvexPolyhedronData& hullB, - const float4& posA, const b3Quaternion& ornA, const float4& posB, const b3Quaternion& ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const b3AlignedObjectArray<float4>& verticesA, const b3AlignedObjectArray<b3GpuFace>& facesA, const b3AlignedObjectArray<int>& indicesA, - const b3AlignedObjectArray<float4>& verticesB, const b3AlignedObjectArray<b3GpuFace>& facesB, const b3AlignedObjectArray<int>& indicesB, - - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1 = 0; - - B3_PROFILE("clipHullAgainstHull"); - - // float curMaxDist=maxDist; - int closestFaceB = -1; - float dmax = -FLT_MAX; - - { - //B3_PROFILE("closestFaceB"); - if (hullB.m_numFaces != 1) - { - //printf("wtf\n"); - } - static bool once = true; - //printf("separatingNormal=%f,%f,%f\n",separatingNormal.x,separatingNormal.y,separatingNormal.z); - - 
for (int face = 0; face < hullB.m_numFaces; face++) - { -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("face %d\n", face); - const b3GpuFace* faceB = &facesB[hullB.m_faceOffset + face]; - if (once) - { - for (int i = 0; i < faceB->m_numIndices; i++) - { - float4 vert = verticesB[hullB.m_vertexOffset + indicesB[faceB->m_indexOffset + i]]; - printf("vert[%d] = %f,%f,%f\n", i, vert.x, vert.y, vert.z); - } - } -#endif //BT_DEBUG_SAT_FACE \ - //if (facesB[hullB.m_faceOffset+face].m_numIndices>2) - { - const float4 Normal = b3MakeVector3(facesB[hullB.m_faceOffset + face].m_plane.x, - facesB[hullB.m_faceOffset + face].m_plane.y, facesB[hullB.m_faceOffset + face].m_plane.z, 0.f); - const float4 WorldNormal = b3QuatRotate(ornB, Normal); -#ifdef BT_DEBUG_SAT_FACE - if (once) - printf("faceNormal = %f,%f,%f\n", Normal.x, Normal.y, Normal.z); -#endif - float d = dot3F4(WorldNormal, separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - once = false; - } - - b3Assert(closestFaceB >= 0); - { - //B3_PROFILE("worldVertsB1"); - const b3GpuFace& polyB = facesB[hullB.m_faceOffset + closestFaceB]; - const int numVertices = polyB.m_numIndices; - for (int e0 = 0; e0 < numVertices; e0++) - { - const float4& b = verticesB[hullB.m_vertexOffset + indicesB[polyB.m_indexOffset + e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b, &posB, &ornB); - } - } - - if (closestFaceB >= 0) - { - //B3_PROFILE("clipFaceAgainstHull"); - numContactsOut = clipFaceAgainstHull((float4&)separatingNormal, &hullA, - posA, ornA, - worldVertsB1, numWorldVertsB1, worldVertsB2, capacityWorldVerts, minDist, maxDist, - verticesA, facesA, indicesA, - contactsOut, contactCapacity); - } - - return numContactsOut; -} - -#define PARALLEL_SUM(v, n) \ - for (int j = 1; j < n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) \ - for (int ie = 0; ie < n; ie++) \ - { \ - execution; \ - } -#define REDUCE_MAX(v, n) \ - { \ - int i = 0; \ - for (int offset = 0; offset < n; offset++) v[i] = 
(v[i].y > v[i + offset].y) ? v[i] : v[i + offset]; \ - } -#define REDUCE_MIN(v, n) \ - { \ - int i = 0; \ - for (int offset = 0; offset < n; offset++) v[i] = (v[i].y < v[i + offset].y) ? v[i] : v[i + offset]; \ - } - -int extractManifold(const float4* p, int nPoints, const float4& nearNormal, b3Int4* contactIdx) -{ - if (nPoints == 0) - return 0; - - if (nPoints <= 4) - return nPoints; - - if (nPoints > 64) - nPoints = 64; - - float4 center = make_float4(0, 0, 0, 0); - { - for (int i = 0; i < nPoints; i++) - center += p[i]; - center /= (float)nPoints; - } - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3(nearNormal, aVector); - float4 v = cross3(nearNormal, u); - u = normalize3(u); - v = normalize3(v); - - //keep point with deepest penetration - float minW = FLT_MAX; - - int minIndex = -1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for (int ie = 0; ie < nPoints; ie++) - { - if (p[ie].w < minW) - { - minW = p[ie].w; - minIndex = ie; - } - float f; - float4 r = p[ie] - center; - f = dot3F4(u, r); - if (f < maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4(-u, r); - if (f < maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - f = dot3F4(v, r); - if (f < maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4(-v, r); - if (f < maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; -} - -int clipHullHullSingle( - int bodyIndexA, int bodyIndexB, - const float4& posA, - const b3Quaternion& ornA, - const float4& posB, - const b3Quaternion& ornB, - - int collidableIndexA, int collidableIndexB, - - const 
b3AlignedObjectArray<b3RigidBodyData>* bodyBuf, - b3AlignedObjectArray<b3Contact4>* globalContactOut, - int& nContacts, - - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataA, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& hostConvexDataB, - - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesA, - const b3AlignedObjectArray<b3GpuFace>& facesA, - const b3AlignedObjectArray<int>& indicesA, - - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3AlignedObjectArray<b3Vector3>& uniqueEdgesB, - const b3AlignedObjectArray<b3GpuFace>& facesB, - const b3AlignedObjectArray<int>& indicesB, - - const b3AlignedObjectArray<b3Collidable>& hostCollidablesA, - const b3AlignedObjectArray<b3Collidable>& hostCollidablesB, - const b3Vector3& sepNormalWorldSpace, - int maxContactCapacity) -{ - int contactIndex = -1; - b3ConvexPolyhedronData hullA, hullB; - - b3Collidable colA = hostCollidablesA[collidableIndexA]; - hullA = hostConvexDataA[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = hostCollidablesB[collidableIndexB]; - hullB = hostConvexDataB[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - float4 contactsOut[MAX_VERTS]; - int localContactCapacity = MAX_VERTS; - -#ifdef _WIN32 - b3Assert(_finite(bodyBuf->at(bodyIndexA).m_pos.x)); - b3Assert(_finite(bodyBuf->at(bodyIndexB).m_pos.x)); -#endif - - { - float4 worldVertsB1[MAX_VERTS]; - float4 worldVertsB2[MAX_VERTS]; - int capacityWorldVerts = MAX_VERTS; - - float4 hostNormal = make_float4(sepNormalWorldSpace.x, sepNormalWorldSpace.y, sepNormalWorldSpace.z, 0.f); - int shapeA = hostCollidablesA[collidableIndexA].m_shapeIndex; - int shapeB = hostCollidablesB[collidableIndexB].m_shapeIndex; - - b3Scalar minDist = -1; - b3Scalar maxDist = 0.; - - b3Transform trA, trB; - { - //B3_PROFILE("transform computation"); - //trA.setIdentity(); - trA.setOrigin(b3MakeVector3(posA.x, posA.y, 
posA.z)); - trA.setRotation(b3Quaternion(ornA.x, ornA.y, ornA.z, ornA.w)); - - //trB.setIdentity(); - trB.setOrigin(b3MakeVector3(posB.x, posB.y, posB.z)); - trB.setRotation(b3Quaternion(ornB.x, ornB.y, ornB.z, ornB.w)); - } - - b3Quaternion trAorn = trA.getRotation(); - b3Quaternion trBorn = trB.getRotation(); - - int numContactsOut = clipHullAgainstHull(hostNormal, - hostConvexDataA.at(shapeA), - hostConvexDataB.at(shapeB), - (float4&)trA.getOrigin(), (b3Quaternion&)trAorn, - (float4&)trB.getOrigin(), (b3Quaternion&)trBorn, - worldVertsB1, worldVertsB2, capacityWorldVerts, - minDist, maxDist, - verticesA, facesA, indicesA, - verticesB, facesB, indicesB, - - contactsOut, localContactCapacity); - - if (numContactsOut > 0) - { - B3_PROFILE("overlap"); - - float4 normalOnSurfaceB = (float4&)hostNormal; - - b3Int4 contactIdx; - contactIdx.x = 0; - contactIdx.y = 1; - contactIdx.z = 2; - contactIdx.w = 3; - - int numPoints = 0; - - { - // B3_PROFILE("extractManifold"); - numPoints = extractManifold(contactsOut, numContactsOut, normalOnSurfaceB, &contactIdx); - } - - b3Assert(numPoints); - - if (nContacts < maxContactCapacity) - { - contactIndex = nContacts; - globalContactOut->expand(); - b3Contact4& contact = globalContactOut->at(nContacts); - contact.m_batchIdx = 0; //i; - contact.m_bodyAPtrAndSignBit = (bodyBuf->at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; - contact.m_bodyBPtrAndSignBit = (bodyBuf->at(bodyIndexB).m_invMass == 0) ? 
-bodyIndexB : bodyIndexB; - - contact.m_frictionCoeffCmp = 45874; - contact.m_restituitionCoeffCmp = 0; - - // float distance = 0.f; - for (int p = 0; p < numPoints; p++) - { - contact.m_worldPosB[p] = contactsOut[contactIdx.s[p]]; //check if it is actually on B - contact.m_worldNormalOnB = normalOnSurfaceB; - } - //printf("bodyIndexA %d,bodyIndexB %d,normal=%f,%f,%f numPoints %d\n",bodyIndexA,bodyIndexB,normalOnSurfaceB.x,normalOnSurfaceB.y,normalOnSurfaceB.z,numPoints); - contact.m_worldNormalOnB.w = (b3Scalar)numPoints; - nContacts++; - } - else - { - b3Error("Error: exceeding contact capacity (%d/%d)\n", nContacts, maxContactCapacity); - } - } - } - return contactIndex; -} - -void computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndex]; - - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - - // int numContactsOut = 0; - // int numWorldVertsB1= 0; - - b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal); - float planeConstant = planeEq.w; - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(posB); - convexWorldTransform.setRotation(ornB); - b3Transform planeTransform; - planeTransform.setIdentity(); - 
planeTransform.setOrigin(posA); - planeTransform.setRotation(ornA); - - b3Transform planeInConvex; - planeInConvex = convexWorldTransform.inverse() * planeTransform; - b3Transform convexInPlane; - convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal; - float maxDot = -1e30; - int hitVertex = -1; - b3Vector3 hitVtx; - -#define MAX_PLANE_CONVEX_POINTS 64 - - b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - b3Int4 contactIdx; - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - for (int i = 0; i < hullB->m_numVertices; i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; - float curDot = vtx.dot(planeNormalInConvex); - - if (curDot > maxDot) - { - hitVertex = i; - maxDot = curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints == MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints < MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform * vtx; - b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; - float dist = planeNormal.dot(vtxInPlane) - planeConstant; - if (dist < 0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints > 4) - { - numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints > 0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = 
rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - for (int i = 0; i < numReducedPoints; i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - } //if (dstIdx < numPairs) - } - - // printf("computeContactPlaneConvex\n"); -} - -B3_FORCE_INLINE b3Vector3 MyUnQuantize(const unsigned short* vecIn, const b3Vector3& quantization, const b3Vector3& bvhAabbMin) -{ - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (quantization.x), - (b3Scalar)(vecIn[1]) / (quantization.y), - (b3Scalar)(vecIn[2]) / (quantization.z)); - vecOut += bvhAabbMin; - return vecOut; -} - -void traverseTreeTree() -{ -} - -#include "Bullet3Common/shared/b3Mat3x3.h" - -int numAabbChecks = 0; -int maxNumAabbChecks = 0; -int maxDepth = 0; - -// work-in-progress -__kernel void findCompoundPairsKernel( - int pairIndex, - int bodyIndexA, - int bodyIndexB, - int collidableIndexA, - int collidableIndexB, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Aabb>& aabbsWorldSpace, - __global const b3AlignedObjectArray<b3Aabb>& aabbsLocalSpace, - __global const b3GpuChildShape* gpuChildShapes, - __global b3Int4* gpuCompoundPairsOut, - __global int* numCompoundPairsOut, - int maxNumCompoundPairsCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU) -{ - numAabbChecks = 0; - maxNumAabbChecks = 0; - // int i = pairIndex; - { - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - //once the broadphase avoids static-static 
pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass == 0) && (rigidBodies[bodyIndexB].m_invMass == 0)) - { - return; - } - - if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; - int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; - int numSubTreesA = bvhInfoCPU[bvhA].m_numSubTrees; - int subTreesOffsetA = bvhInfoCPU[bvhA].m_subTreeOffset; - int subTreesOffsetB = bvhInfoCPU[bvhB].m_subTreeOffset; - - int numSubTreesB = bvhInfoCPU[bvhB].m_numSubTrees; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(posA); - transA.setRotation(ornA); - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(posB); - transB.setRotation(ornB); - - for (int p = 0; p < numSubTreesA; p++) - { - b3BvhSubtreeInfo subtreeA = subTreesCPU[subTreesOffsetA + p]; - //bvhInfoCPU[bvhA].m_quantization - b3Vector3 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 aabbAMinOut, aabbAMaxOut; - float margin = 0.f; - b3TransformAabb2(treeAminLocal, treeAmaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - for (int q = 0; q < numSubTreesB; q++) - { - b3BvhSubtreeInfo subtreeB = subTreesCPU[subTreesOffsetB + q]; - - b3Vector3 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax, 
bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - float margin = 0.f; - b3TransformAabb2(treeBminLocal, treeBmaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks = 0; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (aabbOverlap) - { - int startNodeIndexA = subtreeA.m_rootNodeIndex + bvhInfoCPU[bvhA].m_nodeOffset; - // int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - - int startNodeIndexB = subtreeB.m_rootNodeIndex + bvhInfoCPU[bvhB].m_nodeOffset; - // int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; - - b3AlignedObjectArray<b3Int2> nodeStack; - b3Int2 node0; - node0.x = startNodeIndexA; - node0.y = startNodeIndexB; - - int maxStackDepth = 1024; - nodeStack.resize(maxStackDepth); - int depth = 0; - nodeStack[depth++] = node0; - - do - { - if (depth > maxDepth) - { - maxDepth = depth; - printf("maxDepth=%d\n", maxDepth); - } - b3Int2 node = nodeStack[--depth]; - - b3Vector3 aMinLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMin, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - b3Vector3 aMaxLocal = MyUnQuantize(treeNodesCPU[node.x].m_quantizedAabbMax, bvhInfoCPU[bvhA].m_quantization, bvhInfoCPU[bvhA].m_aabbMin); - - b3Vector3 bMinLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMin, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - b3Vector3 bMaxLocal = MyUnQuantize(treeNodesCPU[node.y].m_quantizedAabbMax, bvhInfoCPU[bvhB].m_quantization, bvhInfoCPU[bvhB].m_aabbMin); - - float margin = 0.f; - b3Vector3 aabbAMinOut, aabbAMaxOut; - b3TransformAabb2(aMinLocal, aMaxLocal, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - b3TransformAabb2(bMinLocal, bMaxLocal, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks++; - bool 
nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (nodeOverlap) - { - bool isLeafA = treeNodesCPU[node.x].isLeafNode(); - bool isLeafB = treeNodesCPU[node.y].isLeafNode(); - bool isInternalA = !isLeafA; - bool isInternalB = !isLeafB; - - //fail, even though it might hit two leaf nodes - if (depth + 4 > maxStackDepth && !(isLeafA && isLeafB)) - { - b3Error("Error: traversal exceeded maxStackDepth\n"); - continue; - } - - if (isInternalA) - { - int nodeAleftChild = node.x + 1; - bool isNodeALeftChildLeaf = treeNodesCPU[node.x + 1].isLeafNode(); - int nodeArightChild = isNodeALeftChildLeaf ? node.x + 2 : node.x + 1 + treeNodesCPU[node.x + 1].getEscapeIndex(); - - if (isInternalB) - { - int nodeBleftChild = node.y + 1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf ? node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, node.y); - } - } - else - { - if (isInternalB) - { - int nodeBleftChild = node.y + 1; - bool isNodeBLeftChildLeaf = treeNodesCPU[node.y + 1].isLeafNode(); - int nodeBrightChild = isNodeBLeftChildLeaf ? 
node.y + 2 : node.y + 1 + treeNodesCPU[node.y + 1].getEscapeIndex(); - nodeStack[depth++] = b3MakeInt2(node.x, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x, nodeBrightChild); - } - else - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - int childShapeIndexA = treeNodesCPU[node.x].getTriangleIndex(); - int childShapeIndexB = treeNodesCPU[node.y].getTriangleIndex(); - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); - } - } - } - } - } while (depth); - maxNumAabbChecks = b3Max(numAabbChecks, maxNumAabbChecks); - } - } - } - - return; - } - - if ((collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) || (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - if (collidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c = 0; c < numChildrenA; c++) - { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex + c; - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - - b3Aabb aabbA = aabbsLocalSpace[childColIndexA]; - - b3Transform transA; - transA.setIdentity(); - transA.setOrigin(newPosA); - transA.setRotation(newOrnA); - b3Scalar margin = 0.0f; - - b3Vector3 aabbAMinOut, aabbAMaxOut; - - b3TransformAabb2((const b3Float4&)aabbA.m_min, (const b3Float4&)aabbA.m_max, margin, transA.getOrigin(), transA.getRotation(), &aabbAMinOut, &aabbAMaxOut); - - if (collidables[collidableIndexB].m_shapeType == 
SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b = 0; b < numChildrenB; b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB, &posB, &ornB); - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - - b3Aabb aabbB = aabbsLocalSpace[childColIndexB]; - - b3Transform transB; - transB.setIdentity(); - transB.setOrigin(newPosB); - transB.setRotation(newOrnB); - - b3Vector3 aabbBMinOut, aabbBMaxOut; - b3TransformAabb2((const b3Float4&)aabbB.m_min, (const b3Float4&)aabbB.m_max, margin, transB.getOrigin(), transB.getRotation(), &aabbBMinOut, &aabbBMaxOut); - - numAabbChecks++; - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut, aabbAMaxOut, aabbBMinOut, aabbBMaxOut); - if (aabbOverlap) - { - /* - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - */ - { // - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, childShapeIndexB); - } - } // - } //fi(1) - } //for (int b=0 - } //if (collidables[collidableIndexB]. 
- else //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - if (1) - { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = newOrnA; - float4 c0; - c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 c1; - c1 = transform(&c1local, &posB, &ornB); - // const float4 DeltaC2 = c0 - c1; - - { - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, childShapeIndexA, -1); - } //if (compoundPairIdx<maxNumCompoundPairsCapacity) - } // - } //fi (1) - } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - } //for (int b=0;b<numChildrenB;b++) - return; - } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - if ((collidables[collidableIndexA].m_shapeType != SHAPE_CONCAVE_TRIMESH) && (collidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b = 0; b < numChildrenB; b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - 
////////////////////////////////////// - - if (1) - { - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - // float dmin = FLT_MAX; - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0; - c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - b3Quat ornB = newOrnB; - float4 c1; - c1 = transform(&c1local, &posB, &ornB); - // const float4 DeltaC2 = c0 - c1; - { // - int compoundPairIdx = b3AtomicInc(numCompoundPairsOut); - if (compoundPairIdx < maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = b3MakeInt4(bodyIndexA, bodyIndexB, -1, childShapeIndexB); - } //fi (compoundPairIdx<maxNumCompoundPairsCapacity) - } // - } //fi (1) - } //for (int b=0;b<numChildrenB;b++) - return; - } //if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - return; - } //fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - } //i<numPairs -} - -__kernel void processCompoundPairsKernel(__global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global b3Aabb* aabbs, - __global const b3GpuChildShape* gpuChildShapes, - __global b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - int numCompoundPairs, - int i) -{ - // int i = get_global_id(0); - if (i < numCompoundPairs) - { - int bodyIndexA 
= gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - posA = newPosA; - ornA = newOrnA; - } - else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB >= 0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - posB = newPosB; - ornB = newOrnB; - } - else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - // int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, 
&ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local, &posB, &ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1, 0, 0, 0); - // bool sepA = findSeparatingAxis( convexShapes[shapeIndexA], convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - bool sepA = findSeparatingAxis(convexShapes[shapeIndexA], convexShapes[shapeIndexB], posA, ornA, posB, ornB, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); - - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } - else - { - bool sepB = findSeparatingAxis(convexShapes[shapeIndexB], convexShapes[shapeIndexA], posB, ornB, posA, ornA, vertices, uniqueEdges, faces, indices, vertices, uniqueEdges, faces, indices, sepNormal); //,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } - else //(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB], posA, ornA, posB, ornB, DeltaC2, vertices, uniqueEdges, faces, indices, &sepNormal, &dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal; //fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - } //sepEE - } //(!sepB) - } //(!sepA) - } -} - -__kernel void clipCompoundsHullHullKernel(__global const b3Int4* gpuCompoundPairs, - __global const b3RigidBodyData* rigidBodies, - __global const b3Collidable* collidables, - __global const b3ConvexPolyhedronData* convexShapes, - __global const b3AlignedObjectArray<b3Float4>& vertices, - __global const b3AlignedObjectArray<b3Float4>& uniqueEdges, - __global const b3AlignedObjectArray<b3GpuFace>& faces, - __global const b3AlignedObjectArray<int>& indices, - __global const b3GpuChildShape* gpuChildShapes, - __global const b3AlignedObjectArray<b3Float4>& gpuCompoundSepNormalsOut, - __global const b3AlignedObjectArray<int>& gpuHasCompoundSepNormalsOut, - __global struct 
b3Contact4Data* globalContactsOut, - int* nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity, int i) -{ - // int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity = 64; - - float minDist = -1e30f; - float maxDist = 0.0f; - - if (i < numCompoundPairs) - { - if (gpuHasCompoundSepNormalsOut[i]) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA, childPosA) + posA; - b3Quat newOrnA = b3QuatMul(ornA, childOrnA); - posA = newPosA; - ornA = newOrnA; - } - else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB >= 0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - b3Quat childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = b3QuatRotate(ornB, childPosB) + posB; - b3Quat newOrnB = b3QuatMul(ornB, childOrnB); - posB = newPosB; - ornB = newOrnB; - } - else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = 
clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - convexShapes[shapeIndexA], convexShapes[shapeIndexB], - posA, ornA, - posB, ornB, - worldVertsB1, worldVertsB2, capacityWorldVerts, - minDist, maxDist, - vertices, faces, indices, - vertices, faces, indices, - localContactsOut, localContactCapacity); - - if (numLocalContactsOut > 0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - b3Int4 contactIdx; // = {-1,-1,-1,-1}; - - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int dstIdx; - dstIdx = b3AtomicInc(nGlobalContactsOut); - if ((dstIdx + nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut + dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f * 0xffff); - c->m_frictionCoeffCmp = (0.7f * 0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass == 0 ? -bodyA : bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass == 0 ? 
-bodyB : bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i = 0; i < nReducedContacts; i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx.s[i]]; - } - b3Contact4Data_setNumPoints(c, nReducedContacts); - } - - } // if (numContactsOut>0) - } // if (gpuHasCompoundSepNormalsOut[i]) - } // if (i<numCompoundPairs) -} - -void computeContactCompoundCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3AlignedObjectArray<b3Aabb>& hostAabbsWorldSpace, - const b3AlignedObjectArray<b3Aabb>& hostAabbsLocalSpace, - - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& hostUniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - b3AlignedObjectArray<b3QuantizedBvhNode>& treeNodesCPU, - b3AlignedObjectArray<b3BvhSubtreeInfo>& subTreesCPU, - b3AlignedObjectArray<b3BvhInfo>& bvhInfoCPU) -{ - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - - b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - int numCompoundPairsOut = 0; - int maxNumCompoundPairsCapacity = 8192; //1024; - cpuCompoundPairsOut.resize(maxNumCompoundPairsCapacity); - - // work-in-progress - findCompoundPairsKernel( - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - rigidBodies, - collidables, - convexShapes, - convexVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - cpuChildShapes, - &cpuCompoundPairsOut[0], - &numCompoundPairsOut, - maxNumCompoundPairsCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU); - - printf("maxNumAabbChecks=%d\n", maxNumAabbChecks); - if 
(numCompoundPairsOut > maxNumCompoundPairsCapacity) - { - b3Error("numCompoundPairsOut exceeded maxNumCompoundPairsCapacity (%d)\n", maxNumCompoundPairsCapacity); - numCompoundPairsOut = maxNumCompoundPairsCapacity; - } - b3AlignedObjectArray<b3Float4> cpuCompoundSepNormalsOut; - b3AlignedObjectArray<int> cpuHasCompoundSepNormalsOut; - cpuCompoundSepNormalsOut.resize(numCompoundPairsOut); - cpuHasCompoundSepNormalsOut.resize(numCompoundPairsOut); - - for (int i = 0; i < numCompoundPairsOut; i++) - { - processCompoundPairsKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, 0, cpuChildShapes, - cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, numCompoundPairsOut, i); - } - - for (int i = 0; i < numCompoundPairsOut; i++) - { - clipCompoundsHullHullKernel(&cpuCompoundPairsOut[0], rigidBodies, collidables, convexShapes, convexVertices, hostUniqueEdges, faces, convexIndices, cpuChildShapes, - cpuCompoundSepNormalsOut, cpuHasCompoundSepNormalsOut, globalContactsOut, &nGlobalContactsOut, numCompoundPairsOut, maxContactCapacity, i); - } - /* - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - b3Quat childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = b3QuatRotate(ornA,childPosA)+posA; - b3Quat newOrnA = b3QuatMul(ornA,childOrnA); - - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - - - bool foundSepAxis = findSeparatingAxis(hullA,hullB, - posA, - ornA, - posB, - ornB, - - convexVertices,uniqueEdges,faces,convexIndices, - convexVertices,uniqueEdges,faces,convexIndices, - - sepNormalWorldSpace - ); - */ - - /* - if (foundSepAxis) - { - - - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA,ornA, - posB,ornB, - collidableIndexA, collidableIndexB, - 
&rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - - } - */ - - // return contactIndex; - - /* - - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c=0;c<numChildrenB;c++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+c; - int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; - - float4 rootPosB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; - b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; - float4 posB = b3QuatRotate(rootOrnB,childPosB)+rootPosB; - b3Quaternion ornB = b3QuatMul(rootOrnB,childOrnB);//b3QuatMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; - - } - */ -} - -void computeContactPlaneCompound(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3GpuChildShape* cpuChildShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - b3Assert(shapeTypeB == SHAPE_COMPOUND_OF_CONVEX_HULLS); - - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int c = 0; c < numChildrenB; c++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex + c; - int childColIndexB = cpuChildShapes[childShapeIndexB].m_shapeIndex; - - 
float4 rootPosB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion rootOrnB = rigidBodies[bodyIndexB].m_quat; - b3Vector3 childPosB = cpuChildShapes[childShapeIndexB].m_childPosition; - b3Quaternion childOrnB = cpuChildShapes[childShapeIndexB].m_childOrientation; - float4 posB = b3QuatRotate(rootOrnB, childPosB) + rootPosB; - b3Quaternion ornB = rootOrnB * childOrnB; //b3QuatMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - const b3ConvexPolyhedronData* hullB = &convexShapes[shapeIndexB]; - - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - - // int numContactsOut = 0; - // int numWorldVertsB1= 0; - - b3Vector3 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - b3Vector3 planeNormal = b3MakeVector3(planeEq.x, planeEq.y, planeEq.z); - b3Vector3 planeNormalWorld = b3QuatRotate(ornA, planeNormal); - float planeConstant = planeEq.w; - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(posB); - convexWorldTransform.setRotation(ornB); - b3Transform planeTransform; - planeTransform.setIdentity(); - planeTransform.setOrigin(posA); - planeTransform.setRotation(ornA); - - b3Transform planeInConvex; - planeInConvex = convexWorldTransform.inverse() * planeTransform; - b3Transform convexInPlane; - convexInPlane = planeTransform.inverse() * convexWorldTransform; - - b3Vector3 planeNormalInConvex = planeInConvex.getBasis() * -planeNormal; - float maxDot = -1e30; - int hitVertex = -1; - b3Vector3 hitVtx; - -#define MAX_PLANE_CONVEX_POINTS 64 - - b3Vector3 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - b3Int4 contactIdx; - contactIdx.s[0] = 0; - contactIdx.s[1] = 1; - contactIdx.s[2] = 2; - contactIdx.s[3] = 3; - - for (int i = 0; i < hullB->m_numVertices; i++) - { - b3Vector3 vtx = convexVertices[hullB->m_vertexOffset + i]; - float curDot = vtx.dot(planeNormalInConvex); - - if (curDot > maxDot) - { - 
hitVertex = i; - maxDot = curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints == MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints < MAX_PLANE_CONVEX_POINTS) - { - b3Vector3 vtxWorld = convexWorldTransform * vtx; - b3Vector3 vtxInPlane = planeTransform.inverse() * vtxWorld; - float dist = planeNormal.dot(vtxInPlane) - planeConstant; - if (dist < 0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - } - - int numReducedPoints = 0; - - numReducedPoints = numPoints; - - if (numPoints > 4) - { - numReducedPoints = extractManifoldSequentialGlobal(contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (numReducedPoints > 0) - { - if (nGlobalContactsOut < maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? 
-bodyIndexB : bodyIndexB; - for (int i = 0; i < numReducedPoints; i++) - { - b3Vector3 pOnB1 = contactPoints[contactIdx.s[i]]; - c->m_worldPosB[i] = pOnB1; - } - c->m_worldNormalOnB.w = (b3Scalar)numReducedPoints; - } //if (dstIdx < numPairs) - } - } -} - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3RigidBodyData* rigidBodies, - const b3Collidable* collidables, - const b3ConvexPolyhedronData* convexShapes, - const b3Vector3* convexVertices, - const int* convexIndices, - const b3GpuFace* faces, - b3Contact4* globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity) -{ - float radius = collidables[collidableIndexA].m_radius; - float4 spherePos1 = rigidBodies[bodyIndexA].m_pos; - b3Quaternion sphereOrn = rigidBodies[bodyIndexA].m_quat; - - float4 pos = rigidBodies[bodyIndexB].m_pos; - - b3Quaternion quat = rigidBodies[bodyIndexB].m_quat; - - b3Transform tr; - tr.setIdentity(); - tr.setOrigin(pos); - tr.setRotation(quat); - b3Transform trInv = tr.inverse(); - - float4 spherePos = trInv(spherePos1); - - int collidableIndex = rigidBodies[bodyIndexB].m_collidableIdx; - int shapeIndex = collidables[collidableIndex].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = b3MakeVector3(0, 0, 0, 0); - // float4 hitNormalWorld = b3MakeVector3(0, 0, 0, 0); - float minDist = -1000000.f; // TODO: What is the largest/smallest float? 
- bool bCollide = true; - int region = -1; - float4 localHitNormal; - for (int f = 0; f < numFaces; f++) - { - b3GpuFace face = faces[convexShapes[shapeIndex].m_faceOffset + f]; - float4 planeEqn; - float4 localPlaneNormal = b3MakeVector3(face.m_plane.x, face.m_plane.y, face.m_plane.z, 0.f); - float4 n1 = localPlaneNormal; //quatRotate(quat,localPlaneNormal); - planeEqn = n1; - planeEqn[3] = face.m_plane.w; - - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - if (dist > radius) - { - bCollide = false; - break; - } - - if (dist > 0) - { - //might hit an edge or vertex - b3Vector3 out; - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 1; - } - } - else - { - b3Vector3 tmp = spherePos - out; - b3Scalar l2 = tmp.length2(); - if (l2 < radius * radius) - { - dist = b3Sqrt(l2); - if (dist > minDist) - { - minDist = dist; - closestPnt = out; - localHitNormal = tmp / dist; - region = 2; - } - } - else - { - bCollide = false; - break; - } - } - } - else - { - if (dist > minDist) - { - minDist = dist; - closestPnt = pntReturn; - localHitNormal = planeEqn; - region = 3; - } - } - } - static int numChecks = 0; - numChecks++; - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = tr.getBasis() * localHitNormal; //-hitNormalWorld; - float4 pOnB1 = tr(closestPnt); - //printf("dist ,%f,",minDist); - float actualDepth = minDist - radius; - if (actualDepth < 0) - { - //printf("actualDepth = ,%f,", actualDepth); - //printf("normalOnSurfaceB1 = ,%f,%f,%f,", normalOnSurfaceB1.x,normalOnSurfaceB1.y,normalOnSurfaceB1.z); - //printf("region=,%d,\n", region); - pOnB1[3] = actualDepth; - - int dstIdx; - // dstIdx = nGlobalContactsOut++;//AppendInc( nGlobalContactsOut, dstIdx ); - - if (nGlobalContactsOut 
< maxContactCapacity) - { - dstIdx = nGlobalContactsOut; - nGlobalContactsOut++; - - b3Contact4* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB1; - c->setFrictionCoeff(0.7); - c->setRestituitionCoeff(0.f); - - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass == 0 ? -bodyIndexA : bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass == 0 ? -bodyIndexB : bodyIndexB; - c->m_worldPosB[0] = pOnB1; - int numPoints = 1; - c->m_worldNormalOnB.w = (b3Scalar)numPoints; - } //if (dstIdx < numPairs) - } - } //if (hasCollision) -} - -int computeContactConvexConvex2( - int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - const b3AlignedObjectArray<b3RigidBodyData>& rigidBodies, - const b3AlignedObjectArray<b3Collidable>& collidables, - const b3AlignedObjectArray<b3ConvexPolyhedronData>& convexShapes, - const b3AlignedObjectArray<b3Vector3>& convexVertices, - const b3AlignedObjectArray<b3Vector3>& uniqueEdges, - const b3AlignedObjectArray<int>& convexIndices, - const b3AlignedObjectArray<b3GpuFace>& faces, - b3AlignedObjectArray<b3Contact4>& globalContactsOut, - int& nGlobalContactsOut, - int maxContactCapacity, - const b3AlignedObjectArray<b3Contact4>& oldContacts) -{ - int contactIndex = -1; - b3Vector3 posA = rigidBodies[bodyIndexA].m_pos; - b3Quaternion ornA = rigidBodies[bodyIndexA].m_quat; - b3Vector3 posB = rigidBodies[bodyIndexB].m_pos; - b3Quaternion ornB = rigidBodies[bodyIndexB].m_quat; - - b3ConvexPolyhedronData hullA, hullB; - - b3Vector3 sepNormalWorldSpace; - - b3Collidable colA = collidables[collidableIndexA]; - hullA = convexShapes[colA.m_shapeIndex]; - //printf("numvertsA = %d\n",hullA.m_numVertices); - - b3Collidable colB = collidables[collidableIndexB]; - hullB = convexShapes[colB.m_shapeIndex]; - //printf("numvertsB = %d\n",hullB.m_numVertices); - - // int contactCapacity = MAX_VERTS; - //int numContactsOut=0; - -#ifdef _WIN32 - 
b3Assert(_finite(rigidBodies[bodyIndexA].m_pos.x)); - b3Assert(_finite(rigidBodies[bodyIndexB].m_pos.x)); -#endif - - bool foundSepAxis = findSeparatingAxis(hullA, hullB, - posA, - ornA, - posB, - ornB, - - convexVertices, uniqueEdges, faces, convexIndices, - convexVertices, uniqueEdges, faces, convexIndices, - - sepNormalWorldSpace); - - if (foundSepAxis) - { - contactIndex = clipHullHullSingle( - bodyIndexA, bodyIndexB, - posA, ornA, - posB, ornB, - collidableIndexA, collidableIndexB, - &rigidBodies, - &globalContactsOut, - nGlobalContactsOut, - - convexShapes, - convexShapes, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - convexVertices, - uniqueEdges, - faces, - convexIndices, - - collidables, - collidables, - sepNormalWorldSpace, - maxContactCapacity); - } - - return contactIndex; -} - -void GpuSatCollision::computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs, - const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, - const b3OpenCLArray<b3Contact4>* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray<b3ConvexPolyhedronData>& convexData, - const b3OpenCLArray<b3Vector3>& gpuVertices, - const b3OpenCLArray<b3Vector3>& gpuUniqueEdges, - const b3OpenCLArray<b3GpuFace>& gpuFaces, - const b3OpenCLArray<int>& gpuIndices, - const b3OpenCLArray<b3Collidable>& gpuCollidables, - const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - - const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, - const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - - b3OpenCLArray<b3Vector3>& worldVertsB1GPU, - b3OpenCLArray<b3Int4>& clippingFacesOutGPU, - b3OpenCLArray<b3Vector3>& worldNormalsAGPU, - b3OpenCLArray<b3Vector3>& worldVertsA1GPU, - b3OpenCLArray<b3Vector3>& worldVertsB2GPU, - b3AlignedObjectArray<class b3OptimizedBvh*>& bvhDataUnused, - b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, - b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, - b3OpenCLArray<b3BvhInfo>* 
bvhInfo, - - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray<b3Int4>& triangleConvexPairsOut, - int& numTriConvexPairsOut) -{ - myframecount++; - - if (!nPairs) - return; - -#ifdef CHECK_ON_HOST - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; - clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3Int4> hostTriangleConvexPairs; - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - - b3AlignedObjectArray<b3Contact4> oldHostContacts; - - if (oldContacts->size()) - { - oldContacts->copyToHost(oldHostContacts); - } - - hostContacts.resize(maxContactCapacity); - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int 
collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactSphereConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - computeContactSphereConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - //printf("convex-sphere\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneConvex(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - computeContactPlaneConvex(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("plane-convex\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == 
SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactCompoundCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], hostAabbsWorldSpace, hostAabbsLocalSpace, hostVertices, hostUniqueEdges, hostIndices, hostFaces, &hostContacts[0], - nContacts, maxContactCapacity, treeNodesCPU, subTreesCPU, bvhInfoCPU); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - computeContactPlaneCompound(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("convex-plane\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - computeContactPlaneCompound(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, &hostBodyBuf[0], - &hostCollidables[0], &hostConvexData[0], &cpuChildShapes[0], &hostVertices[0], &hostIndices[0], &hostFaces[0], &hostContacts[0], nContacts, maxContactCapacity); - // printf("plane-convex\n"); - } - - if (hostCollidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - hostCollidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - //printf("hostPairs[i].z=%d\n",hostPairs[i].z); - int contactIndex = computeContactConvexConvex2(i, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, hostBodyBuf, hostCollidables, hostConvexData, hostVertices, hostUniqueEdges, hostIndices, hostFaces, hostContacts, nContacts, maxContactCapacity, oldHostContacts); - //int contactIndex = 
computeContactConvexConvex(hostPairs,i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf,hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - - if (contactIndex >= 0) - { - // printf("convex convex contactIndex = %d\n",contactIndex); - hostPairs[i].z = contactIndex; - } - // printf("plane-convex\n"); - } - } - - if (hostPairs.size()) - { - pairs->copyFromHost(hostPairs); - } - - hostContacts.resize(nContacts); - if (nContacts) - { - contactOut->copyFromHost(hostContacts); - } - else - { - contactOut->resize(0); - } - - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - //printf("(HOST) nContacts = %d\n",nContacts); - -#else - - { - if (nPairs) - { - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - - B3_PROFILE("primitiveContactsKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_primitiveContactsKernel, "m_primitiveContactsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(maxContactCapacity); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } - } - -#endif //CHECK_ON_HOST - - B3_PROFILE("computeConvexConvexContactsGPUSAT"); - // printf("nContacts = %d\n",nContacts); - - m_sepNormals.resize(nPairs); - m_hasSeparatingNormals.resize(nPairs); - - 
int concaveCapacity = maxTriConvexPairCapacity; - m_concaveSepNormals.resize(concaveCapacity); - m_concaveHasSeparatingNormals.resize(concaveCapacity); - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(0); - - m_gpuCompoundPairs.resize(compoundPairCapacity); - - m_gpuCompoundSepNormals.resize(compoundPairCapacity); - - m_gpuHasCompoundSepNormals.resize(compoundPairCapacity); - - m_numCompoundPairsOut.resize(0); - m_numCompoundPairsOut.push_back(0); - - int numCompoundPairs = 0; - - int numConcavePairs = 0; - - { - clFinish(m_queue); - if (findSeparatingAxisOnGpu) - { - m_dmins.resize(nPairs); - if (splitSearchSepAxisConvex) - { - if (useMprGpu) - { - nContacts = m_totalContactsOut.at(0); - { - B3_PROFILE("mprPenetrationKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_mprPenetrationKernel, "mprPenetrationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - - launcher.setConst(maxContactCapacity); - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - /* - b3AlignedObjectArray<int>hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Vector3>hostSepAxis; - m_sepNormals.copyToHost(hostSepAxis); - */ - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - // printf("nContacts (after mprPenetrationKernel) = %d\n",nContacts); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - 
nContacts = maxContactCapacity; - } - } - } - - if (1) - { - if (1) - { - { - B3_PROFILE("findSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisVertexFaceKernel, "findSeparatingAxisVertexFaceKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - - { - B3_PROFILE("findSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL()), - b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true) - - }; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisEdgeEdgeKernel, "findSeparatingAxisEdgeEdgeKernel"); - 
launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numDirections); - launcher.setConst(nPairs); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - if (useMprGpu) - { - B3_PROFILE("findSeparatingAxisUnitSphereKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(m_unitSphereDirections.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisUnitSphereKernel, "findSeparatingAxisUnitSphereKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - int numDirections = sizeof(unitSphere162) / sizeof(b3Vector3); - launcher.setConst(numDirections); - - launcher.setConst(nPairs); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - } - else - { - B3_PROFILE("findSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findSeparatingAxisKernel, "m_findSeparatingAxisKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - - 
int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - else - { - B3_PROFILE("findSeparatingAxisKernel CPU"); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexShapeData; - convexData.copyToHost(hostConvexShapeData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<int> hostHasSepAxis; - hostHasSepAxis.resize(nPairs); - b3AlignedObjectArray<b3Vector3> hostSepAxis; - hostSepAxis.resize(nPairs); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(maxContactCapacity); - int nGlobalContactsOut = nContacts; - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - hostHasSepAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((hostBodyBuf[bodyIndexA].m_invMass == 0) && (hostBodyBuf[bodyIndexB].m_invMass == 0)) - { - continue; - } - - if ((hostCollidables[collidableIndexA].m_shapeType != SHAPE_CONVEX_HULL) || 
(hostCollidables[collidableIndexB].m_shapeType != SHAPE_CONVEX_HULL)) - { - continue; - } - - float dmin = FLT_MAX; - - b3ConvexPolyhedronData* convexShapeA = &hostConvexShapeData[shapeIndexA]; - b3ConvexPolyhedronData* convexShapeB = &hostConvexShapeData[shapeIndexB]; - b3Vector3 posA = hostBodyBuf[bodyIndexA].m_pos; - b3Vector3 posB = hostBodyBuf[bodyIndexB].m_pos; - b3Quaternion ornA = hostBodyBuf[bodyIndexA].m_quat; - b3Quaternion ornB = hostBodyBuf[bodyIndexB].m_quat; - - if (useGjk) - { - //first approximate the separating axis, to 'fail-proof' GJK+EPA or MPR - { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); - b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin, false); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - hostSepAxis[i].w = dmin; - } - } - } - } - - if (hostHasSepAxis[i]) - { - int pairIndex = i; 
- - bool useMpr = true; - if (useMpr) - { - int res = 0; - float depth = 0.f; - b3Vector3 sepAxis2 = b3MakeVector3(1, 0, 0); - b3Vector3 resultPointOnBWorld = b3MakeVector3(0, 0, 0); - - float depthOut; - b3Vector3 dirOut; - b3Vector3 posOut; - - //res = b3MprPenetration(bodyIndexA,bodyIndexB,hostBodyBuf,hostConvexShapeData,hostCollidables,hostVertices,&mprConfig,&depthOut,&dirOut,&posOut); - res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB, &hostBodyBuf[0], &hostConvexShapeData[0], &hostCollidables[0], &hostVertices[0], &hostSepAxis[0], &hostHasSepAxis[0], &depthOut, &dirOut, &posOut); - depth = depthOut; - sepAxis2 = b3MakeVector3(-dirOut.x, -dirOut.y, -dirOut.z); - resultPointOnBWorld = posOut; - //hostHasSepAxis[i] = 0; - - if (res == 0) - { - //add point? - //printf("depth = %f\n",depth); - //printf("normal = %f,%f,%f\n",dir.v[0],dir.v[1],dir.v[2]); - //qprintf("pos = %f,%f,%f\n",pos.v[0],pos.v[1],pos.v[2]); - - float dist = 0.f; - - const b3ConvexPolyhedronData& hullA = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexA].m_collidableIdx].m_shapeIndex]; - const b3ConvexPolyhedronData& hullB = hostConvexShapeData[hostCollidables[hostBodyBuf[bodyIndexB].m_collidableIdx].m_shapeIndex]; - - if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) - { - if (depth > dist) - { - float diff = depth - dist; - - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n", maxdiff); - } - } - } - if (depth > dmin) - { - b3Vector3 oldAxis = hostSepAxis[i]; - depth = dmin; - sepAxis2 = oldAxis; - } - - if (b3TestSepAxis(&hullA, &hullB, posA, ornA, posB, ornB, &sepAxis2, &hostVertices[0], &hostVertices[0], &dist)) - { - if (depth > dist) - { - float diff = depth - dist; - //printf("?diff = %f\n",diff ); - static float maxdiff = 0.f; - if (maxdiff < diff) - { - maxdiff = diff; - printf("maxdiff = %20.10f\n", maxdiff); - } - } - //this is used for SAT - 
//hostHasSepAxis[i] = 1; - //hostSepAxis[i] = sepAxis2; - - //add contact point - - //int contactIndex = nGlobalContactsOut; - b3Contact4& newContact = hostContacts.at(nGlobalContactsOut); - nGlobalContactsOut++; - newContact.m_batchIdx = 0; //i; - newContact.m_bodyAPtrAndSignBit = (hostBodyBuf.at(bodyIndexA).m_invMass == 0) ? -bodyIndexA : bodyIndexA; - newContact.m_bodyBPtrAndSignBit = (hostBodyBuf.at(bodyIndexB).m_invMass == 0) ? -bodyIndexB : bodyIndexB; - - newContact.m_frictionCoeffCmp = 45874; - newContact.m_restituitionCoeffCmp = 0; - - static float maxDepth = 0.f; - - if (depth > maxDepth) - { - maxDepth = depth; - printf("MPR maxdepth = %f\n", maxDepth); - } - - resultPointOnBWorld.w = -depth; - newContact.m_worldPosB[0] = resultPointOnBWorld; - //b3Vector3 resultPointOnAWorld = resultPointOnBWorld+depth*sepAxis2; - newContact.m_worldNormalOnB = sepAxis2; - newContact.m_worldNormalOnB.w = (b3Scalar)1; - } - else - { - printf("rejected\n"); - } - } - } - else - { - //int contactIndex = computeContactConvexConvex2( i,bodyIndexA,bodyIndexB,collidableIndexA,collidableIndexB,hostBodyBuf, hostCollidables,hostConvexData,hostVertices,hostUniqueEdges,hostIndices,hostFaces,hostContacts,nContacts,maxContactCapacity,oldHostContacts); - b3AlignedObjectArray<b3Contact4> oldHostContacts; - int result; - result = computeContactConvexConvex2( //hostPairs, - pairIndex, - bodyIndexA, bodyIndexB, - collidableIndexA, collidableIndexB, - hostBodyBuf, - hostCollidables, - hostConvexShapeData, - hostVertices, - hostUniqueEdges, - hostIndices, - hostFaces, - hostContacts, - nGlobalContactsOut, - maxContactCapacity, - oldHostContacts - //hostHasSepAxis, - //hostSepAxis - - ); - } //mpr - } //hostHasSepAxis[i] = 1; - } - else - { - b3Vector3 c0local = hostConvexShapeData[shapeIndexA].m_localCenter; - b3Vector3 c0 = b3TransformPoint(c0local, posA, ornA); - b3Vector3 c1local = hostConvexShapeData[shapeIndexB].m_localCenter; - b3Vector3 c1 = b3TransformPoint(c1local, posB, ornB); - 
b3Vector3 DeltaC2 = c0 - c1; - - b3Vector3 sepAxis; - - bool hasSepAxisA = b3FindSeparatingAxis(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - - if (hasSepAxisA) - { - bool hasSepAxisB = b3FindSeparatingAxis(convexShapeB, convexShapeA, posB, ornB, posA, ornA, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin); - if (hasSepAxisB) - { - bool hasEdgeEdge = b3FindSeparatingAxisEdgeEdge(convexShapeA, convexShapeB, posA, ornA, posB, ornB, DeltaC2, - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &hostVertices.at(0), &hostUniqueEdges.at(0), &hostFaces.at(0), &hostIndices.at(0), - &sepAxis, &dmin, true); - - if (hasEdgeEdge) - { - hostHasSepAxis[i] = 1; - hostSepAxis[i] = sepAxis; - } - } - } - } - } - - if (useGjkContacts) //nGlobalContactsOut>0) - { - //printf("nGlobalContactsOut=%d\n",nGlobalContactsOut); - nContacts = nGlobalContactsOut; - contactOut->copyFromHost(hostContacts); - - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - } - - m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - m_sepNormals.copyFromHost(hostSepAxis); - - /* - //double-check results from GPU (comment-out the 'else' so both paths are executed - b3AlignedObjectArray<int> checkHasSepAxis; - m_hasSeparatingNormals.copyToHost(checkHasSepAxis); - static int frameCount = 0; - frameCount++; - for (int i=0;i<nPairs;i++) - { - if (hostHasSepAxis[i] != checkHasSepAxis[i]) - { - printf("at frameCount %d hostHasSepAxis[%d] = %d but checkHasSepAxis[i] = %d\n", - frameCount,i,hostHasSepAxis[i],checkHasSepAxis[i]); - } - } - //m_hasSeparatingNormals.copyFromHost(hostHasSepAxis); - // 
m_sepNormals.copyFromHost(hostSepAxis); - */ - } - - numCompoundPairs = m_numCompoundPairsOut.at(0); - bool useGpuFindCompoundPairs = true; - if (useGpuFindCompoundPairs) - { - B3_PROFILE("findCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsLocalSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL()), - b3BufferInfoCL(m_numCompoundPairsOut.getBufferCL()), - b3BufferInfoCL(subTreesGPU->getBufferCL()), - b3BufferInfoCL(treeNodesGPU->getBufferCL()), - b3BufferInfoCL(bvhInfo->getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findCompoundPairsKernel, "m_findCompoundPairsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(compoundPairCapacity); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - numCompoundPairs = m_numCompoundPairsOut.at(0); - //printf("numCompoundPairs =%d\n",numCompoundPairs ); - if (numCompoundPairs) - { - //printf("numCompoundPairs=%d\n",numCompoundPairs); - } - } - else - { - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3Aabb> hostAabbsLocalSpace; - 
clAabbsLocalSpace.copyToHost(hostAabbsLocalSpace); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<b3Int4> cpuCompoundPairsOut; - cpuCompoundPairsOut.resize(compoundPairCapacity); - - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - for (int pairIndex = 0; pairIndex < nPairs; pairIndex++) - { - int bodyIndexA = hostPairs[pairIndex].x; - int bodyIndexB = hostPairs[pairIndex].y; - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - if (cpuChildShapes.size()) - { - findCompoundPairsKernel( - pairIndex, - bodyIndexA, - bodyIndexB, - collidableIndexA, - collidableIndexB, - &hostBodyBuf[0], - &hostCollidables[0], - &hostConvexData[0], - hostVertices, - hostAabbsWorldSpace, - hostAabbsLocalSpace, - &cpuChildShapes[0], - &cpuCompoundPairsOut[0], - &numCompoundPairs, - compoundPairCapacity, - treeNodesCPU, - subTreesCPU, - bvhInfoCPU); - } - } - - m_numCompoundPairsOut.copyFromHostPointer(&numCompoundPairs, 1, 0, true); - if (numCompoundPairs) - { - b3CompoundOverlappingPair* ptr = (b3CompoundOverlappingPair*)&cpuCompoundPairsOut[0]; - m_gpuCompoundPairs.copyFromHostPointer(ptr, numCompoundPairs, 0, true); - } - //cpuCompoundPairsOut - } - if (numCompoundPairs) - { - printf("numCompoundPairs=%d\n", numCompoundPairs); - } - - if (numCompoundPairs > compoundPairCapacity) - { - b3Error("Exceeded compound pair capacity (%d/%d)\n", numCompoundPairs, compoundPairCapacity); - numCompoundPairs = compoundPairCapacity; 
- } - - m_gpuCompoundPairs.resize(numCompoundPairs); - m_gpuHasCompoundSepNormals.resize(numCompoundPairs); - m_gpuCompoundSepNormals.resize(numCompoundPairs); - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsPrimitivesKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsPrimitivesKernel, "m_processCompoundPairsPrimitivesKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numCompoundPairs); - launcher.setConst(maxContactCapacity); - - int num = numCompoundPairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after processCompoundPairsPrimitivesKernel) = %d\n",nContacts); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - - if (numCompoundPairs) - { - B3_PROFILE("processCompoundPairsKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - 
b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL()), - b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_processCompoundPairsKernel, "m_processCompoundPairsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numCompoundPairs); - - int num = numCompoundPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - //printf("numConcave = %d\n",numConcave); - - // printf("hostNormals.size()=%d\n",hostNormals.size()); - //int numPairs = pairCount.at(0); - } - int vertexFaceCapacity = 64; - - { - //now perform the tree query on GPU - - if (treeNodesGPU->size() && treeNodesGPU->size()) - { - if (bvhTraversalKernelGPU) - { - B3_PROFILE("m_bvhTraversalKernel"); - - numConcavePairs = m_numConcavePairsOut.at(0); - - b3LauncherCL launcher(m_queue, m_bvhTraversalKernel, "m_bvhTraversalKernel"); - launcher.setBuffer(pairs->getBufferCL()); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(gpuCollidables.getBufferCL()); - launcher.setBuffer(clAabbsWorldSpace.getBufferCL()); - launcher.setBuffer(triangleConvexPairsOut.getBufferCL()); - launcher.setBuffer(m_numConcavePairsOut.getBufferCL()); - launcher.setBuffer(subTreesGPU->getBufferCL()); - launcher.setBuffer(treeNodesGPU->getBufferCL()); - launcher.setBuffer(bvhInfo->getBufferCL()); - - launcher.setConst(nPairs); - launcher.setConst(maxTriConvexPairCapacity); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - numConcavePairs = m_numConcavePairsOut.at(0); - } - else - { - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - 
gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - //int maxTriConvexPairCapacity, - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - - //int numTriConvexPairsOutHost=0; - numConcavePairs = 0; - //m_numConcavePairsOut - - b3AlignedObjectArray<b3QuantizedBvhNode> treeNodesCPU; - treeNodesGPU->copyToHost(treeNodesCPU); - b3AlignedObjectArray<b3BvhSubtreeInfo> subTreesCPU; - subTreesGPU->copyToHost(subTreesCPU); - b3AlignedObjectArray<b3BvhInfo> bvhInfoCPU; - bvhInfo->copyToHost(bvhInfoCPU); - //compute it... - - volatile int hostNumConcavePairsOut = 0; - - // - for (int i = 0; i < nPairs; i++) - { - b3BvhTraversal(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostAabbsWorldSpace.at(0), - &triangleConvexPairsOutHost.at(0), - &hostNumConcavePairsOut, - &subTreesCPU.at(0), - &treeNodesCPU.at(0), - &bvhInfoCPU.at(0), - nPairs, - maxTriConvexPairCapacity, - i); - } - numConcavePairs = hostNumConcavePairsOut; - - if (hostNumConcavePairsOut) - { - triangleConvexPairsOutHost.resize(hostNumConcavePairsOut); - triangleConvexPairsOut.copyFromHost(triangleConvexPairsOutHost); - } - // - - m_numConcavePairsOut.resize(0); - m_numConcavePairsOut.push_back(numConcavePairs); - } - - //printf("numConcavePairs=%d (max = %d\n",numConcavePairs,maxTriConvexPairCapacity); - - if (numConcavePairs > maxTriConvexPairCapacity) - { - static int exceeded_maxTriConvexPairCapacity_count = 0; - b3Error("Exceeded the maxTriConvexPairCapacity (found %d but max is %d, it happened %d times)\n", - numConcavePairs, maxTriConvexPairCapacity, exceeded_maxTriConvexPairCapacity_count++); - numConcavePairs = maxTriConvexPairCapacity; - } - triangleConvexPairsOut.resize(numConcavePairs); - - if (numConcavePairs) - { - clippingFacesOutGPU.resize(numConcavePairs); - worldNormalsAGPU.resize(numConcavePairs); - 
worldVertsA1GPU.resize(vertexFaceCapacity * (numConcavePairs)); - worldVertsB1GPU.resize(vertexFaceCapacity * (numConcavePairs)); - - if (findConcaveSeparatingAxisKernelGPU) - { - /* - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - */ - - //now perform a SAT test for each triangle-convex element (stored in triangleConvexPairsOut) - if (splitSearchSepAxisConcave) - { - //printf("numConcavePairs = %d\n",numConcavePairs); - m_dmins.resize(numConcavePairs); - { - B3_PROFILE("findConcaveSeparatingAxisVertexFaceKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisVertexFaceKernel, "m_findConcaveSeparatingAxisVertexFaceKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - 
launcher.launch1D(num); - clFinish(m_queue); - } - // numConcavePairs = 0; - if (1) - { - B3_PROFILE("findConcaveSeparatingAxisEdgeEdgeKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(m_dmins.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisEdgeEdgeKernel, "m_findConcaveSeparatingAxisEdgeEdgeKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - // numConcavePairs = 0; - } - else - { - B3_PROFILE("findConcaveSeparatingAxisKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - 
b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSeparatingAxisKernel, "m_findConcaveSeparatingAxisKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(numConcavePairs); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - } - } - else - { - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; - - b3AlignedObjectArray<b3Int4> triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - //triangleConvexPairsOutHost.resize(maxTriConvexPairCapacity); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - b3AlignedObjectArray<b3Aabb> hostAabbsWorldSpace; - clAabbsWorldSpace.copyToHost(hostAabbsWorldSpace); - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - - b3AlignedObjectArray<b3Vector3> hostUniqueEdges; - gpuUniqueEdges.copyToHost(hostUniqueEdges); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - b3AlignedObjectArray<b3GpuChildShape> cpuChildShapes; - 
gpuChildShapes.copyToHost(cpuChildShapes); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - concaveHasSeparatingNormalsCPU.resize(concaveSepNormalsHost.size()); - - b3GpuChildShape* childShapePointerCPU = 0; - if (cpuChildShapes.size()) - childShapePointerCPU = &cpuChildShapes.at(0); - - clippingFacesOutCPU.resize(clippingFacesOutGPU.size()); - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - worldNormalsACPU.resize(worldNormalsAGPU.size()); - worldVertsB1CPU.resize(worldVertsB1GPU.size()); - - for (int i = 0; i < numConcavePairs; i++) - { - b3FindConcaveSeparatingAxisKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &hostCollidables.at(0), - &hostConvexData.at(0), &hostVertices.at(0), &hostUniqueEdges.at(0), - &hostFaces.at(0), &hostIndices.at(0), childShapePointerCPU, - &hostAabbsWorldSpace.at(0), - &concaveSepNormalsHost.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - vertexFaceCapacity, - numConcavePairs, i); - }; - - m_concaveSepNormals.copyFromHost(concaveSepNormalsHost); - m_concaveHasSeparatingNormals.copyFromHost(concaveHasSeparatingNormalsCPU); - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - } - // b3AlignedObjectArray<b3Vector3> cpuCompoundSepNormals; - // m_concaveSepNormals.copyToHost(cpuCompoundSepNormals); - // b3AlignedObjectArray<b3Int4> cpuConcavePairs; - // triangleConvexPairsOut.copyToHost(cpuConcavePairs); - } - } - } - - if (numConcavePairs) - { - if (numConcavePairs) - { - B3_PROFILE("findConcaveSphereContactsKernel"); - nContacts = m_totalContactsOut.at(0); - // printf("nContacts1 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL()), - 
b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(clAabbsWorldSpace.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findConcaveSphereContactsKernel, "m_findConcaveSphereContactsKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - - launcher.setConst(numConcavePairs); - launcher.setConst(maxContactCapacity); - - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - //printf("nContacts (after findConcaveSphereContactsKernel) = %d\n",nContacts); - - //printf("nContacts2 = %d\n",nContacts); - - if (nContacts >= maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - } - } - -#ifdef __APPLE__ - bool contactClippingOnGpu = true; -#else - bool contactClippingOnGpu = true; -#endif - - if (contactClippingOnGpu) - { - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - // printf("nContacts3 = %d\n",nContacts); - - //B3_PROFILE("clipHullHullKernel"); - - bool breakupConcaveConvexKernel = true; - -#ifdef __APPLE__ - //actually, some Apple OpenCL platform/device combinations work fine... 
- breakupConcaveConvexKernel = true; -#endif - //concave-convex contact clipping - if (numConcavePairs) - { - // printf("numConcavePairs = %d\n", numConcavePairs); - // nContacts = m_totalContactsOut.at(0); - // printf("nContacts before = %d\n", nContacts); - - if (breakupConcaveConvexKernel) - { - worldVertsB2GPU.resize(vertexFaceCapacity * numConcavePairs); - - //clipFacesAndFindContacts - - if (clipConcaveFacesAndFindContactsCPU) - { - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - worldNormalsAGPU.copyToHost(worldNormalsACPU); - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2CPU.resize(worldVertsB2GPU.size()); - - for (int i = 0; i < numConcavePairs; i++) - { - clipFacesAndFindContactsKernel(&concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - } - else - { - if (1) - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - 
b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst(numConcavePairs); - int debugMode = 0; - launcher.setConst(debugMode); - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - //int bla = m_totalContactsOut.at(0); - } - } - //contactReduction - { - int newContactCapacity = nContacts + numConcavePairs; - contactOut->reserve(newContactCapacity); - if (reduceConcaveContactsOnGPU) - { - // printf("newReservation = %d\n",newReservation); - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(m_concaveHasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst(numConcavePairs); - int num = numConcavePairs; - - launcher.launch1D(num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - } - else - { - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray<b3Int4> 
triangleConvexPairsOutHost; - triangleConvexPairsOut.copyToHost(triangleConvexPairsOutHost); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - b3AlignedObjectArray<int> concaveHasSeparatingNormalsCPU; - m_concaveHasSeparatingNormals.copyToHost(concaveHasSeparatingNormalsCPU); - - b3AlignedObjectArray<b3Vector3> concaveSepNormalsHost; - m_concaveSepNormals.copyToHost(concaveSepNormalsHost); - - b3AlignedObjectArray<b3Contact4> hostContacts; - if (nContacts) - { - contactOut->copyToHost(hostContacts); - } - hostContacts.resize(newContactCapacity); - - b3AlignedObjectArray<b3Int4> clippingFacesOutCPU; - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i = 0; i < numConcavePairs; i++) - { - b3NewContactReductionKernel(&triangleConvexPairsOutHost.at(0), - &hostBodyBuf.at(0), - &concaveSepNormalsHost.at(0), - &concaveHasSeparatingNormalsCPU.at(0), - &hostContacts.at(0), - &clippingFacesOutCPU.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - numConcavePairs, - i); - } - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - // nContacts = m_totalContactsOut.at(0); - //contactOut->resize(nContacts); - hostContacts.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContacts); - } - } - //re-use? 
- } - else - { - B3_PROFILE("clipHullHullConcaveConvexKernel"); - nContacts = m_totalContactsOut.at(0); - int newContactCapacity = contactOut->capacity(); - - //printf("contactOut5 = %d\n",nContacts); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(triangleConvexPairsOut.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_concaveSepNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipHullHullConcaveConvexKernel, "m_clipHullHullConcaveConvexKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(newContactCapacity); - launcher.setConst(numConcavePairs); - int num = numConcavePairs; - launcher.launch1D(num); - clFinish(m_queue); - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - //printf("contactOut6 = %d\n",nContacts); - b3AlignedObjectArray<b3Contact4> cpuContacts; - contactOut->copyToHost(cpuContacts); - } - // printf("nContacts after = %d\n", nContacts); - } //numConcavePairs - - //convex-convex contact clipping - - bool breakupKernel = false; - -#ifdef __APPLE__ - breakupKernel = true; -#endif - -#ifdef CHECK_ON_HOST - bool computeConvexConvex = false; -#else - bool computeConvexConvex = true; -#endif //CHECK_ON_HOST - if (computeConvexConvex) - { - B3_PROFILE("clipHullHullKernel"); - if (breakupKernel) - { - worldVertsB1GPU.resize(vertexFaceCapacity * nPairs); - clippingFacesOutGPU.resize(nPairs); - worldNormalsAGPU.resize(nPairs); - worldVertsA1GPU.resize(vertexFaceCapacity * nPairs); - 
worldVertsB2GPU.resize(vertexFaceCapacity * nPairs); - - if (findConvexClippingFacesGPU) - { - B3_PROFILE("findClippingFacesKernel"); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_findClippingFacesKernel, "m_findClippingFacesKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(nPairs); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - else - { - float minDist = -1e30f; - float maxDist = 0.02f; - - b3AlignedObjectArray<b3ConvexPolyhedronData> hostConvexData; - convexData.copyToHost(hostConvexData); - b3AlignedObjectArray<b3Collidable> hostCollidables; - gpuCollidables.copyToHost(hostCollidables); - - b3AlignedObjectArray<int> hostHasSepNormals; - m_hasSeparatingNormals.copyToHost(hostHasSepNormals); - b3AlignedObjectArray<b3Vector3> cpuSepNormals; - m_sepNormals.copyToHost(cpuSepNormals); - - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - - //worldVertsB1GPU.resize(vertexFaceCapacity*nPairs); - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - b3AlignedObjectArray<b3Int4> 
clippingFacesOutCPU; - clippingFacesOutGPU.copyToHost(clippingFacesOutCPU); - - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsACPU.resize(nPairs); - - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - worldVertsA1CPU.resize(worldVertsA1GPU.size()); - - b3AlignedObjectArray<b3Vector3> hostVertices; - gpuVertices.copyToHost(hostVertices); - b3AlignedObjectArray<b3GpuFace> hostFaces; - gpuFaces.copyToHost(hostFaces); - b3AlignedObjectArray<int> hostIndices; - gpuIndices.copyToHost(hostIndices); - - for (int i = 0; i < nPairs; i++) - { - int bodyIndexA = hostPairs[i].x; - int bodyIndexB = hostPairs[i].y; - - int collidableIndexA = hostBodyBuf[bodyIndexA].m_collidableIdx; - int collidableIndexB = hostBodyBuf[bodyIndexB].m_collidableIdx; - - int shapeIndexA = hostCollidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = hostCollidables[collidableIndexB].m_shapeIndex; - - if (hostHasSepNormals[i]) - { - b3FindClippingFaces(cpuSepNormals[i], - &hostConvexData[shapeIndexA], - &hostConvexData[shapeIndexB], - hostBodyBuf[bodyIndexA].m_pos, hostBodyBuf[bodyIndexA].m_quat, - hostBodyBuf[bodyIndexB].m_pos, hostBodyBuf[bodyIndexB].m_quat, - &worldVertsA1CPU.at(0), &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - vertexFaceCapacity, minDist, maxDist, - &hostVertices.at(0), &hostFaces.at(0), - &hostIndices.at(0), - &hostVertices.at(0), &hostFaces.at(0), - &hostIndices.at(0), &clippingFacesOutCPU.at(0), i); - } - } - - clippingFacesOutGPU.copyFromHost(clippingFacesOutCPU); - worldVertsA1GPU.copyFromHost(worldVertsA1CPU); - worldNormalsAGPU.copyFromHost(worldNormalsACPU); - worldVertsB1GPU.copyFromHost(worldVertsB1CPU); - } - - ///clip face B against face A, reduce contacts and append them to a global contact array - if (1) - { - if (clipConvexFacesAndFindContactsCPU) - { - //b3AlignedObjectArray<b3Int4> hostPairs; - //pairs->copyToHost(hostPairs); - - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - 
b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - - b3AlignedObjectArray<b3Int4> hostClippingFaces; - clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2CPU.resize(vertexFaceCapacity * nPairs); - - b3AlignedObjectArray<b3Vector3> worldVertsA1CPU; - worldVertsA1GPU.copyToHost(worldVertsA1CPU); - b3AlignedObjectArray<b3Vector3> worldNormalsACPU; - worldNormalsAGPU.copyToHost(worldNormalsACPU); - - b3AlignedObjectArray<b3Vector3> worldVertsB1CPU; - worldVertsB1GPU.copyToHost(worldVertsB1CPU); - - /* - __global const b3Float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global b3Int4* clippingFacesOut, - __global b3Float4* worldVertsA1, - __global b3Float4* worldNormalsA1, - __global b3Float4* worldVertsB1, - __global b3Float4* worldVertsB2, - int vertexFaceCapacity, - int pairIndex - */ - for (int i = 0; i < nPairs; i++) - { - clipFacesAndFindContactsKernel( - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostClippingFaces.at(0), - &worldVertsA1CPU.at(0), - &worldNormalsACPU.at(0), - &worldVertsB1CPU.at(0), - &worldVertsB2CPU.at(0), - - vertexFaceCapacity, - i); - } - - clippingFacesOutGPU.copyFromHost(hostClippingFaces); - worldVertsB2GPU.copyFromHost(worldVertsB2CPU); - } - else - { - B3_PROFILE("clipFacesAndFindContacts"); - //nContacts = m_totalContactsOut.at(0); - //int h = m_hasSeparatingNormals.at(0); - //int4 p = clippingFacesOutGPU.at(0); - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsA1GPU.getBufferCL()), - b3BufferInfoCL(worldNormalsAGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB1GPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_clipFacesAndFindContacts, "m_clipFacesAndFindContacts"); - 
launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - - launcher.setConst(nPairs); - int debugMode = 0; - launcher.setConst(debugMode); - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - } - - { - nContacts = m_totalContactsOut.at(0); - //printf("nContacts = %d\n",nContacts); - - int newContactCapacity = nContacts + nPairs; - contactOut->reserve(newContactCapacity); - - if (reduceConvexContactsOnGPU) - { - { - B3_PROFILE("newContactReductionKernel"); - b3BufferInfoCL bInfo[] = - { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(clippingFacesOutGPU.getBufferCL()), - b3BufferInfoCL(worldVertsB2GPU.getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_newContactReductionKernel, "m_newContactReductionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(vertexFaceCapacity); - launcher.setConst(newContactCapacity); - launcher.setConst(nPairs); - int num = nPairs; - - launcher.launch1D(num); - } - nContacts = m_totalContactsOut.at(0); - contactOut->resize(nContacts); - } - else - { - volatile int nGlobalContactsOut = nContacts; - b3AlignedObjectArray<b3Int4> hostPairs; - pairs->copyToHost(hostPairs); - b3AlignedObjectArray<b3RigidBodyData> hostBodyBuf; - bodyBuf->copyToHost(hostBodyBuf); - b3AlignedObjectArray<b3Vector3> hostSepNormals; - m_sepNormals.copyToHost(hostSepNormals); - b3AlignedObjectArray<int> hostHasSepAxis; - m_hasSeparatingNormals.copyToHost(hostHasSepAxis); - b3AlignedObjectArray<b3Contact4> hostContactsOut; - contactOut->copyToHost(hostContactsOut); - hostContactsOut.resize(newContactCapacity); - - b3AlignedObjectArray<b3Int4> hostClippingFaces; - 
clippingFacesOutGPU.copyToHost(hostClippingFaces); - b3AlignedObjectArray<b3Vector3> worldVertsB2CPU; - worldVertsB2GPU.copyToHost(worldVertsB2CPU); - - for (int i = 0; i < nPairs; i++) - { - b3NewContactReductionKernel(&hostPairs.at(0), - &hostBodyBuf.at(0), - &hostSepNormals.at(0), - &hostHasSepAxis.at(0), - &hostContactsOut.at(0), - &hostClippingFaces.at(0), - &worldVertsB2CPU.at(0), - &nGlobalContactsOut, - vertexFaceCapacity, - newContactCapacity, - nPairs, - i); - } - - nContacts = nGlobalContactsOut; - m_totalContactsOut.copyFromHostPointer(&nContacts, 1, 0, true); - hostContactsOut.resize(nContacts); - //printf("contactOut4 (after newContactReductionKernel) = %d\n",nContacts); - contactOut->copyFromHost(hostContactsOut); - } - // b3Contact4 pt = contactOut->at(0); - // printf("nContacts = %d\n",nContacts); - } - } - } - else //breakupKernel - { - if (nPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(pairs->getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(m_sepNormals.getBufferCL()), - b3BufferInfoCL(m_hasSeparatingNormals.getBufferCL()), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipHullHullKernel, "m_clipHullHullKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nPairs); - launcher.setConst(maxContactCapacity); - - int num = nPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts >= maxContactCapacity) - { - b3Error("Exceeded contact capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = 
maxContactCapacity; - } - contactOut->resize(nContacts); - } - } - - int nCompoundsPairs = m_gpuCompoundPairs.size(); - - if (nCompoundsPairs) - { - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_gpuCompoundPairs.getBufferCL(), true), - b3BufferInfoCL(bodyBuf->getBufferCL(), true), - b3BufferInfoCL(gpuCollidables.getBufferCL(), true), - b3BufferInfoCL(convexData.getBufferCL(), true), - b3BufferInfoCL(gpuVertices.getBufferCL(), true), - b3BufferInfoCL(gpuUniqueEdges.getBufferCL(), true), - b3BufferInfoCL(gpuFaces.getBufferCL(), true), - b3BufferInfoCL(gpuIndices.getBufferCL(), true), - b3BufferInfoCL(gpuChildShapes.getBufferCL(), true), - b3BufferInfoCL(m_gpuCompoundSepNormals.getBufferCL(), true), - b3BufferInfoCL(m_gpuHasCompoundSepNormals.getBufferCL(), true), - b3BufferInfoCL(contactOut->getBufferCL()), - b3BufferInfoCL(m_totalContactsOut.getBufferCL())}; - b3LauncherCL launcher(m_queue, m_clipCompoundsHullHullKernel, "m_clipCompoundsHullHullKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nCompoundsPairs); - launcher.setConst(maxContactCapacity); - - int num = nCompoundsPairs; - launcher.launch1D(num); - clFinish(m_queue); - - nContacts = m_totalContactsOut.at(0); - if (nContacts > maxContactCapacity) - { - b3Error("Error: contacts exceeds capacity (%d/%d)\n", nContacts, maxContactCapacity); - nContacts = maxContactCapacity; - } - contactOut->resize(nContacts); - } //if nCompoundsPairs - } - } //contactClippingOnGpu - - //printf("nContacts end = %d\n",nContacts); - - //printf("frameCount = %d\n",frameCount++); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h deleted file mode 100644 index 53e8c4ed4d..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h +++ /dev/null @@ -1,106 +0,0 @@ - -#ifndef _CONVEX_HULL_CONTACT_H -#define _CONVEX_HULL_CONTACT_H - 
-#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "b3OptimizedBvh.h" -#include "b3BvhInfo.h" -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" - -//#include "../../dynamics/basic_demo/Stubs/ChNarrowPhase.h" - -struct GpuSatCollision -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - cl_kernel m_findSeparatingAxisKernel; - cl_kernel m_mprPenetrationKernel; - cl_kernel m_findSeparatingAxisUnitSphereKernel; - - cl_kernel m_findSeparatingAxisVertexFaceKernel; - cl_kernel m_findSeparatingAxisEdgeEdgeKernel; - - cl_kernel m_findConcaveSeparatingAxisKernel; - cl_kernel m_findConcaveSeparatingAxisVertexFaceKernel; - cl_kernel m_findConcaveSeparatingAxisEdgeEdgeKernel; - - cl_kernel m_findCompoundPairsKernel; - cl_kernel m_processCompoundPairsKernel; - - cl_kernel m_clipHullHullKernel; - cl_kernel m_clipCompoundsHullHullKernel; - - cl_kernel m_clipFacesAndFindContacts; - cl_kernel m_findClippingFacesKernel; - - cl_kernel m_clipHullHullConcaveConvexKernel; - // cl_kernel m_extractManifoldAndAddContactKernel; - cl_kernel m_newContactReductionKernel; - - cl_kernel m_bvhTraversalKernel; - cl_kernel m_primitiveContactsKernel; - cl_kernel m_findConcaveSphereContactsKernel; - - cl_kernel m_processCompoundPairsPrimitivesKernel; - - b3OpenCLArray<b3Vector3> m_unitSphereDirections; - - b3OpenCLArray<int> m_totalContactsOut; - - b3OpenCLArray<b3Vector3> m_sepNormals; - b3OpenCLArray<float> m_dmins; - - b3OpenCLArray<int> m_hasSeparatingNormals; - b3OpenCLArray<b3Vector3> m_concaveSepNormals; - 
b3OpenCLArray<int> m_concaveHasSeparatingNormals; - b3OpenCLArray<int> m_numConcavePairsOut; - b3OpenCLArray<b3CompoundOverlappingPair> m_gpuCompoundPairs; - b3OpenCLArray<b3Vector3> m_gpuCompoundSepNormals; - b3OpenCLArray<int> m_gpuHasCompoundSepNormals; - b3OpenCLArray<int> m_numCompoundPairsOut; - - GpuSatCollision(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~GpuSatCollision(); - - void computeConvexConvexContactsGPUSAT(b3OpenCLArray<b3Int4>* pairs, int nPairs, - const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactOut, int& nContacts, - const b3OpenCLArray<b3Contact4>* oldContacts, - int maxContactCapacity, - int compoundPairCapacity, - const b3OpenCLArray<b3ConvexPolyhedronData>& hostConvexData, - const b3OpenCLArray<b3Vector3>& vertices, - const b3OpenCLArray<b3Vector3>& uniqueEdges, - const b3OpenCLArray<b3GpuFace>& faces, - const b3OpenCLArray<int>& indices, - const b3OpenCLArray<b3Collidable>& gpuCollidables, - const b3OpenCLArray<b3GpuChildShape>& gpuChildShapes, - - const b3OpenCLArray<b3Aabb>& clAabbsWorldSpace, - const b3OpenCLArray<b3Aabb>& clAabbsLocalSpace, - - b3OpenCLArray<b3Vector3>& worldVertsB1GPU, - b3OpenCLArray<b3Int4>& clippingFacesOutGPU, - b3OpenCLArray<b3Vector3>& worldNormalsAGPU, - b3OpenCLArray<b3Vector3>& worldVertsA1GPU, - b3OpenCLArray<b3Vector3>& worldVertsB2GPU, - b3AlignedObjectArray<class b3OptimizedBvh*>& bvhData, - b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU, - b3OpenCLArray<b3BvhSubtreeInfo>* subTreesGPU, - b3OpenCLArray<b3BvhInfo>* bvhInfo, - int numObjects, - int maxTriConvexPairCapacity, - b3OpenCLArray<b3Int4>& triangleConvexPairs, - int& numTriConvexPairsOut); -}; - -#endif //_CONVEX_HULL_CONTACT_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h deleted file mode 100644 index c4cf700076..0000000000 --- 
a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3ConvexPolyhedronCL.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef CONVEX_POLYHEDRON_CL -#define CONVEX_POLYHEDRON_CL - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -#endif //CONVEX_POLYHEDRON_CL diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp deleted file mode 100644 index 974b246f03..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.cpp +++ /dev/null @@ -1,1062 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ - -#include "b3GjkEpa.h" - -#include "b3SupportMappings.h" - -namespace gjkepa2_impl2 -{ -// Config - -/* GJK */ -#define GJK_MAX_ITERATIONS 128 -#define GJK_ACCURACY ((b3Scalar)0.0001) -#define GJK_MIN_DISTANCE ((b3Scalar)0.0001) -#define GJK_DUPLICATED_EPS ((b3Scalar)0.0001) -#define GJK_SIMPLEX2_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX3_EPS ((b3Scalar)0.0) -#define GJK_SIMPLEX4_EPS ((b3Scalar)0.0) - -/* EPA */ -#define EPA_MAX_VERTICES 64 -#define EPA_MAX_FACES (EPA_MAX_VERTICES * 2) -#define EPA_MAX_ITERATIONS 255 -#define EPA_ACCURACY ((b3Scalar)0.0001) -#define EPA_FALLBACK (10 * EPA_ACCURACY) -#define EPA_PLANE_EPS ((b3Scalar)0.00001) -#define EPA_INSIDE_EPS ((b3Scalar)0.01) - -// Shorthands - -// MinkowskiDiff -struct b3MinkowskiDiff -{ - const b3ConvexPolyhedronData* m_shapes[2]; - - b3Matrix3x3 m_toshape1; - b3Transform m_toshape0; - - bool m_enableMargin; - - void EnableMargin(bool enable) - { - m_enableMargin = enable; - } - inline b3Vector3 Support0(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA) const - { - if (m_enableMargin) - { - return localGetSupportVertexWithMargin(d, m_shapes[0], verticesA, 0.f); - } - else - { - return localGetSupportVertexWithoutMargin(d, m_shapes[0], verticesA); - } - } - inline b3Vector3 Support1(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - if (m_enableMargin) - { - return m_toshape0 * (localGetSupportVertexWithMargin(m_toshape1 * d, m_shapes[1], verticesB, 0.f)); - } - else - { - return m_toshape0 * (localGetSupportVertexWithoutMargin(m_toshape1 * d, m_shapes[1], verticesB)); - } - } - - inline b3Vector3 Support(const b3Vector3& d, const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - return (Support0(d, verticesA) - Support1(-d, verticesB)); - } - b3Vector3 Support(const b3Vector3& d, unsigned int index, const 
b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) const - { - if (index) - return (Support1(d, verticesA)); - else - return (Support0(d, verticesB)); - } -}; - -typedef b3MinkowskiDiff tShape; - -// GJK -struct b3GJK -{ - /* Types */ - struct sSV - { - b3Vector3 d, w; - }; - struct sSimplex - { - sSV* c[4]; - b3Scalar p[4]; - unsigned int rank; - }; - struct eStatus - { - enum _ - { - Valid, - Inside, - Failed - }; - }; - /* Fields */ - tShape m_shape; - const b3AlignedObjectArray<b3Vector3>& m_verticesA; - const b3AlignedObjectArray<b3Vector3>& m_verticesB; - b3Vector3 m_ray; - b3Scalar m_distance; - sSimplex m_simplices[2]; - sSV m_store[4]; - sSV* m_free[4]; - unsigned int m_nfree; - unsigned int m_current; - sSimplex* m_simplex; - eStatus::_ m_status; - /* Methods */ - b3GJK(const b3AlignedObjectArray<b3Vector3>& verticesA, const b3AlignedObjectArray<b3Vector3>& verticesB) - : m_verticesA(verticesA), m_verticesB(verticesB) - { - Initialize(); - } - void Initialize() - { - m_ray = b3MakeVector3(0, 0, 0); - m_nfree = 0; - m_status = eStatus::Failed; - m_current = 0; - m_distance = 0; - } - eStatus::_ Evaluate(const tShape& shapearg, const b3Vector3& guess) - { - unsigned int iterations = 0; - b3Scalar sqdist = 0; - b3Scalar alpha = 0; - b3Vector3 lastw[4]; - unsigned int clastw = 0; - /* Initialize solver */ - m_free[0] = &m_store[0]; - m_free[1] = &m_store[1]; - m_free[2] = &m_store[2]; - m_free[3] = &m_store[3]; - m_nfree = 4; - m_current = 0; - m_status = eStatus::Valid; - m_shape = shapearg; - m_distance = 0; - /* Initialize simplex */ - m_simplices[0].rank = 0; - m_ray = guess; - const b3Scalar sqrl = m_ray.length2(); - appendvertice(m_simplices[0], sqrl > 0 ? 
-m_ray : b3MakeVector3(1, 0, 0)); - m_simplices[0].p[0] = 1; - m_ray = m_simplices[0].c[0]->w; - sqdist = sqrl; - lastw[0] = - lastw[1] = - lastw[2] = - lastw[3] = m_ray; - /* Loop */ - do - { - const unsigned int next = 1 - m_current; - sSimplex& cs = m_simplices[m_current]; - sSimplex& ns = m_simplices[next]; - /* Check zero */ - const b3Scalar rl = m_ray.length(); - if (rl < GJK_MIN_DISTANCE) - { /* Touching or inside */ - m_status = eStatus::Inside; - break; - } - /* Append new vertice in -'v' direction */ - appendvertice(cs, -m_ray); - const b3Vector3& w = cs.c[cs.rank - 1]->w; - bool found = false; - for (unsigned int i = 0; i < 4; ++i) - { - if ((w - lastw[i]).length2() < GJK_DUPLICATED_EPS) - { - found = true; - break; - } - } - if (found) - { /* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - else - { /* Update lastw */ - lastw[clastw = (clastw + 1) & 3] = w; - } - /* Check for termination */ - const b3Scalar omega = b3Dot(m_ray, w) / rl; - alpha = b3Max(omega, alpha); - if (((rl - alpha) - (GJK_ACCURACY * rl)) <= 0) - { /* Return old simplex */ - removevertice(m_simplices[m_current]); - break; - } - /* Reduce simplex */ - b3Scalar weights[4]; - unsigned int mask = 0; - switch (cs.rank) - { - case 2: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - weights, mask); - break; - case 3: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - weights, mask); - break; - case 4: - sqdist = projectorigin(cs.c[0]->w, - cs.c[1]->w, - cs.c[2]->w, - cs.c[3]->w, - weights, mask); - break; - } - if (sqdist >= 0) - { /* Valid */ - ns.rank = 0; - m_ray = b3MakeVector3(0, 0, 0); - m_current = next; - for (unsigned int i = 0, ni = cs.rank; i < ni; ++i) - { - if (mask & (1 << i)) - { - ns.c[ns.rank] = cs.c[i]; - ns.p[ns.rank++] = weights[i]; - m_ray += cs.c[i]->w * weights[i]; - } - else - { - m_free[m_nfree++] = cs.c[i]; - } - } - if (mask == 15) m_status = eStatus::Inside; - } - else - { /* Return old simplex */ - 
removevertice(m_simplices[m_current]); - break; - } - m_status = ((++iterations) < GJK_MAX_ITERATIONS) ? m_status : eStatus::Failed; - } while (m_status == eStatus::Valid); - m_simplex = &m_simplices[m_current]; - switch (m_status) - { - case eStatus::Valid: - m_distance = m_ray.length(); - break; - case eStatus::Inside: - m_distance = 0; - break; - default: - { - } - } - return (m_status); - } - bool EncloseOrigin() - { - switch (m_simplex->rank) - { - case 1: - { - for (unsigned int i = 0; i < 3; ++i) - { - b3Vector3 axis = b3MakeVector3(0, 0, 0); - axis[i] = 1; - appendvertice(*m_simplex, axis); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -axis); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - break; - case 2: - { - const b3Vector3 d = m_simplex->c[1]->w - m_simplex->c[0]->w; - for (unsigned int i = 0; i < 3; ++i) - { - b3Vector3 axis = b3MakeVector3(0, 0, 0); - axis[i] = 1; - const b3Vector3 p = b3Cross(d, axis); - if (p.length2() > 0) - { - appendvertice(*m_simplex, p); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -p); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - } - break; - case 3: - { - const b3Vector3 n = b3Cross(m_simplex->c[1]->w - m_simplex->c[0]->w, - m_simplex->c[2]->w - m_simplex->c[0]->w); - if (n.length2() > 0) - { - appendvertice(*m_simplex, n); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - appendvertice(*m_simplex, -n); - if (EncloseOrigin()) return (true); - removevertice(*m_simplex); - } - } - break; - case 4: - { - if (b3Fabs(det(m_simplex->c[0]->w - m_simplex->c[3]->w, - m_simplex->c[1]->w - m_simplex->c[3]->w, - m_simplex->c[2]->w - m_simplex->c[3]->w)) > 0) - return (true); - } - break; - } - return (false); - } - /* Internals */ - void getsupport(const b3Vector3& d, sSV& sv) const - { - sv.d = d / d.length(); - sv.w = m_shape.Support(sv.d, m_verticesA, 
m_verticesB); - } - void removevertice(sSimplex& simplex) - { - m_free[m_nfree++] = simplex.c[--simplex.rank]; - } - void appendvertice(sSimplex& simplex, const b3Vector3& v) - { - simplex.p[simplex.rank] = 0; - simplex.c[simplex.rank] = m_free[--m_nfree]; - getsupport(v, *simplex.c[simplex.rank++]); - } - static b3Scalar det(const b3Vector3& a, const b3Vector3& b, const b3Vector3& c) - { - return (a.y * b.z * c.x + a.z * b.x * c.y - - a.x * b.z * c.y - a.y * b.x * c.z + - a.x * b.y * c.z - a.z * b.y * c.x); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - b3Scalar* w, unsigned int& m) - { - const b3Vector3 d = b - a; - const b3Scalar l = d.length2(); - if (l > GJK_SIMPLEX2_EPS) - { - const b3Scalar t(l > 0 ? -b3Dot(a, d) / l : 0); - if (t >= 1) - { - w[0] = 0; - w[1] = 1; - m = 2; - return (b.length2()); - } - else if (t <= 0) - { - w[0] = 1; - w[1] = 0; - m = 1; - return (a.length2()); - } - else - { - w[0] = 1 - (w[1] = t); - m = 3; - return ((a + d * t).length2()); - } - } - return (-1); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - b3Scalar* w, unsigned int& m) - { - static const unsigned int imd3[] = {1, 2, 0}; - const b3Vector3* vt[] = {&a, &b, &c}; - const b3Vector3 dl[] = {a - b, b - c, c - a}; - const b3Vector3 n = b3Cross(dl[0], dl[1]); - const b3Scalar l = n.length2(); - if (l > GJK_SIMPLEX3_EPS) - { - b3Scalar mindist = -1; - b3Scalar subw[2] = {0.f, 0.f}; - unsigned int subm(0); - for (unsigned int i = 0; i < 3; ++i) - { - if (b3Dot(*vt[i], b3Cross(dl[i], n)) > 0) - { - const unsigned int j = imd3[i]; - const b3Scalar subd(projectorigin(*vt[i], *vt[j], subw, subm)); - if ((mindist < 0) || (subd < mindist)) - { - mindist = subd; - m = static_cast<unsigned int>(((subm & 1) ? 1 << i : 0) + ((subm & 2) ? 
1 << j : 0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - } - } - } - if (mindist < 0) - { - const b3Scalar d = b3Dot(a, n); - const b3Scalar s = b3Sqrt(l); - const b3Vector3 p = n * (d / l); - mindist = p.length2(); - m = 7; - w[0] = (b3Cross(dl[1], b - p)).length() / s; - w[1] = (b3Cross(dl[2], c - p)).length() / s; - w[2] = 1 - (w[0] + w[1]); - } - return (mindist); - } - return (-1); - } - static b3Scalar projectorigin(const b3Vector3& a, - const b3Vector3& b, - const b3Vector3& c, - const b3Vector3& d, - b3Scalar* w, unsigned int& m) - { - static const unsigned int imd3[] = {1, 2, 0}; - const b3Vector3* vt[] = {&a, &b, &c, &d}; - const b3Vector3 dl[] = {a - d, b - d, c - d}; - const b3Scalar vl = det(dl[0], dl[1], dl[2]); - const bool ng = (vl * b3Dot(a, b3Cross(b - c, a - b))) <= 0; - if (ng && (b3Fabs(vl) > GJK_SIMPLEX4_EPS)) - { - b3Scalar mindist = -1; - b3Scalar subw[3] = {0.f, 0.f, 0.f}; - unsigned int subm(0); - for (unsigned int i = 0; i < 3; ++i) - { - const unsigned int j = imd3[i]; - const b3Scalar s = vl * b3Dot(d, b3Cross(dl[i], dl[j])); - if (s > 0) - { - const b3Scalar subd = projectorigin(*vt[i], *vt[j], d, subw, subm); - if ((mindist < 0) || (subd < mindist)) - { - mindist = subd; - m = static_cast<unsigned int>((subm & 1 ? 1 << i : 0) + - (subm & 2 ? 1 << j : 0) + - (subm & 4 ? 
8 : 0)); - w[i] = subw[0]; - w[j] = subw[1]; - w[imd3[j]] = 0; - w[3] = subw[2]; - } - } - } - if (mindist < 0) - { - mindist = 0; - m = 15; - w[0] = det(c, b, d) / vl; - w[1] = det(a, c, d) / vl; - w[2] = det(b, a, d) / vl; - w[3] = 1 - (w[0] + w[1] + w[2]); - } - return (mindist); - } - return (-1); - } -}; - -// EPA -struct b3EPA -{ - /* Types */ - typedef b3GJK::sSV sSV; - struct sFace - { - b3Vector3 n; - b3Scalar d; - sSV* c[3]; - sFace* f[3]; - sFace* l[2]; - unsigned char e[3]; - unsigned char pass; - }; - struct sList - { - sFace* root; - unsigned int count; - sList() : root(0), count(0) {} - }; - struct sHorizon - { - sFace* cf; - sFace* ff; - unsigned int nf; - sHorizon() : cf(0), ff(0), nf(0) {} - }; - struct eStatus - { - enum _ - { - Valid, - Touching, - Degenerated, - NonConvex, - InvalidHull, - OutOfFaces, - OutOfVertices, - AccuraryReached, - FallBack, - Failed - }; - }; - /* Fields */ - eStatus::_ m_status; - b3GJK::sSimplex m_result; - b3Vector3 m_normal; - b3Scalar m_depth; - sSV m_sv_store[EPA_MAX_VERTICES]; - sFace m_fc_store[EPA_MAX_FACES]; - unsigned int m_nextsv; - sList m_hull; - sList m_stock; - /* Methods */ - b3EPA() - { - Initialize(); - } - - static inline void bind(sFace* fa, unsigned int ea, sFace* fb, unsigned int eb) - { - fa->e[ea] = (unsigned char)eb; - fa->f[ea] = fb; - fb->e[eb] = (unsigned char)ea; - fb->f[eb] = fa; - } - static inline void append(sList& list, sFace* face) - { - face->l[0] = 0; - face->l[1] = list.root; - if (list.root) list.root->l[0] = face; - list.root = face; - ++list.count; - } - static inline void remove(sList& list, sFace* face) - { - if (face->l[1]) face->l[1]->l[0] = face->l[0]; - if (face->l[0]) face->l[0]->l[1] = face->l[1]; - if (face == list.root) list.root = face->l[1]; - --list.count; - } - - void Initialize() - { - m_status = eStatus::Failed; - m_normal = b3MakeVector3(0, 0, 0); - m_depth = 0; - m_nextsv = 0; - for (unsigned int i = 0; i < EPA_MAX_FACES; ++i) - { - append(m_stock, 
&m_fc_store[EPA_MAX_FACES - i - 1]); - } - } - eStatus::_ Evaluate(b3GJK& gjk, const b3Vector3& guess) - { - b3GJK::sSimplex& simplex = *gjk.m_simplex; - if ((simplex.rank > 1) && gjk.EncloseOrigin()) - { - /* Clean up */ - while (m_hull.root) - { - sFace* f = m_hull.root; - remove(m_hull, f); - append(m_stock, f); - } - m_status = eStatus::Valid; - m_nextsv = 0; - /* Orient simplex */ - if (gjk.det(simplex.c[0]->w - simplex.c[3]->w, - simplex.c[1]->w - simplex.c[3]->w, - simplex.c[2]->w - simplex.c[3]->w) < 0) - { - b3Swap(simplex.c[0], simplex.c[1]); - b3Swap(simplex.p[0], simplex.p[1]); - } - /* Build initial hull */ - sFace* tetra[] = {newface(simplex.c[0], simplex.c[1], simplex.c[2], true), - newface(simplex.c[1], simplex.c[0], simplex.c[3], true), - newface(simplex.c[2], simplex.c[1], simplex.c[3], true), - newface(simplex.c[0], simplex.c[2], simplex.c[3], true)}; - if (m_hull.count == 4) - { - sFace* best = findbest(); - sFace outer = *best; - unsigned int pass = 0; - unsigned int iterations = 0; - bind(tetra[0], 0, tetra[1], 0); - bind(tetra[0], 1, tetra[2], 0); - bind(tetra[0], 2, tetra[3], 0); - bind(tetra[1], 1, tetra[3], 2); - bind(tetra[1], 2, tetra[2], 1); - bind(tetra[2], 2, tetra[3], 1); - m_status = eStatus::Valid; - for (; iterations < EPA_MAX_ITERATIONS; ++iterations) - { - if (m_nextsv < EPA_MAX_VERTICES) - { - sHorizon horizon; - sSV* w = &m_sv_store[m_nextsv++]; - bool valid = true; - best->pass = (unsigned char)(++pass); - gjk.getsupport(best->n, *w); - const b3Scalar wdist = b3Dot(best->n, w->w) - best->d; - if (wdist > EPA_ACCURACY) - { - for (unsigned int j = 0; (j < 3) && valid; ++j) - { - valid &= expand(pass, w, - best->f[j], best->e[j], - horizon); - } - if (valid && (horizon.nf >= 3)) - { - bind(horizon.cf, 1, horizon.ff, 2); - remove(m_hull, best); - append(m_stock, best); - best = findbest(); - outer = *best; - } - else - { - m_status = eStatus::Failed; - //m_status=eStatus::InvalidHull; - break; - } - } - else - { - m_status = 
eStatus::AccuraryReached; - break; - } - } - else - { - m_status = eStatus::OutOfVertices; - break; - } - } - const b3Vector3 projection = outer.n * outer.d; - m_normal = outer.n; - m_depth = outer.d; - m_result.rank = 3; - m_result.c[0] = outer.c[0]; - m_result.c[1] = outer.c[1]; - m_result.c[2] = outer.c[2]; - m_result.p[0] = b3Cross(outer.c[1]->w - projection, - outer.c[2]->w - projection) - .length(); - m_result.p[1] = b3Cross(outer.c[2]->w - projection, - outer.c[0]->w - projection) - .length(); - m_result.p[2] = b3Cross(outer.c[0]->w - projection, - outer.c[1]->w - projection) - .length(); - const b3Scalar sum = m_result.p[0] + m_result.p[1] + m_result.p[2]; - m_result.p[0] /= sum; - m_result.p[1] /= sum; - m_result.p[2] /= sum; - return (m_status); - } - } - /* Fallback */ - m_status = eStatus::FallBack; - m_normal = -guess; - const b3Scalar nl = m_normal.length(); - if (nl > 0) - m_normal = m_normal / nl; - else - m_normal = b3MakeVector3(1, 0, 0); - m_depth = 0; - m_result.rank = 1; - m_result.c[0] = simplex.c[0]; - m_result.p[0] = 1; - return (m_status); - } - bool getedgedist(sFace* face, sSV* a, sSV* b, b3Scalar& dist) - { - const b3Vector3 ba = b->w - a->w; - const b3Vector3 n_ab = b3Cross(ba, face->n); // Outward facing edge normal direction, on triangle plane - const b3Scalar a_dot_nab = b3Dot(a->w, n_ab); // Only care about the sign to determine inside/outside, so not normalization required - - if (a_dot_nab < 0) - { - // Outside of edge a->b - - const b3Scalar ba_l2 = ba.length2(); - const b3Scalar a_dot_ba = b3Dot(a->w, ba); - const b3Scalar b_dot_ba = b3Dot(b->w, ba); - - if (a_dot_ba > 0) - { - // Pick distance vertex a - dist = a->w.length(); - } - else if (b_dot_ba < 0) - { - // Pick distance vertex b - dist = b->w.length(); - } - else - { - // Pick distance to edge a->b - const b3Scalar a_dot_b = b3Dot(a->w, b->w); - dist = b3Sqrt(b3Max((a->w.length2() * b->w.length2() - a_dot_b * a_dot_b) / ba_l2, (b3Scalar)0)); - } - - return true; - } - - 
return false; - } - sFace* newface(sSV* a, sSV* b, sSV* c, bool forced) - { - if (m_stock.root) - { - sFace* face = m_stock.root; - remove(m_stock, face); - append(m_hull, face); - face->pass = 0; - face->c[0] = a; - face->c[1] = b; - face->c[2] = c; - face->n = b3Cross(b->w - a->w, c->w - a->w); - const b3Scalar l = face->n.length(); - const bool v = l > EPA_ACCURACY; - - if (v) - { - if (!(getedgedist(face, a, b, face->d) || - getedgedist(face, b, c, face->d) || - getedgedist(face, c, a, face->d))) - { - // Origin projects to the interior of the triangle - // Use distance to triangle plane - face->d = b3Dot(a->w, face->n) / l; - } - - face->n /= l; - if (forced || (face->d >= -EPA_PLANE_EPS)) - { - return face; - } - else - m_status = eStatus::NonConvex; - } - else - m_status = eStatus::Degenerated; - - remove(m_hull, face); - append(m_stock, face); - return 0; - } - m_status = m_stock.root ? eStatus::OutOfVertices : eStatus::OutOfFaces; - return 0; - } - sFace* findbest() - { - sFace* minf = m_hull.root; - b3Scalar mind = minf->d * minf->d; - for (sFace* f = minf->l[1]; f; f = f->l[1]) - { - const b3Scalar sqd = f->d * f->d; - if (sqd < mind) - { - minf = f; - mind = sqd; - } - } - return (minf); - } - bool expand(unsigned int pass, sSV* w, sFace* f, unsigned int e, sHorizon& horizon) - { - static const unsigned int i1m3[] = {1, 2, 0}; - static const unsigned int i2m3[] = {2, 0, 1}; - if (f->pass != pass) - { - const unsigned int e1 = i1m3[e]; - if ((b3Dot(f->n, w->w) - f->d) < -EPA_PLANE_EPS) - { - sFace* nf = newface(f->c[e1], f->c[e], w, false); - if (nf) - { - bind(nf, 0, f, e); - if (horizon.cf) - bind(horizon.cf, 1, nf, 2); - else - horizon.ff = nf; - horizon.cf = nf; - ++horizon.nf; - return (true); - } - } - else - { - const unsigned int e2 = i2m3[e]; - f->pass = (unsigned char)pass; - if (expand(pass, w, f->f[e1], f->e[e1], horizon) && - expand(pass, w, f->f[e2], f->e[e2], horizon)) - { - remove(m_hull, f); - append(m_stock, f); - return (true); - } - } 
- } - return (false); - } -}; - -// -static void Initialize(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - b3GjkEpaSolver2::sResults& results, - tShape& shape, - bool withmargins) -{ - /* Results */ - results.witnesses[0] = - results.witnesses[1] = b3MakeVector3(0, 0, 0); - results.status = b3GjkEpaSolver2::sResults::Separated; - /* Shape */ - shape.m_shapes[0] = hullA; - shape.m_shapes[1] = hullB; - shape.m_toshape1 = transB.getBasis().transposeTimes(transA.getBasis()); - shape.m_toshape0 = transA.inverseTimes(transB); - shape.EnableMargin(withmargins); -} - -} // namespace gjkepa2_impl2 - -// -// Api -// - -using namespace gjkepa2_impl2; - -// -int b3GjkEpaSolver2::StackSizeRequirement() -{ - return (sizeof(b3GJK) + sizeof(b3EPA)); -} - -// -bool b3GjkEpaSolver2::Distance(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results) -{ - tShape shape; - Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, false); - b3GJK gjk(verticesA, verticesB); - b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); - if (gjk_status == b3GJK::eStatus::Valid) - { - b3Vector3 w0 = b3MakeVector3(0, 0, 0); - b3Vector3 w1 = b3MakeVector3(0, 0, 0); - for (unsigned int i = 0; i < gjk.m_simplex->rank; ++i) - { - const b3Scalar p = gjk.m_simplex->p[i]; - w0 += shape.Support(gjk.m_simplex->c[i]->d, 0, verticesA, verticesB) * p; - w1 += shape.Support(-gjk.m_simplex->c[i]->d, 1, verticesA, verticesB) * p; - } - results.witnesses[0] = transA * w0; - results.witnesses[1] = transA * w1; - results.normal = w0 - w1; - results.distance = results.normal.length(); - 
results.normal /= results.distance > GJK_MIN_DISTANCE ? results.distance : 1; - return (true); - } - else - { - results.status = gjk_status == b3GJK::eStatus::Inside ? sResults::Penetrating : sResults::GJK_Failed; - return (false); - } -} - -// -bool b3GjkEpaSolver2::Penetration(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins) -{ - tShape shape; - Initialize(transA, transB, hullA, hullB, verticesA, verticesB, results, shape, usemargins); - b3GJK gjk(verticesA, verticesB); - b3GJK::eStatus::_ gjk_status = gjk.Evaluate(shape, guess); - switch (gjk_status) - { - case b3GJK::eStatus::Inside: - { - b3EPA epa; - b3EPA::eStatus::_ epa_status = epa.Evaluate(gjk, -guess); - if (epa_status != b3EPA::eStatus::Failed) - { - b3Vector3 w0 = b3MakeVector3(0, 0, 0); - for (unsigned int i = 0; i < epa.m_result.rank; ++i) - { - w0 += shape.Support(epa.m_result.c[i]->d, 0, verticesA, verticesB) * epa.m_result.p[i]; - } - results.status = sResults::Penetrating; - results.witnesses[0] = transA * w0; - results.witnesses[1] = transA * (w0 - epa.m_normal * epa.m_depth); - results.normal = -epa.m_normal; - results.distance = -epa.m_depth; - return (true); - } - else - results.status = sResults::EPA_Failed; - } - break; - case b3GJK::eStatus::Failed: - results.status = sResults::GJK_Failed; - break; - default: - { - } - } - return (false); -} - -#if 0 -// -b3Scalar b3GjkEpaSolver2::SignedDistance(const b3Vector3& position, - b3Scalar margin, - const b3Transform& transA, - const b3ConvexPolyhedronData& hullA, - const b3AlignedObjectArray<b3Vector3>& verticesA, - sResults& results) -{ - tShape shape; - btSphereShape shape1(margin); - b3Transform wtrs1(b3Quaternion(0,0,0,1),position); - Initialize(shape0,wtrs0,&shape1,wtrs1,results,shape,false); - 
GJK gjk; - GJK::eStatus::_ gjk_status=gjk.Evaluate(shape,b3Vector3(1,1,1)); - if(gjk_status==GJK::eStatus::Valid) - { - b3Vector3 w0=b3Vector3(0,0,0); - b3Vector3 w1=b3Vector3(0,0,0); - for(unsigned int i=0;i<gjk.m_simplex->rank;++i) - { - const b3Scalar p=gjk.m_simplex->p[i]; - w0+=shape.Support( gjk.m_simplex->c[i]->d,0)*p; - w1+=shape.Support(-gjk.m_simplex->c[i]->d,1)*p; - } - results.witnesses[0] = wtrs0*w0; - results.witnesses[1] = wtrs0*w1; - const b3Vector3 delta= results.witnesses[1]- - results.witnesses[0]; - const b3Scalar margin= shape0->getMarginNonVirtual()+ - shape1.getMarginNonVirtual(); - const b3Scalar length= delta.length(); - results.normal = delta/length; - results.witnesses[0] += results.normal*margin; - return(length-margin); - } - else - { - if(gjk_status==GJK::eStatus::Inside) - { - if(Penetration(shape0,wtrs0,&shape1,wtrs1,gjk.m_ray,results)) - { - const b3Vector3 delta= results.witnesses[0]- - results.witnesses[1]; - const b3Scalar length= delta.length(); - if (length >= B3_EPSILON) - results.normal = delta/length; - return(-length); - } - } - } - return(B3_INFINITY); -} - -// -bool b3GjkEpaSolver2::SignedDistance(const btConvexShape* shape0, - const b3Transform& wtrs0, - const btConvexShape* shape1, - const b3Transform& wtrs1, - const b3Vector3& guess, - sResults& results) -{ - if(!Distance(shape0,wtrs0,shape1,wtrs1,guess,results)) - return(Penetration(shape0,wtrs0,shape1,wtrs1,guess,results,false)); - else - return(true); -} -#endif - -/* Symbols cleanup */ - -#undef GJK_MAX_ITERATIONS -#undef GJK_ACCURACY -#undef GJK_MIN_DISTANCE -#undef GJK_DUPLICATED_EPS -#undef GJK_SIMPLEX2_EPS -#undef GJK_SIMPLEX3_EPS -#undef GJK_SIMPLEX4_EPS - -#undef EPA_MAX_VERTICES -#undef EPA_MAX_FACES -#undef EPA_MAX_ITERATIONS -#undef EPA_ACCURACY -#undef EPA_FALLBACK -#undef EPA_PLANE_EPS -#undef EPA_INSIDE_EPS diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h 
deleted file mode 100644 index 7db32c6309..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3GjkEpa.h +++ /dev/null @@ -1,79 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2008 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the -use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it -freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not -claim that you wrote the original software. If you use this software in a -product, an acknowledgment in the product documentation would be appreciated -but is not required. -2. Altered source versions must be plainly marked as such, and must not be -misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -/* -GJK-EPA collision solver by Nathanael Presson, 2008 -*/ -#ifndef B3_GJK_EPA2_H -#define B3_GJK_EPA2_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" - -///btGjkEpaSolver contributed under zlib by Nathanael Presson -struct b3GjkEpaSolver2 -{ - struct sResults - { - enum eStatus - { - Separated, /* Shapes doesnt penetrate */ - Penetrating, /* Shapes are penetrating */ - GJK_Failed, /* GJK phase fail, no big issue, shapes are probably just 'touching' */ - EPA_Failed /* EPA phase fail, bigger problem, need to save parameters, and debug */ - } status; - b3Vector3 witnesses[2]; - b3Vector3 normal; - b3Scalar distance; - }; - - static int StackSizeRequirement(); - - static bool Distance(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results); - - static bool Penetration(const b3Transform& transA, const b3Transform& transB, - const b3ConvexPolyhedronData* hullA, const b3ConvexPolyhedronData* hullB, - const b3AlignedObjectArray<b3Vector3>& verticesA, - const b3AlignedObjectArray<b3Vector3>& verticesB, - const b3Vector3& guess, - sResults& results, - bool usemargins = true); -#if 0 -static b3Scalar SignedDistance( const b3Vector3& position, - b3Scalar margin, - const btConvexShape* shape, - const btTransform& wtrs, - sResults& results); - -static bool SignedDistance( const btConvexShape* shape0,const btTransform& wtrs0, - const btConvexShape* shape1,const btTransform& wtrs1, - const b3Vector3& guess, - sResults& results); -#endif -}; - -#endif //B3_GJK_EPA2_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp 
deleted file mode 100644 index 4938fa17af..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ /dev/null @@ -1,363 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#include "b3OptimizedBvh.h" -#include "b3StridingMeshInterface.h" -#include "Bullet3Geometry/b3AabbUtil.h" - -b3OptimizedBvh::b3OptimizedBvh() -{ -} - -b3OptimizedBvh::~b3OptimizedBvh() -{ -} - -void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) -{ - m_useQuantization = useQuantizedAabbCompression; - - // NodeArray triangleNodes; - - struct NodeTriangleCallback : public b3InternalTriangleIndexCallback - { - NodeArray& m_triangleNodes; - - NodeTriangleCallback& operator=(NodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - return *this; - } - - NodeTriangleCallback(NodeArray& triangleNodes) - : m_triangleNodes(triangleNodes) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - b3OptimizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //with quantization? 
- node.m_aabbMinOrg = aabbMin; - node.m_aabbMaxOrg = aabbMax; - - node.m_escapeIndex = -1; - - //for child nodes - node.m_subPart = partId; - node.m_triangleIndex = triangleIndex; - m_triangleNodes.push_back(node); - } - }; - struct QuantizedNodeTriangleCallback : public b3InternalTriangleIndexCallback - { - QuantizedNodeArray& m_triangleNodes; - const b3QuantizedBvh* m_optimizedTree; // for quantization - - QuantizedNodeTriangleCallback& operator=(QuantizedNodeTriangleCallback& other) - { - m_triangleNodes.copyFromArray(other.m_triangleNodes); - m_optimizedTree = other.m_optimizedTree; - return *this; - } - - QuantizedNodeTriangleCallback(QuantizedNodeArray& triangleNodes, const b3QuantizedBvh* tree) - : m_triangleNodes(triangleNodes), m_optimizedTree(tree) - { - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - // The partId and triangle index must fit in the same (positive) integer - b3Assert(partId < (1 << MAX_NUM_PARTS_IN_BITS)); - b3Assert(triangleIndex < (1 << (31 - MAX_NUM_PARTS_IN_BITS))); - //negative indices are reserved for escapeIndex - b3Assert(triangleIndex >= 0); - - b3QuantizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangle[0]); - aabbMax.setMax(triangle[0]); - aabbMin.setMin(triangle[1]); - aabbMax.setMax(triangle[1]); - aabbMin.setMin(triangle[2]); - aabbMax.setMax(triangle[2]); - - //PCK: add these checks for zero dimensions of aabb - const b3Scalar MIN_AABB_DIMENSION = b3Scalar(0.002); - const b3Scalar MIN_AABB_HALF_DIMENSION = b3Scalar(0.001); - if (aabbMax.getX() - aabbMin.getX() < MIN_AABB_DIMENSION) - { - aabbMax.setX(aabbMax.getX() + MIN_AABB_HALF_DIMENSION); - aabbMin.setX(aabbMin.getX() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getY() - aabbMin.getY() < 
MIN_AABB_DIMENSION) - { - aabbMax.setY(aabbMax.getY() + MIN_AABB_HALF_DIMENSION); - aabbMin.setY(aabbMin.getY() - MIN_AABB_HALF_DIMENSION); - } - if (aabbMax.getZ() - aabbMin.getZ() < MIN_AABB_DIMENSION) - { - aabbMax.setZ(aabbMax.getZ() + MIN_AABB_HALF_DIMENSION); - aabbMin.setZ(aabbMin.getZ() - MIN_AABB_HALF_DIMENSION); - } - - m_optimizedTree->quantize(&node.m_quantizedAabbMin[0], aabbMin, 0); - m_optimizedTree->quantize(&node.m_quantizedAabbMax[0], aabbMax, 1); - - node.m_escapeIndexOrTriangleIndex = (partId << (31 - MAX_NUM_PARTS_IN_BITS)) | triangleIndex; - - m_triangleNodes.push_back(node); - } - }; - - int numLeafNodes = 0; - - if (m_useQuantization) - { - //initialize quantization values - setQuantizationValues(bvhAabbMin, bvhAabbMax); - - QuantizedNodeTriangleCallback callback(m_quantizedLeafNodes, this); - - triangles->InternalProcessAllTriangles(&callback, m_bvhAabbMin, m_bvhAabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - m_quantizedContiguousNodes.resize(2 * numLeafNodes); - } - else - { - NodeTriangleCallback callback(m_leafNodes); - - b3Vector3 aabbMin = b3MakeVector3(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - b3Vector3 aabbMax = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - - triangles->InternalProcessAllTriangles(&callback, aabbMin, aabbMax); - - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_leafNodes.size(); - - m_contiguousNodes.resize(2 * numLeafNodes); - } - - m_curNodeIndex = 0; - - buildTree(0, numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if (m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = 
m_quantizedContiguousNodes[0].isLeafNode() ? 1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - -void b3OptimizedBvh::refit(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - if (m_useQuantization) - { - setQuantizationValues(aabbMin, aabbMax); - - updateBvhNodes(meshInterface, 0, m_curNodeIndex, 0); - - ///now update all subtree headers - - int i; - for (i = 0; i < m_SubtreeHeaders.size(); i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - } - else - { - } -} - -void b3OptimizedBvh::refitPartial(b3StridingMeshInterface* meshInterface, const b3Vector3& aabbMin, const b3Vector3& aabbMax) -{ - //incrementally initialize quantization values - b3Assert(m_useQuantization); - - b3Assert(aabbMin.getX() > m_bvhAabbMin.getX()); - b3Assert(aabbMin.getY() > m_bvhAabbMin.getY()); - b3Assert(aabbMin.getZ() > m_bvhAabbMin.getZ()); - - b3Assert(aabbMax.getX() < m_bvhAabbMax.getX()); - b3Assert(aabbMax.getY() < m_bvhAabbMax.getY()); - b3Assert(aabbMax.getZ() < m_bvhAabbMax.getZ()); - - ///we should update all quantization values, using updateBvhNodes(meshInterface); - ///but we only update chunks that overlap the given aabb - - unsigned short quantizedQueryAabbMin[3]; - unsigned short quantizedQueryAabbMax[3]; - - quantize(&quantizedQueryAabbMin[0], aabbMin, 0); - quantize(&quantizedQueryAabbMax[0], aabbMax, 1); - - int i; - for (i = 0; i < this->m_SubtreeHeaders.size(); i++) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, 
subtree.m_quantizedAabbMax); - if (overlap != 0) - { - updateBvhNodes(meshInterface, subtree.m_rootNodeIndex, subtree.m_rootNodeIndex + subtree.m_subtreeSize, i); - - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[subtree.m_rootNodeIndex]); - } - } -} - -void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int firstNode, int endNode, int index) -{ - (void)index; - - b3Assert(m_useQuantization); - - int curNodeSubPart = -1; - - //get access info to trianglemesh data - const unsigned char* vertexbase = 0; - int numverts = 0; - PHY_ScalarType type = PHY_INTEGER; - int stride = 0; - const unsigned char* indexbase = 0; - int indexstride = 0; - int numfaces = 0; - PHY_ScalarType indicestype = PHY_INTEGER; - - b3Vector3 triangleVerts[3]; - b3Vector3 aabbMin, aabbMax; - const b3Vector3& meshScaling = meshInterface->getScaling(); - - int i; - for (i = endNode - 1; i >= firstNode; i--) - { - b3QuantizedBvhNode& curNode = m_quantizedContiguousNodes[i]; - if (curNode.isLeafNode()) - { - //recalc aabb from triangle data - int nodeSubPart = curNode.getPartId(); - int nodeTriangleIndex = curNode.getTriangleIndex(); - if (nodeSubPart != curNodeSubPart) - { - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); - meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart); - - curNodeSubPart = nodeSubPart; - } - //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, - - unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride); - - for (int j = 2; j >= 0; j--) - { - int graphicsindex; - switch (indicestype) { - case PHY_INTEGER: graphicsindex = gfxbase[j]; break; - case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; - case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; - default: b3Assert(0); - } - if (type == PHY_FLOAT) - { - float* graphicsbase = 
(float*)(vertexbase + graphicsindex * stride); - triangleVerts[j] = b3MakeVector3( - graphicsbase[0] * meshScaling.getX(), - graphicsbase[1] * meshScaling.getY(), - graphicsbase[2] * meshScaling.getZ()); - } - else - { - double* graphicsbase = (double*)(vertexbase + graphicsindex * stride); - triangleVerts[j] = b3MakeVector3(b3Scalar(graphicsbase[0] * meshScaling.getX()), b3Scalar(graphicsbase[1] * meshScaling.getY()), b3Scalar(graphicsbase[2] * meshScaling.getZ())); - } - } - - aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMin.setMin(triangleVerts[0]); - aabbMax.setMax(triangleVerts[0]); - aabbMin.setMin(triangleVerts[1]); - aabbMax.setMax(triangleVerts[1]); - aabbMin.setMin(triangleVerts[2]); - aabbMax.setMax(triangleVerts[2]); - - quantize(&curNode.m_quantizedAabbMin[0], aabbMin, 0); - quantize(&curNode.m_quantizedAabbMax[0], aabbMax, 1); - } - else - { - //combine aabb from both children - - b3QuantizedBvhNode* leftChildNode = &m_quantizedContiguousNodes[i + 1]; - - b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? 
&m_quantizedContiguousNodes[i + 2] : &m_quantizedContiguousNodes[i + 1 + leftChildNode->getEscapeIndex()]; - - { - for (int i = 0; i < 3; i++) - { - curNode.m_quantizedAabbMin[i] = leftChildNode->m_quantizedAabbMin[i]; - if (curNode.m_quantizedAabbMin[i] > rightChildNode->m_quantizedAabbMin[i]) - curNode.m_quantizedAabbMin[i] = rightChildNode->m_quantizedAabbMin[i]; - - curNode.m_quantizedAabbMax[i] = leftChildNode->m_quantizedAabbMax[i]; - if (curNode.m_quantizedAabbMax[i] < rightChildNode->m_quantizedAabbMax[i]) - curNode.m_quantizedAabbMax[i] = rightChildNode->m_quantizedAabbMax[i]; - } - } - } - } - - if (curNodeSubPart >= 0) - meshInterface->unLockReadOnlyVertexBase(curNodeSubPart); -} - -///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' -b3OptimizedBvh* b3OptimizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - b3QuantizedBvh* bvh = b3QuantizedBvh::deSerializeInPlace(i_alignedDataBuffer, i_dataBufferSize, i_swapEndian); - - //we don't add additional data so just do a static upcast - return static_cast<b3OptimizedBvh*>(bvh); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h deleted file mode 100644 index 1286552939..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h +++ /dev/null @@ -1,56 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -///Contains contributions from Disney Studio's - -#ifndef B3_OPTIMIZED_BVH_H -#define B3_OPTIMIZED_BVH_H - -#include "b3QuantizedBvh.h" - -class b3StridingMeshInterface; - -///The b3OptimizedBvh extends the b3QuantizedBvh to create AABB tree for triangle meshes, through the b3StridingMeshInterface. -B3_ATTRIBUTE_ALIGNED16(class) -b3OptimizedBvh : public b3QuantizedBvh -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - -protected: -public: - b3OptimizedBvh(); - - virtual ~b3OptimizedBvh(); - - void build(b3StridingMeshInterface * triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax); - - void refit(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); - - void refitPartial(b3StridingMeshInterface * triangles, const b3Vector3& aabbMin, const b3Vector3& aabbMax); - - void updateBvhNodes(b3StridingMeshInterface * meshInterface, int firstNode, int endNode, int index); - - /// Data buffer MUST be 16 byte aligned - virtual bool serializeInPlace(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const - { - return b3QuantizedBvh::serialize(o_alignedDataBuffer, i_dataBufferSize, i_swapEndian); - } - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3OptimizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); -}; - -#endif //B3_OPTIMIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp 
b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp deleted file mode 100644 index 9a448495f3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.cpp +++ /dev/null @@ -1,1254 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ - -#include "b3QuantizedBvh.h" - -#include "Bullet3Geometry/b3AabbUtil.h" - -#define RAYAABB2 - -b3QuantizedBvh::b3QuantizedBvh() : m_bulletVersion(B3_BULLET_VERSION), - m_useQuantization(false), - m_traversalMode(TRAVERSAL_STACKLESS_CACHE_FRIENDLY) - //m_traversalMode(TRAVERSAL_STACKLESS) - //m_traversalMode(TRAVERSAL_RECURSIVE) - , - m_subtreeHeaderCount(0) //PCK: add this line -{ - m_bvhAabbMin.setValue(-B3_INFINITY, -B3_INFINITY, -B3_INFINITY); - m_bvhAabbMax.setValue(B3_INFINITY, B3_INFINITY, B3_INFINITY); -} - -void b3QuantizedBvh::buildInternal() -{ - ///assumes that caller filled in the m_quantizedLeafNodes - m_useQuantization = true; - int numLeafNodes = 0; - - if (m_useQuantization) - { - //now we have an array of leafnodes in m_leafNodes - numLeafNodes = m_quantizedLeafNodes.size(); - - m_quantizedContiguousNodes.resize(2 * numLeafNodes); - } - - m_curNodeIndex = 0; - - buildTree(0, numLeafNodes); - - ///if the entire tree is small then subtree size, we need to create a header info for the tree - if (m_useQuantization && !m_SubtreeHeaders.size()) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(m_quantizedContiguousNodes[0]); - subtree.m_rootNodeIndex = 0; - subtree.m_subtreeSize = m_quantizedContiguousNodes[0].isLeafNode() ? 
1 : m_quantizedContiguousNodes[0].getEscapeIndex(); - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - //PCK: clear m_quantizedLeafNodes and m_leafNodes, they are temporary - m_quantizedLeafNodes.clear(); - m_leafNodes.clear(); -} - -///just for debugging, to visualize the individual patches/subtrees -#ifdef DEBUG_PATCH_COLORS -b3Vector3 color[4] = - { - b3Vector3(1, 0, 0), - b3Vector3(0, 1, 0), - b3Vector3(0, 0, 1), - b3Vector3(0, 1, 1)}; -#endif //DEBUG_PATCH_COLORS - -void b3QuantizedBvh::setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin) -{ - //enlarge the AABB to avoid division by zero when initializing the quantization values - b3Vector3 clampValue = b3MakeVector3(quantizationMargin, quantizationMargin, quantizationMargin); - m_bvhAabbMin = bvhAabbMin - clampValue; - m_bvhAabbMax = bvhAabbMax + clampValue; - b3Vector3 aabbSize = m_bvhAabbMax - m_bvhAabbMin; - m_bvhQuantization = b3MakeVector3(b3Scalar(65533.0), b3Scalar(65533.0), b3Scalar(65533.0)) / aabbSize; - m_useQuantization = true; -} - -b3QuantizedBvh::~b3QuantizedBvh() -{ -} - -#ifdef DEBUG_TREE_BUILDING -int gStackDepth = 0; -int gMaxStackDepth = 0; -#endif //DEBUG_TREE_BUILDING - -void b3QuantizedBvh::buildTree(int startIndex, int endIndex) -{ -#ifdef DEBUG_TREE_BUILDING - gStackDepth++; - if (gStackDepth > gMaxStackDepth) - gMaxStackDepth = gStackDepth; -#endif //DEBUG_TREE_BUILDING - - int splitAxis, splitIndex, i; - int numIndices = endIndex - startIndex; - int curIndex = m_curNodeIndex; - - b3Assert(numIndices > 0); - - if (numIndices == 1) - { -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - assignInternalNodeFromLeafNode(m_curNodeIndex, startIndex); - - m_curNodeIndex++; - return; - } - //calculate Best Splitting Axis and where to split it. Sort the incoming 'leafNodes' array within range 'startIndex/endIndex'. 
- - splitAxis = calcSplittingAxis(startIndex, endIndex); - - splitIndex = sortAndCalcSplittingIndex(startIndex, endIndex, splitAxis); - - int internalNodeIndex = m_curNodeIndex; - - //set the min aabb to 'inf' or a max value, and set the max aabb to a -inf/minimum value. - //the aabb will be expanded during buildTree/mergeInternalNodeAabb with actual node values - setInternalNodeAabbMin(m_curNodeIndex, m_bvhAabbMax); //can't use b3Vector3(B3_INFINITY,B3_INFINITY,B3_INFINITY)) because of quantization - setInternalNodeAabbMax(m_curNodeIndex, m_bvhAabbMin); //can't use b3Vector3(-B3_INFINITY,-B3_INFINITY,-B3_INFINITY)) because of quantization - - for (i = startIndex; i < endIndex; i++) - { - mergeInternalNodeAabb(m_curNodeIndex, getAabbMin(i), getAabbMax(i)); - } - - m_curNodeIndex++; - - //internalNode->m_escapeIndex; - - int leftChildNodexIndex = m_curNodeIndex; - - //build left child tree - buildTree(startIndex, splitIndex); - - int rightChildNodexIndex = m_curNodeIndex; - //build right child tree - buildTree(splitIndex, endIndex); - -#ifdef DEBUG_TREE_BUILDING - gStackDepth--; -#endif //DEBUG_TREE_BUILDING - - int escapeIndex = m_curNodeIndex - curIndex; - - if (m_useQuantization) - { - //escapeIndex is the number of nodes of this subtree - const int sizeQuantizedNode = sizeof(b3QuantizedBvhNode); - const int treeSizeInBytes = escapeIndex * sizeQuantizedNode; - if (treeSizeInBytes > MAX_SUBTREE_SIZE_IN_BYTES) - { - updateSubtreeHeaders(leftChildNodexIndex, rightChildNodexIndex); - } - } - else - { - } - - setInternalNodeEscapeIndex(internalNodeIndex, escapeIndex); -} - -void b3QuantizedBvh::updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex) -{ - b3Assert(m_useQuantization); - - b3QuantizedBvhNode& leftChildNode = m_quantizedContiguousNodes[leftChildNodexIndex]; - int leftSubTreeSize = leftChildNode.isLeafNode() ? 
1 : leftChildNode.getEscapeIndex(); - int leftSubTreeSizeInBytes = leftSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); - - b3QuantizedBvhNode& rightChildNode = m_quantizedContiguousNodes[rightChildNodexIndex]; - int rightSubTreeSize = rightChildNode.isLeafNode() ? 1 : rightChildNode.getEscapeIndex(); - int rightSubTreeSizeInBytes = rightSubTreeSize * static_cast<int>(sizeof(b3QuantizedBvhNode)); - - if (leftSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(leftChildNode); - subtree.m_rootNodeIndex = leftChildNodexIndex; - subtree.m_subtreeSize = leftSubTreeSize; - } - - if (rightSubTreeSizeInBytes <= MAX_SUBTREE_SIZE_IN_BYTES) - { - b3BvhSubtreeInfo& subtree = m_SubtreeHeaders.expand(); - subtree.setAabbFromQuantizeNode(rightChildNode); - subtree.m_rootNodeIndex = rightChildNodexIndex; - subtree.m_subtreeSize = rightSubTreeSize; - } - - //PCK: update the copy of the size - m_subtreeHeaderCount = m_SubtreeHeaders.size(); -} - -int b3QuantizedBvh::sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis) -{ - int i; - int splitIndex = startIndex; - int numIndices = endIndex - startIndex; - b3Scalar splitValue; - - b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - means += center; - } - means *= (b3Scalar(1.) / (b3Scalar)numIndices); - - splitValue = means[splitAxis]; - - //sort leafNodes so all values larger then splitValue comes first, and smaller values start from 'splitIndex'. 
- for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - if (center[splitAxis] > splitValue) - { - //swap - swapLeafNodes(i, splitIndex); - splitIndex++; - } - } - - //if the splitIndex causes unbalanced trees, fix this by using the center in between startIndex and endIndex - //otherwise the tree-building might fail due to stack-overflows in certain cases. - //unbalanced1 is unsafe: it can cause stack overflows - //bool unbalanced1 = ((splitIndex==startIndex) || (splitIndex == (endIndex-1))); - - //unbalanced2 should work too: always use center (perfect balanced trees) - //bool unbalanced2 = true; - - //this should be safe too: - int rangeBalancedIndices = numIndices / 3; - bool unbalanced = ((splitIndex <= (startIndex + rangeBalancedIndices)) || (splitIndex >= (endIndex - 1 - rangeBalancedIndices))); - - if (unbalanced) - { - splitIndex = startIndex + (numIndices >> 1); - } - - bool unbal = (splitIndex == startIndex) || (splitIndex == (endIndex)); - (void)unbal; - b3Assert(!unbal); - - return splitIndex; -} - -int b3QuantizedBvh::calcSplittingAxis(int startIndex, int endIndex) -{ - int i; - - b3Vector3 means = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Vector3 variance = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - int numIndices = endIndex - startIndex; - - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - means += center; - } - means *= (b3Scalar(1.) / (b3Scalar)numIndices); - - for (i = startIndex; i < endIndex; i++) - { - b3Vector3 center = b3Scalar(0.5) * (getAabbMax(i) + getAabbMin(i)); - b3Vector3 diff2 = center - means; - diff2 = diff2 * diff2; - variance += diff2; - } - variance *= (b3Scalar(1.) 
/ ((b3Scalar)numIndices - 1)); - - return variance.maxAxis(); -} - -void b3QuantizedBvh::reportAabbOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - //either choose recursive traversal (walkTree) or stackless (walkStacklessTree) - - if (m_useQuantization) - { - ///quantize query AABB - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin, aabbMin, 0); - quantizeWithClamp(quantizedQueryAabbMax, aabbMax, 1); - - switch (m_traversalMode) - { - case TRAVERSAL_STACKLESS: - walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, 0, m_curNodeIndex); - break; - case TRAVERSAL_STACKLESS_CACHE_FRIENDLY: - walkStacklessQuantizedTreeCacheFriendly(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - break; - case TRAVERSAL_RECURSIVE: - { - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[0]; - walkRecursiveQuantizedTreeAgainstQueryAabb(rootNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - } - break; - default: - //unsupported - b3Assert(0); - } - } - else - { - walkStacklessTree(nodeCallback, aabbMin, aabbMax); - } -} - -static int b3s_maxIterations = 0; - -void b3QuantizedBvh::walkStacklessTree(b3NodeOverlapCallback* nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < m_curNodeIndex) - { - //catch bugs in tree data - b3Assert(walkIterations < m_curNodeIndex); - - walkIterations++; - aabbOverlap = b3TestAabbAgainstAabb2(aabbMin, aabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg); - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && 
(aabbOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -/* -///this was the original recursive traversal, before we optimized towards stackless traversal -void b3QuantizedBvh::walkTree(b3OptimizedBvhNode* rootNode,b3NodeOverlapCallback* nodeCallback,const b3Vector3& aabbMin,const b3Vector3& aabbMax) const -{ - bool isLeafNode, aabbOverlap = TestAabbAgainstAabb2(aabbMin,aabbMax,rootNode->m_aabbMin,rootNode->m_aabbMax); - if (aabbOverlap) - { - isLeafNode = (!rootNode->m_leftChild && !rootNode->m_rightChild); - if (isLeafNode) - { - nodeCallback->processNode(rootNode); - } else - { - walkTree(rootNode->m_leftChild,nodeCallback,aabbMin,aabbMax); - walkTree(rootNode->m_rightChild,nodeCallback,aabbMin,aabbMax); - } - } - -} -*/ - -void b3QuantizedBvh::walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - //PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, currentNode->m_quantizedAabbMin, currentNode->m_quantizedAabbMax); - isLeafNode = currentNode->isLeafNode(); - - //PCK: unsigned instead of bool - if (aabbOverlap != 0) - { - if (isLeafNode) - { - nodeCallback->processNode(currentNode->getPartId(), currentNode->getTriangleIndex()); - } - else - { - //process left and right children - const b3QuantizedBvhNode* leftChildNode = currentNode + 1; - 
walkRecursiveQuantizedTreeAgainstQueryAabb(leftChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - - const b3QuantizedBvhNode* rightChildNode = leftChildNode->isLeafNode() ? leftChildNode + 1 : leftChildNode + leftChildNode->getEscapeIndex(); - walkRecursiveQuantizedTreeAgainstQueryAabb(rightChildNode, nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax); - } - } -} - -void b3QuantizedBvh::walkStacklessTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(!m_useQuantization); - - const b3OptimizedBvhNode* rootNode = &m_contiguousNodes[0]; - int escapeIndex, curIndex = 0; - int walkIterations = 0; - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap = 0; - unsigned rayBoxOverlap = 0; - b3Scalar lambda_max = 1.0; - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - -#ifdef RAYAABB2 - b3Vector3 rayDir = (rayTarget - raySource); - rayDir.normalize(); - lambda_max = rayDir.dot(rayTarget - raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - b3Vector3 rayDirectionInverse; - rayDirectionInverse[0] = rayDir[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[0]; - rayDirectionInverse[1] = rayDir[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[1]; - rayDirectionInverse[2] = rayDir[2] == b3Scalar(0.0) ? 
b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDir[2]; - unsigned int sign[3] = {rayDirectionInverse[0] < 0.0, rayDirectionInverse[1] < 0.0, rayDirectionInverse[2] < 0.0}; -#endif - - b3Vector3 bounds[2]; - - while (curIndex < m_curNodeIndex) - { - b3Scalar param = 1.0; - //catch bugs in tree data - b3Assert(walkIterations < m_curNodeIndex); - - walkIterations++; - - bounds[0] = rootNode->m_aabbMinOrg; - bounds[1] = rootNode->m_aabbMaxOrg; - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; - - aabbOverlap = b3TestAabbAgainstAabb2(rayAabbMin, rayAabbMax, rootNode->m_aabbMinOrg, rootNode->m_aabbMaxOrg); - //perhaps profile if it is worth doing the aabbOverlap test first - -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! - ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - rayBoxOverlap = aabbOverlap ? b3RayAabb2(raySource, rayDirectionInverse, sign, bounds, param, 0.0f, lambda_max) : false; - -#else - b3Vector3 normal; - rayBoxOverlap = b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); -#endif - - isLeafNode = rootNode->m_escapeIndex == -1; - - //PCK: unsigned instead of bool - if (isLeafNode && (rayBoxOverlap != 0)) - { - nodeCallback->processNode(rootNode->m_subPart, rootNode->m_triangleIndex); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->m_escapeIndex; - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -void b3QuantizedBvh::walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(m_useQuantization); - 
- int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned boxBoxOverlap = 0; - unsigned rayBoxOverlap = 0; - - b3Scalar lambda_max = 1.0; - -#ifdef RAYAABB2 - b3Vector3 rayDirection = (rayTarget - raySource); - rayDirection.normalize(); - lambda_max = rayDirection.dot(rayTarget - raySource); - ///what about division by zero? --> just set rayDirection[i] to 1.0 - rayDirection[0] = rayDirection[0] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[0]; - rayDirection[1] = rayDirection[1] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[1]; - rayDirection[2] = rayDirection[2] == b3Scalar(0.0) ? b3Scalar(B3_LARGE_FLOAT) : b3Scalar(1.0) / rayDirection[2]; - unsigned int sign[3] = {rayDirection[0] < 0.0, rayDirection[1] < 0.0, rayDirection[2] < 0.0}; -#endif - - /* Quick pruning by quantized box */ - b3Vector3 rayAabbMin = raySource; - b3Vector3 rayAabbMax = raySource; - rayAabbMin.setMin(rayTarget); - rayAabbMax.setMax(rayTarget); - - /* Add box cast extents to bounding box */ - rayAabbMin += aabbMin; - rayAabbMax += aabbMax; - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin, rayAabbMin, 0); - quantizeWithClamp(quantizedQueryAabbMax, rayAabbMax, 1); - - while (curIndex < endNodeIndex) - { -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex == drawPatch) - { - b3Vector3 aabbMin, aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = 
unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1, 0, 0); - debugDrawerPtr->drawAabb(aabbMin, aabbMax, color); - } -#endif //VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert(walkIterations < subTreeSize); - - walkIterations++; - //PCK: unsigned instead of bool - // only interested if this is closer than any previous hit - b3Scalar param = 1.0; - rayBoxOverlap = 0; - boxBoxOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - if (boxBoxOverlap) - { - b3Vector3 bounds[2]; - bounds[0] = unQuantize(rootNode->m_quantizedAabbMin); - bounds[1] = unQuantize(rootNode->m_quantizedAabbMax); - /* Add box cast extents */ - bounds[0] -= aabbMax; - bounds[1] -= aabbMin; -#if 0 - b3Vector3 normal; - bool ra2 = b3RayAabb2 (raySource, rayDirection, sign, bounds, param, 0.0, lambda_max); - bool ra = b3RayAabb (raySource, rayTarget, bounds[0], bounds[1], param, normal); - if (ra2 != ra) - { - printf("functions don't match\n"); - } -#endif -#ifdef RAYAABB2 - ///careful with this check: need to check division by zero (above) and fix the unQuantize method - ///thanks Joerg/hiker for the reproduction case! 
- ///http://www.bulletphysics.com/Bullet/phpBB3/viewtopic.php?f=9&t=1858 - - //B3_PROFILE("b3RayAabb2"); - rayBoxOverlap = b3RayAabb2(raySource, rayDirection, sign, bounds, param, 0.0f, lambda_max); - -#else - rayBoxOverlap = true; //b3RayAabb(raySource, rayTarget, bounds[0], bounds[1], param, normal); -#endif - } - - if (isLeafNode && rayBoxOverlap) - { - nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((rayBoxOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -void b3QuantizedBvh::walkStacklessQuantizedTree(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const -{ - b3Assert(m_useQuantization); - - int curIndex = startNodeIndex; - int walkIterations = 0; - int subTreeSize = endNodeIndex - startNodeIndex; - (void)subTreeSize; - - const b3QuantizedBvhNode* rootNode = &m_quantizedContiguousNodes[startNodeIndex]; - int escapeIndex; - - bool isLeafNode; - //PCK: unsigned instead of bool - unsigned aabbOverlap; - - while (curIndex < endNodeIndex) - { -//#define VISUALLY_ANALYZE_BVH 1 -#ifdef VISUALLY_ANALYZE_BVH - //some code snippet to debugDraw aabb, to visually analyze bvh structure - static int drawPatch = 0; - //need some global access to a debugDrawer - extern b3IDebugDraw* debugDrawerPtr; - if (curIndex == drawPatch) - { - b3Vector3 aabbMin, aabbMax; - aabbMin = unQuantize(rootNode->m_quantizedAabbMin); - aabbMax = unQuantize(rootNode->m_quantizedAabbMax); - b3Vector3 color(1, 0, 0); - debugDrawerPtr->drawAabb(aabbMin, aabbMax, color); - } -#endif //VISUALLY_ANALYZE_BVH - - //catch bugs in tree data - b3Assert(walkIterations < subTreeSize); - - walkIterations++; - 
//PCK: unsigned instead of bool - aabbOverlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, rootNode->m_quantizedAabbMin, rootNode->m_quantizedAabbMax); - isLeafNode = rootNode->isLeafNode(); - - if (isLeafNode && aabbOverlap) - { - nodeCallback->processNode(rootNode->getPartId(), rootNode->getTriangleIndex()); - } - - //PCK: unsigned instead of bool - if ((aabbOverlap != 0) || isLeafNode) - { - rootNode++; - curIndex++; - } - else - { - escapeIndex = rootNode->getEscapeIndex(); - rootNode += escapeIndex; - curIndex += escapeIndex; - } - } - if (b3s_maxIterations < walkIterations) - b3s_maxIterations = walkIterations; -} - -//This traversal can be called from Playstation 3 SPU -void b3QuantizedBvh::walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const -{ - b3Assert(m_useQuantization); - - int i; - - for (i = 0; i < this->m_SubtreeHeaders.size(); i++) - { - const b3BvhSubtreeInfo& subtree = m_SubtreeHeaders[i]; - - //PCK: unsigned instead of bool - unsigned overlap = b3TestQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin, quantizedQueryAabbMax, subtree.m_quantizedAabbMin, subtree.m_quantizedAabbMax); - if (overlap != 0) - { - walkStacklessQuantizedTree(nodeCallback, quantizedQueryAabbMin, quantizedQueryAabbMax, - subtree.m_rootNodeIndex, - subtree.m_rootNodeIndex + subtree.m_subtreeSize); - } - } -} - -void b3QuantizedBvh::reportRayOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const -{ - reportBoxCastOverlappingNodex(nodeCallback, raySource, rayTarget, b3MakeVector3(0, 0, 0), b3MakeVector3(0, 0, 0)); -} - -void b3QuantizedBvh::reportBoxCastOverlappingNodex(b3NodeOverlapCallback* nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - //always use stackless - - if 
(m_useQuantization) - { - walkStacklessQuantizedTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - else - { - walkStacklessTreeAgainstRay(nodeCallback, raySource, rayTarget, aabbMin, aabbMax, 0, m_curNodeIndex); - } - /* - { - //recursive traversal - b3Vector3 qaabbMin = raySource; - b3Vector3 qaabbMax = raySource; - qaabbMin.setMin(rayTarget); - qaabbMax.setMax(rayTarget); - qaabbMin += aabbMin; - qaabbMax += aabbMax; - reportAabbOverlappingNodex(nodeCallback,qaabbMin,qaabbMax); - } - */ -} - -void b3QuantizedBvh::swapLeafNodes(int i, int splitIndex) -{ - if (m_useQuantization) - { - b3QuantizedBvhNode tmp = m_quantizedLeafNodes[i]; - m_quantizedLeafNodes[i] = m_quantizedLeafNodes[splitIndex]; - m_quantizedLeafNodes[splitIndex] = tmp; - } - else - { - b3OptimizedBvhNode tmp = m_leafNodes[i]; - m_leafNodes[i] = m_leafNodes[splitIndex]; - m_leafNodes[splitIndex] = tmp; - } -} - -void b3QuantizedBvh::assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex) -{ - if (m_useQuantization) - { - m_quantizedContiguousNodes[internalNode] = m_quantizedLeafNodes[leafNodeIndex]; - } - else - { - m_contiguousNodes[internalNode] = m_leafNodes[leafNodeIndex]; - } -} - -//PCK: include -#include <new> - -#if 0 -//PCK: consts -static const unsigned BVH_ALIGNMENT = 16; -static const unsigned BVH_ALIGNMENT_MASK = BVH_ALIGNMENT-1; - -static const unsigned BVH_ALIGNMENT_BLOCKS = 2; -#endif - -unsigned int b3QuantizedBvh::getAlignmentSerializationPadding() -{ - // I changed this to 0 since the extra padding is not needed or used. 
- return 0; //BVH_ALIGNMENT_BLOCKS * BVH_ALIGNMENT; -} - -unsigned b3QuantizedBvh::calculateSerializeBufferSize() const -{ - unsigned baseSize = sizeof(b3QuantizedBvh) + getAlignmentSerializationPadding(); - baseSize += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - if (m_useQuantization) - { - return baseSize + m_curNodeIndex * sizeof(b3QuantizedBvhNode); - } - return baseSize + m_curNodeIndex * sizeof(b3OptimizedBvhNode); -} - -bool b3QuantizedBvh::serialize(void* o_alignedDataBuffer, unsigned /*i_dataBufferSize */, bool i_swapEndian) const -{ - b3Assert(m_subtreeHeaderCount == m_SubtreeHeaders.size()); - m_subtreeHeaderCount = m_SubtreeHeaders.size(); - - /* if (i_dataBufferSize < calculateSerializeBufferSize() || o_alignedDataBuffer == NULL || (((unsigned)o_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - ///check alignedment for buffer? - b3Assert(0); - return false; - } -*/ - - b3QuantizedBvh* targetBvh = (b3QuantizedBvh*)o_alignedDataBuffer; - - // construct the class so the virtual function table, etc will be set up - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (targetBvh) b3QuantizedBvh; - - if (i_swapEndian) - { - targetBvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(m_curNodeIndex)); - - b3SwapVector3Endian(m_bvhAabbMin, targetBvh->m_bvhAabbMin); - b3SwapVector3Endian(m_bvhAabbMax, targetBvh->m_bvhAabbMax); - b3SwapVector3Endian(m_bvhQuantization, targetBvh->m_bvhQuantization); - - targetBvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(m_traversalMode); - targetBvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(m_subtreeHeaderCount)); - } - else - { - targetBvh->m_curNodeIndex = m_curNodeIndex; - targetBvh->m_bvhAabbMin = m_bvhAabbMin; - targetBvh->m_bvhAabbMax = m_bvhAabbMax; - targetBvh->m_bvhQuantization = m_bvhQuantization; - targetBvh->m_traversalMode = m_traversalMode; - targetBvh->m_subtreeHeaderCount = m_subtreeHeaderCount; - } - - targetBvh->m_useQuantization 
= m_useQuantization; - - unsigned char* nodeData = (unsigned char*)targetBvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = m_curNodeIndex; - - if (m_useQuantization) - { - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]; - 
targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]; - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]; - - targetBvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex; - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_quantizedContiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - else - { - targetBvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMinOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3SwapVector3Endian(m_contiguousNodes[nodeIndex].m_aabbMaxOrg, targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_escapeIndex)); - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_subPart)); - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - else - { - for (int nodeIndex = 0; nodeIndex < nodeCount; 
nodeIndex++) - { - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg = m_contiguousNodes[nodeIndex].m_aabbMinOrg; - targetBvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg = m_contiguousNodes[nodeIndex].m_aabbMaxOrg; - - targetBvh->m_contiguousNodes[nodeIndex].m_escapeIndex = m_contiguousNodes[nodeIndex].m_escapeIndex; - targetBvh->m_contiguousNodes[nodeIndex].m_subPart = m_contiguousNodes[nodeIndex].m_subPart; - targetBvh->m_contiguousNodes[nodeIndex].m_triangleIndex = m_contiguousNodes[nodeIndex].m_triangleIndex; - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_contiguousNodes.initializeFromBuffer(NULL, 0, 0); - } - - sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - targetBvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, m_subtreeHeaderCount, m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = 
static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_rootNodeIndex)); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(m_SubtreeHeaders[i].m_subtreeSize)); - } - } - else - { - for (int i = 0; i < m_subtreeHeaderCount; i++) - { - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = (m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = (m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = (m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = (m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = (m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - targetBvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = (m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - targetBvh->m_SubtreeHeaders[i].m_rootNodeIndex = (m_SubtreeHeaders[i].m_rootNodeIndex); - targetBvh->m_SubtreeHeaders[i].m_subtreeSize = (m_SubtreeHeaders[i].m_subtreeSize); - - // need to clear padding in destination buffer - targetBvh->m_SubtreeHeaders[i].m_padding[0] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[1] = 0; - targetBvh->m_SubtreeHeaders[i].m_padding[2] = 0; - } - } - nodeData += sizeof(b3BvhSubtreeInfo) * m_subtreeHeaderCount; - - // this clears the pointer in the member variable it doesn't really do anything to the data - // it does call the destructor on the contained objects, but they are all classes with no destructor defined - // so the memory (which is not freed) is left alone - targetBvh->m_SubtreeHeaders.initializeFromBuffer(NULL, 0, 0); - - // this wipes the virtual function table pointer at the start of the buffer for the class - *((void**)o_alignedDataBuffer) = NULL; - - return true; -} - -b3QuantizedBvh* b3QuantizedBvh::deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian) -{ - if (i_alignedDataBuffer == NULL) // || 
(((unsigned)i_alignedDataBuffer & BVH_ALIGNMENT_MASK) != 0)) - { - return NULL; - } - b3QuantizedBvh* bvh = (b3QuantizedBvh*)i_alignedDataBuffer; - - if (i_swapEndian) - { - bvh->m_curNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_curNodeIndex)); - - b3UnSwapVector3Endian(bvh->m_bvhAabbMin); - b3UnSwapVector3Endian(bvh->m_bvhAabbMax); - b3UnSwapVector3Endian(bvh->m_bvhQuantization); - - bvh->m_traversalMode = (b3TraversalMode)b3SwapEndian(bvh->m_traversalMode); - bvh->m_subtreeHeaderCount = static_cast<int>(b3SwapEndian(bvh->m_subtreeHeaderCount)); - } - - unsigned int calculatedBufSize = bvh->calculateSerializeBufferSize(); - b3Assert(calculatedBufSize <= i_dataBufferSize); - - if (calculatedBufSize > i_dataBufferSize) - { - return NULL; - } - - unsigned char* nodeData = (unsigned char*)bvh; - nodeData += sizeof(b3QuantizedBvh); - - unsigned sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - int nodeCount = bvh->m_curNodeIndex; - - // Must call placement new to fill in virtual function table, etc, but we don't want to overwrite most data, so call a special version of the constructor - // Also, m_leafNodes and m_quantizedLeafNodes will be initialized to default values by the constructor - new (bvh) b3QuantizedBvh(*bvh, false); - - if (bvh->m_useQuantization) - { - bvh->m_quantizedContiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[2]); - - 
bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[1]); - bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[2]); - - bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = static_cast<int>(b3SwapEndian(bvh->m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex)); - } - } - nodeData += sizeof(b3QuantizedBvhNode) * nodeCount; - } - else - { - bvh->m_contiguousNodes.initializeFromBuffer(nodeData, nodeCount, nodeCount); - - if (i_swapEndian) - { - for (int nodeIndex = 0; nodeIndex < nodeCount; nodeIndex++) - { - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMinOrg); - b3UnSwapVector3Endian(bvh->m_contiguousNodes[nodeIndex].m_aabbMaxOrg); - - bvh->m_contiguousNodes[nodeIndex].m_escapeIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_escapeIndex)); - bvh->m_contiguousNodes[nodeIndex].m_subPart = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_subPart)); - bvh->m_contiguousNodes[nodeIndex].m_triangleIndex = static_cast<int>(b3SwapEndian(bvh->m_contiguousNodes[nodeIndex].m_triangleIndex)); - } - } - nodeData += sizeof(b3OptimizedBvhNode) * nodeCount; - } - - sizeToAdd = 0; //(BVH_ALIGNMENT-((unsigned)nodeData & BVH_ALIGNMENT_MASK))&BVH_ALIGNMENT_MASK; - nodeData += sizeToAdd; - - // Now serialize the subtree headers - bvh->m_SubtreeHeaders.initializeFromBuffer(nodeData, bvh->m_subtreeHeaderCount, bvh->m_subtreeHeaderCount); - if (i_swapEndian) - { - for (int i = 0; i < bvh->m_subtreeHeaderCount; i++) - { - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[0]); - 
bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMin[2]); - - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[0]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[1]); - bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2] = b3SwapEndian(bvh->m_SubtreeHeaders[i].m_quantizedAabbMax[2]); - - bvh->m_SubtreeHeaders[i].m_rootNodeIndex = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_rootNodeIndex)); - bvh->m_SubtreeHeaders[i].m_subtreeSize = static_cast<int>(b3SwapEndian(bvh->m_SubtreeHeaders[i].m_subtreeSize)); - } - } - - return bvh; -} - -// Constructor that prevents b3Vector3's default constructor from being called -b3QuantizedBvh::b3QuantizedBvh(b3QuantizedBvh& self, bool /* ownsMemory */) : m_bvhAabbMin(self.m_bvhAabbMin), - m_bvhAabbMax(self.m_bvhAabbMax), - m_bvhQuantization(self.m_bvhQuantization), - m_bulletVersion(B3_BULLET_VERSION) -{ -} - -void b3QuantizedBvh::deSerializeFloat(struct b3QuantizedBvhFloatData& quantizedBvhFloatData) -{ - m_bvhAabbMax.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeFloat(quantizedBvhFloatData.m_bvhAabbMin); - m_bvhQuantization.deSerializeFloat(quantizedBvhFloatData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhFloatData.m_curNodeIndex; - m_useQuantization = quantizedBvhFloatData.m_useQuantization != 0; - - { - int numElem = quantizedBvhFloatData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeFloatData* memPtr = quantizedBvhFloatData.m_contiguousNodesPtr; - - for (int i = 0; i < numElem; i++, memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeFloat(memPtr->m_aabbMaxOrg); - 
m_contiguousNodes[i].m_aabbMinOrg.deSerializeFloat(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhFloatData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhFloatData.m_quantizedContiguousNodesPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhFloatData.m_traversalMode); - - { - int numElem = quantizedBvhFloatData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhFloatData.m_subTreeInfoPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - 
m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } -} - -void b3QuantizedBvh::deSerializeDouble(struct b3QuantizedBvhDoubleData& quantizedBvhDoubleData) -{ - m_bvhAabbMax.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMax); - m_bvhAabbMin.deSerializeDouble(quantizedBvhDoubleData.m_bvhAabbMin); - m_bvhQuantization.deSerializeDouble(quantizedBvhDoubleData.m_bvhQuantization); - - m_curNodeIndex = quantizedBvhDoubleData.m_curNodeIndex; - m_useQuantization = quantizedBvhDoubleData.m_useQuantization != 0; - - { - int numElem = quantizedBvhDoubleData.m_numContiguousLeafNodes; - m_contiguousNodes.resize(numElem); - - if (numElem) - { - b3OptimizedBvhNodeDoubleData* memPtr = quantizedBvhDoubleData.m_contiguousNodesPtr; - - for (int i = 0; i < numElem; i++, memPtr++) - { - m_contiguousNodes[i].m_aabbMaxOrg.deSerializeDouble(memPtr->m_aabbMaxOrg); - m_contiguousNodes[i].m_aabbMinOrg.deSerializeDouble(memPtr->m_aabbMinOrg); - m_contiguousNodes[i].m_escapeIndex = memPtr->m_escapeIndex; - m_contiguousNodes[i].m_subPart = memPtr->m_subPart; - m_contiguousNodes[i].m_triangleIndex = memPtr->m_triangleIndex; - } - } - } - - { - int numElem = quantizedBvhDoubleData.m_numQuantizedContiguousNodes; - m_quantizedContiguousNodes.resize(numElem); - - if (numElem) - { - b3QuantizedBvhNodeData* memPtr = quantizedBvhDoubleData.m_quantizedContiguousNodesPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_quantizedContiguousNodes[i].m_escapeIndexOrTriangleIndex = memPtr->m_escapeIndexOrTriangleIndex; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[1] = 
memPtr->m_quantizedAabbMin[1]; - m_quantizedContiguousNodes[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - } - } - } - - m_traversalMode = b3TraversalMode(quantizedBvhDoubleData.m_traversalMode); - - { - int numElem = quantizedBvhDoubleData.m_numSubtreeHeaders; - m_SubtreeHeaders.resize(numElem); - if (numElem) - { - b3BvhSubtreeInfoData* memPtr = quantizedBvhDoubleData.m_subTreeInfoPtr; - for (int i = 0; i < numElem; i++, memPtr++) - { - m_SubtreeHeaders[i].m_quantizedAabbMax[0] = memPtr->m_quantizedAabbMax[0]; - m_SubtreeHeaders[i].m_quantizedAabbMax[1] = memPtr->m_quantizedAabbMax[1]; - m_SubtreeHeaders[i].m_quantizedAabbMax[2] = memPtr->m_quantizedAabbMax[2]; - m_SubtreeHeaders[i].m_quantizedAabbMin[0] = memPtr->m_quantizedAabbMin[0]; - m_SubtreeHeaders[i].m_quantizedAabbMin[1] = memPtr->m_quantizedAabbMin[1]; - m_SubtreeHeaders[i].m_quantizedAabbMin[2] = memPtr->m_quantizedAabbMin[2]; - m_SubtreeHeaders[i].m_rootNodeIndex = memPtr->m_rootNodeIndex; - m_SubtreeHeaders[i].m_subtreeSize = memPtr->m_subtreeSize; - } - } - } -} - -///fills the dataBuffer and returns the struct name (and 0 on failure) -const char* b3QuantizedBvh::serialize(void* dataBuffer, b3Serializer* serializer) const -{ - b3Assert(0); - return 0; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h deleted file mode 100644 index 48b41abcad..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h +++ /dev/null @@ -1,511 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_QUANTIZED_BVH_H -#define B3_QUANTIZED_BVH_H - -class b3Serializer; - -//#define DEBUG_CHECK_DEQUANTIZATION 1 -#ifdef DEBUG_CHECK_DEQUANTIZATION -#ifdef __SPU__ -#define printf spu_printf -#endif //__SPU__ - -#include <stdio.h> -#include <stdlib.h> -#endif //DEBUG_CHECK_DEQUANTIZATION - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3AlignedAllocator.h" - -#ifdef B3_USE_DOUBLE_PRECISION -#define b3QuantizedBvhData b3QuantizedBvhDoubleData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeDoubleData -#define b3QuantizedBvhDataName "b3QuantizedBvhDoubleData" -#else -#define b3QuantizedBvhData b3QuantizedBvhFloatData -#define b3OptimizedBvhNodeData b3OptimizedBvhNodeFloatData -#define b3QuantizedBvhDataName "b3QuantizedBvhFloatData" -#endif - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3QuantizedBvhNodeData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3BvhSubtreeInfoData.h" - -//http://msdn.microsoft.com/library/default.asp?url=/library/en-us/vclang/html/vclrf__m128.asp - -//Note: currently we have 16 bytes per quantized node -#define MAX_SUBTREE_SIZE_IN_BYTES 2048 - -// 10 gives the potential for 1024 parts, with at most 2^21 (2097152) (minus one -// actually) triangles each (since the sign bit is reserved -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed 
aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -B3_ATTRIBUTE_ALIGNED16(struct) -b3QuantizedBvhNode : public b3QuantizedBvhNodeData -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - bool isLeafNode() const - { - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (m_escapeIndexOrTriangleIndex >= 0); - } - int getEscapeIndex() const - { - b3Assert(!isLeafNode()); - return -m_escapeIndexOrTriangleIndex; - } - int getTriangleIndex() const - { - b3Assert(isLeafNode()); - unsigned int x = 0; - unsigned int y = (~(x & 0)) << (31 - MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (m_escapeIndexOrTriangleIndex & ~(y)); - } - int getPartId() const - { - b3Assert(isLeafNode()); - // Get only the highest bits where the part index is stored - return (m_escapeIndexOrTriangleIndex >> (31 - MAX_NUM_PARTS_IN_BITS)); - } -}; - -/// b3OptimizedBvhNode contains both internal and leaf node information. -/// Total node size is 44 bytes / node. You can use the compressed version of 16 bytes. 
-B3_ATTRIBUTE_ALIGNED16(struct) -b3OptimizedBvhNode -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - //32 bytes - b3Vector3 m_aabbMinOrg; - b3Vector3 m_aabbMaxOrg; - - //4 - int m_escapeIndex; - - //8 - //for child nodes - int m_subPart; - int m_triangleIndex; - - //pad the size to 64 bytes - char m_padding[20]; -}; - -///b3BvhSubtreeInfo provides info to gather a subtree of limited size -B3_ATTRIBUTE_ALIGNED16(class) -b3BvhSubtreeInfo : public b3BvhSubtreeInfoData -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3BvhSubtreeInfo() - { - //memset(&m_padding[0], 0, sizeof(m_padding)); - } - - void setAabbFromQuantizeNode(const b3QuantizedBvhNode& quantizedNode) - { - m_quantizedAabbMin[0] = quantizedNode.m_quantizedAabbMin[0]; - m_quantizedAabbMin[1] = quantizedNode.m_quantizedAabbMin[1]; - m_quantizedAabbMin[2] = quantizedNode.m_quantizedAabbMin[2]; - m_quantizedAabbMax[0] = quantizedNode.m_quantizedAabbMax[0]; - m_quantizedAabbMax[1] = quantizedNode.m_quantizedAabbMax[1]; - m_quantizedAabbMax[2] = quantizedNode.m_quantizedAabbMax[2]; - } -}; - -class b3NodeOverlapCallback -{ -public: - virtual ~b3NodeOverlapCallback(){}; - - virtual void processNode(int subPart, int triangleIndex) = 0; -}; - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -///for code readability: -typedef b3AlignedObjectArray<b3OptimizedBvhNode> NodeArray; -typedef b3AlignedObjectArray<b3QuantizedBvhNode> QuantizedNodeArray; -typedef b3AlignedObjectArray<b3BvhSubtreeInfo> BvhSubtreeInfoArray; - -///The b3QuantizedBvh class stores an AABB tree that can be quickly traversed on CPU and Cell SPU. -///It is used by the b3BvhTriangleMeshShape as midphase -///It is recommended to use quantization for better performance and lower memory requirements. 
-B3_ATTRIBUTE_ALIGNED16(class) -b3QuantizedBvh -{ -public: - enum b3TraversalMode - { - TRAVERSAL_STACKLESS = 0, - TRAVERSAL_STACKLESS_CACHE_FRIENDLY, - TRAVERSAL_RECURSIVE - }; - - b3Vector3 m_bvhAabbMin; - b3Vector3 m_bvhAabbMax; - b3Vector3 m_bvhQuantization; - -protected: - int m_bulletVersion; //for serialization versioning. It could also be used to detect endianess. - - int m_curNodeIndex; - //quantization data - bool m_useQuantization; - - NodeArray m_leafNodes; - NodeArray m_contiguousNodes; - QuantizedNodeArray m_quantizedLeafNodes; - QuantizedNodeArray m_quantizedContiguousNodes; - - b3TraversalMode m_traversalMode; - BvhSubtreeInfoArray m_SubtreeHeaders; - - //This is only used for serialization so we don't have to add serialization directly to b3AlignedObjectArray - mutable int m_subtreeHeaderCount; - - ///two versions, one for quantized and normal nodes. This allows code-reuse while maintaining readability (no template/macro!) - ///this might be refactored into a virtual, it is usually not calculated at run-time - void setInternalNodeAabbMin(int nodeIndex, const b3Vector3& aabbMin) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[0], aabbMin, 0); - } - else - { - m_contiguousNodes[nodeIndex].m_aabbMinOrg = aabbMin; - } - } - void setInternalNodeAabbMax(int nodeIndex, const b3Vector3& aabbMax) - { - if (m_useQuantization) - { - quantize(&m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[0], aabbMax, 1); - } - else - { - m_contiguousNodes[nodeIndex].m_aabbMaxOrg = aabbMax; - } - } - - b3Vector3 getAabbMin(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMin[0]); - } - //non-quantized - return m_leafNodes[nodeIndex].m_aabbMinOrg; - } - b3Vector3 getAabbMax(int nodeIndex) const - { - if (m_useQuantization) - { - return unQuantize(&m_quantizedLeafNodes[nodeIndex].m_quantizedAabbMax[0]); - } - //non-quantized - return 
m_leafNodes[nodeIndex].m_aabbMaxOrg; - } - - void setInternalNodeEscapeIndex(int nodeIndex, int escapeIndex) - { - if (m_useQuantization) - { - m_quantizedContiguousNodes[nodeIndex].m_escapeIndexOrTriangleIndex = -escapeIndex; - } - else - { - m_contiguousNodes[nodeIndex].m_escapeIndex = escapeIndex; - } - } - - void mergeInternalNodeAabb(int nodeIndex, const b3Vector3& newAabbMin, const b3Vector3& newAabbMax) - { - if (m_useQuantization) - { - unsigned short int quantizedAabbMin[3]; - unsigned short int quantizedAabbMax[3]; - quantize(quantizedAabbMin, newAabbMin, 0); - quantize(quantizedAabbMax, newAabbMax, 1); - for (int i = 0; i < 3; i++) - { - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] > quantizedAabbMin[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMin[i] = quantizedAabbMin[i]; - - if (m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] < quantizedAabbMax[i]) - m_quantizedContiguousNodes[nodeIndex].m_quantizedAabbMax[i] = quantizedAabbMax[i]; - } - } - else - { - //non-quantized - m_contiguousNodes[nodeIndex].m_aabbMinOrg.setMin(newAabbMin); - m_contiguousNodes[nodeIndex].m_aabbMaxOrg.setMax(newAabbMax); - } - } - - void swapLeafNodes(int firstIndex, int secondIndex); - - void assignInternalNodeFromLeafNode(int internalNode, int leafNodeIndex); - -protected: - void buildTree(int startIndex, int endIndex); - - int calcSplittingAxis(int startIndex, int endIndex); - - int sortAndCalcSplittingIndex(int startIndex, int endIndex, int splitAxis); - - void walkStacklessTree(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - - void walkStacklessQuantizedTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const; - void walkStacklessQuantizedTree(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned 
short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const; - void walkStacklessTreeAgainstRay(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax, int startNodeIndex, int endNodeIndex) const; - - ///tree traversal designed for small-memory processors like PS3 SPU - void walkStacklessQuantizedTreeCacheFriendly(b3NodeOverlapCallback * nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQueryAabb(const b3QuantizedBvhNode* currentNode, b3NodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax) const; - - ///use the 16-byte stackless 'skipindex' node tree to do a recursive traversal - void walkRecursiveQuantizedTreeAgainstQuantizedTree(const b3QuantizedBvhNode* treeNodeA, const b3QuantizedBvhNode* treeNodeB, b3NodeOverlapCallback* nodeCallback) const; - - void updateSubtreeHeaders(int leftChildNodexIndex, int rightChildNodexIndex); - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3QuantizedBvh(); - - virtual ~b3QuantizedBvh(); - - ///***************************************** expert/internal use only ************************* - void setQuantizationValues(const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax, b3Scalar quantizationMargin = b3Scalar(1.0)); - QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - void buildInternal(); - ///***************************************** expert/internal use only ************************* - - void reportAabbOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - void 
reportRayOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget) const; - void reportBoxCastOverlappingNodex(b3NodeOverlapCallback * nodeCallback, const b3Vector3& raySource, const b3Vector3& rayTarget, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - - B3_FORCE_INLINE void quantize(unsigned short* out, const b3Vector3& point, int isMax) const - { - b3Assert(m_useQuantization); - - b3Assert(point.getX() <= m_bvhAabbMax.getX()); - b3Assert(point.getY() <= m_bvhAabbMax.getY()); - b3Assert(point.getZ() <= m_bvhAabbMax.getZ()); - - b3Assert(point.getX() >= m_bvhAabbMin.getX()); - b3Assert(point.getY() >= m_bvhAabbMin.getY()); - b3Assert(point.getZ() >= m_bvhAabbMin.getZ()); - - b3Vector3 v = (point - m_bvhAabbMin) * m_bvhQuantization; - ///Make sure rounding is done in a way that unQuantize(quantizeWithClamp(...)) is conservative - ///end-points always set the first bit, so that they are sorted properly (so that neighbouring AABBs overlap properly) - ///@todo: double-check this - if (isMax) - { - out[0] = (unsigned short)(((unsigned short)(v.getX() + b3Scalar(1.)) | 1)); - out[1] = (unsigned short)(((unsigned short)(v.getY() + b3Scalar(1.)) | 1)); - out[2] = (unsigned short)(((unsigned short)(v.getZ() + b3Scalar(1.)) | 1)); - } - else - { - out[0] = (unsigned short)(((unsigned short)(v.getX()) & 0xfffe)); - out[1] = (unsigned short)(((unsigned short)(v.getY()) & 0xfffe)); - out[2] = (unsigned short)(((unsigned short)(v.getZ()) & 0xfffe)); - } - -#ifdef DEBUG_CHECK_DEQUANTIZATION - b3Vector3 newPoint = unQuantize(out); - if (isMax) - { - if (newPoint.getX() < point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX()); - } - if (newPoint.getY() < point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY()); - } - if (newPoint.getZ() < 
point.getZ()) - { - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ()); - } - } - else - { - if (newPoint.getX() > point.getX()) - { - printf("unconservative X, diffX = %f, oldX=%f,newX=%f\n", newPoint.getX() - point.getX(), newPoint.getX(), point.getX()); - } - if (newPoint.getY() > point.getY()) - { - printf("unconservative Y, diffY = %f, oldY=%f,newY=%f\n", newPoint.getY() - point.getY(), newPoint.getY(), point.getY()); - } - if (newPoint.getZ() > point.getZ()) - { - printf("unconservative Z, diffZ = %f, oldZ=%f,newZ=%f\n", newPoint.getZ() - point.getZ(), newPoint.getZ(), point.getZ()); - } - } -#endif //DEBUG_CHECK_DEQUANTIZATION - } - - B3_FORCE_INLINE void quantizeWithClamp(unsigned short* out, const b3Vector3& point2, int isMax) const - { - b3Assert(m_useQuantization); - - b3Vector3 clampedPoint(point2); - clampedPoint.setMax(m_bvhAabbMin); - clampedPoint.setMin(m_bvhAabbMax); - - quantize(out, clampedPoint, isMax); - } - - B3_FORCE_INLINE b3Vector3 unQuantize(const unsigned short* vecIn) const - { - b3Vector3 vecOut; - vecOut.setValue( - (b3Scalar)(vecIn[0]) / (m_bvhQuantization.getX()), - (b3Scalar)(vecIn[1]) / (m_bvhQuantization.getY()), - (b3Scalar)(vecIn[2]) / (m_bvhQuantization.getZ())); - vecOut += m_bvhAabbMin; - return vecOut; - } - - ///setTraversalMode let's you choose between stackless, recursive or stackless cache friendly tree traversal. Note this is only implemented for quantized trees. 
- void setTraversalMode(b3TraversalMode traversalMode) - { - m_traversalMode = traversalMode; - } - - B3_FORCE_INLINE QuantizedNodeArray& getQuantizedNodeArray() - { - return m_quantizedContiguousNodes; - } - - B3_FORCE_INLINE BvhSubtreeInfoArray& getSubtreeInfoArray() - { - return m_SubtreeHeaders; - } - - //////////////////////////////////////////////////////////////////// - - /////Calculate space needed to store BVH for serialization - unsigned calculateSerializeBufferSize() const; - - /// Data buffer MUST be 16 byte aligned - virtual bool serialize(void* o_alignedDataBuffer, unsigned i_dataBufferSize, bool i_swapEndian) const; - - ///deSerializeInPlace loads and initializes a BVH from a buffer in memory 'in place' - static b3QuantizedBvh* deSerializeInPlace(void* i_alignedDataBuffer, unsigned int i_dataBufferSize, bool i_swapEndian); - - static unsigned int getAlignmentSerializationPadding(); - ////////////////////////////////////////////////////////////////////// - - virtual int calculateSerializeBufferSizeNew() const; - - ///fills the dataBuffer and returns the struct name (and 0 on failure) - virtual const char* serialize(void* dataBuffer, b3Serializer* serializer) const; - - virtual void deSerializeFloat(struct b3QuantizedBvhFloatData & quantizedBvhFloatData); - - virtual void deSerializeDouble(struct b3QuantizedBvhDoubleData & quantizedBvhDoubleData); - - //////////////////////////////////////////////////////////////////// - - B3_FORCE_INLINE bool isQuantized() - { - return m_useQuantization; - } - -private: - // Special "copy" constructor that allows for in-place deserialization - // Prevents b3Vector3's default constructor from being called, but doesn't inialize much else - // ownsMemory should most likely be false if deserializing, and if you are not, don't call this (it also changes the function signature, which we need) - b3QuantizedBvh(b3QuantizedBvh & other, bool ownsMemory); -}; - -struct b3OptimizedBvhNodeFloatData -{ - b3Vector3FloatData 
m_aabbMinOrg; - b3Vector3FloatData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -struct b3OptimizedBvhNodeDoubleData -{ - b3Vector3DoubleData m_aabbMinOrg; - b3Vector3DoubleData m_aabbMaxOrg; - int m_escapeIndex; - int m_subPart; - int m_triangleIndex; - char m_pad[4]; -}; - -struct b3QuantizedBvhFloatData -{ - b3Vector3FloatData m_bvhAabbMin; - b3Vector3FloatData m_bvhAabbMax; - b3Vector3FloatData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeFloatData* m_contiguousNodesPtr; - b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; - b3BvhSubtreeInfoData* m_subTreeInfoPtr; - int m_traversalMode; - int m_numSubtreeHeaders; -}; - -struct b3QuantizedBvhDoubleData -{ - b3Vector3DoubleData m_bvhAabbMin; - b3Vector3DoubleData m_bvhAabbMax; - b3Vector3DoubleData m_bvhQuantization; - int m_curNodeIndex; - int m_useQuantization; - int m_numContiguousLeafNodes; - int m_numQuantizedContiguousNodes; - b3OptimizedBvhNodeDoubleData* m_contiguousNodesPtr; - b3QuantizedBvhNodeData* m_quantizedContiguousNodesPtr; - - int m_traversalMode; - int m_numSubtreeHeaders; - b3BvhSubtreeInfoData* m_subTreeInfoPtr; -}; - -B3_FORCE_INLINE int b3QuantizedBvh::calculateSerializeBufferSizeNew() const -{ - return sizeof(b3QuantizedBvhData); -} - -#endif //B3_QUANTIZED_BVH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp deleted file mode 100644 index 6b0c941f23..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3StridingMeshInterface.h" - -b3StridingMeshInterface::~b3StridingMeshInterface() -{ -} - -void b3StridingMeshInterface::InternalProcessAllTriangles(b3InternalTriangleIndexCallback* callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - (void)aabbMin; - (void)aabbMax; - int numtotalphysicsverts = 0; - int part, graphicssubparts = getNumSubParts(); - const unsigned char* vertexbase; - const unsigned char* indexbase; - int indexstride; - PHY_ScalarType type; - PHY_ScalarType gfxindextype; - int stride, numverts, numtriangles; - int gfxindex; - b3Vector3 triangle[3]; - - b3Vector3 meshScaling = getScaling(); - - ///if the number of parts is big, the performance might drop due to the innerloop switch on indextype - for (part = 0; part < graphicssubparts; part++) - { - getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numtriangles, gfxindextype, part); - numtotalphysicsverts += numtriangles * 3; //upper bound - - ///unlike that developers want to pass in double-precision meshes in single-precision Bullet build - ///so disable this feature by default - ///see patch http://code.google.com/p/bullet/issues/detail?id=213 - - switch (type) - { - case PHY_FLOAT: - { - float* 
graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); - graphicsbase = (float*)(vertexbase + 
tri_indices[0] * stride); - triangle[0].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (float*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue(graphicsbase[0] * meshScaling.getX(), graphicsbase[1] * meshScaling.getY(), graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - - case PHY_DOUBLE: - { - double* graphicsbase; - - switch (gfxindextype) - { - case PHY_INTEGER: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned int* tri_indices = (unsigned int*)(indexbase + gfxindex * indexstride); - graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_SHORT: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned short int* tri_indices = (unsigned short int*)(indexbase + gfxindex * indexstride); - graphicsbase = 
(double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - case PHY_UCHAR: - { - for (gfxindex = 0; gfxindex < numtriangles; gfxindex++) - { - unsigned char* tri_indices = (unsigned char*)(indexbase + gfxindex * indexstride); - graphicsbase = (double*)(vertexbase + tri_indices[0] * stride); - triangle[0].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[1] * stride); - triangle[1].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - graphicsbase = (double*)(vertexbase + tri_indices[2] * stride); - triangle[2].setValue((b3Scalar)graphicsbase[0] * meshScaling.getX(), (b3Scalar)graphicsbase[1] * meshScaling.getY(), (b3Scalar)graphicsbase[2] * meshScaling.getZ()); - callback->internalProcessTriangleIndex(triangle, part, gfxindex); - } - break; - } - default: - b3Assert((gfxindextype == PHY_INTEGER) || (gfxindextype == PHY_SHORT)); - } - break; - } - default: - b3Assert((type == PHY_FLOAT) || (type == PHY_DOUBLE)); - } - - unLockReadOnlyVertexBase(part); - } -} - -void 
b3StridingMeshInterface::calculateAabbBruteForce(b3Vector3& aabbMin, b3Vector3& aabbMax) -{ - struct AabbCalculationCallback : public b3InternalTriangleIndexCallback - { - b3Vector3 m_aabbMin; - b3Vector3 m_aabbMax; - - AabbCalculationCallback() - { - m_aabbMin.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - m_aabbMax.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - } - - virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) - { - (void)partId; - (void)triangleIndex; - - m_aabbMin.setMin(triangle[0]); - m_aabbMax.setMax(triangle[0]); - m_aabbMin.setMin(triangle[1]); - m_aabbMax.setMax(triangle[1]); - m_aabbMin.setMin(triangle[2]); - m_aabbMax.setMax(triangle[2]); - } - }; - - //first calculate the total aabb for all triangles - AabbCalculationCallback aabbCallback; - aabbMin.setValue(b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT), b3Scalar(-B3_LARGE_FLOAT)); - aabbMax.setValue(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - InternalProcessAllTriangles(&aabbCallback, aabbMin, aabbMax); - - aabbMin = aabbCallback.m_aabbMin; - aabbMax = aabbCallback.m_aabbMax; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h deleted file mode 100644 index 2b1e63be75..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3StridingMeshInterface.h +++ /dev/null @@ -1,158 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_STRIDING_MESHINTERFACE_H
#define B3_STRIDING_MESHINTERFACE_H

#include "Bullet3Common/b3Vector3.h"
#include "b3TriangleCallback.h"
//#include "b3ConcaveShape.h"

///Storage type of the vertex components and of the triangle indices of a mesh part.
///InternalProcessAllTriangles accepts PHY_FLOAT or PHY_DOUBLE vertices and
///PHY_INTEGER, PHY_SHORT or PHY_UCHAR indices; other combinations trigger an assert there.
enum PHY_ScalarType
{
	PHY_FLOAT,
	PHY_DOUBLE,
	PHY_INTEGER,
	PHY_SHORT,
	PHY_FIXEDPOINT88,
	PHY_UCHAR
};

/// The b3StridingMeshInterface is the interface class for high performance generic access to triangle meshes, used in combination with b3BvhTriangleMeshShape and some other collision shapes.
/// Using index striding of 3*sizeof(integer) it can use triangle arrays, using index striding of 1*sizeof(integer) it can handle triangle strips.
/// It allows for sharing graphics and collision meshes. Also it provides locking/unlocking of graphics meshes that are in gpu memory.
B3_ATTRIBUTE_ALIGNED16(class)
b3StridingMeshInterface
{
protected:
	///per-axis scale multiplied into every vertex by InternalProcessAllTriangles
	b3Vector3 m_scaling;

public:
	B3_DECLARE_ALIGNED_ALLOCATOR();

	///Starts with a unit scaling of (1, 1, 1).
	b3StridingMeshInterface() : m_scaling(b3MakeVector3(b3Scalar(1.), b3Scalar(1.), b3Scalar(1.)))
	{
	}

	virtual ~b3StridingMeshInterface();

	///Invokes the callback for every triangle of every sub-part, with scaling applied.
	///The base implementation ignores aabbMin/aabbMax and visits all triangles.
	virtual void InternalProcessAllTriangles(b3InternalTriangleIndexCallback * callback, const b3Vector3& aabbMin, const b3Vector3& aabbMax) const;

	///brute force method to calculate aabb
	void calculateAabbBruteForce(b3Vector3 & aabbMin, b3Vector3 & aabbMax);

	/// get read and write access to a subpart of a triangle mesh
	/// this subpart has a continuous array of vertices and indices
	/// in this way the mesh can be handled as chunks of memory with striding
	/// very similar to OpenGL vertexarray support
	/// make a call to unLockVertexBase when the read and write access is finished
	virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) = 0;

	/// read-only counterpart of getLockedVertexIndexBase
	/// InternalProcessAllTriangles pairs each call with unLockReadOnlyVertexBase for the same subpart
	virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& stride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const = 0;

	/// unLockVertexBase finishes the access to a subpart of the triangle mesh
	/// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished
	virtual void unLockVertexBase(int subpart) = 0;

	/// finishes the read-only access started with getLockedReadOnlyVertexIndexBase
	virtual void unLockReadOnlyVertexBase(int subpart) const = 0;

	/// getNumSubParts returns the number of separate subparts
	/// each subpart has a continuous array of vertices and indices
	virtual int getNumSubParts() const = 0;

	virtual void preallocateVertices(int numverts) = 0;
	virtual void preallocateIndices(int numindices) = 0;

	///Premade-AABB hooks: the base class reports that it has none and ignores the arguments of the setter and getter.
	virtual bool hasPremadeAabb() const { return false; }
	virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const
	{
		(void)aabbMin;
		(void)aabbMax;
	}
	virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const
	{
		(void)aabbMin;
		(void)aabbMax;
	}

	const b3Vector3& getScaling() const
	{
		return m_scaling;
	}
	void setScaling(const b3Vector3& scaling)
	{
		m_scaling = scaling;
	}

	///Returns sizeof(b3StridingMeshInterfaceData); defined inline at the end of this header.
	virtual int calculateSerializeBufferSize() const;

	///fills the dataBuffer and returns the struct name (and 0 on failure)
	//virtual	const char*	serialize(void* dataBuffer, b3Serializer* serializer) const;
};

///Serialized 32-bit index.
struct b3IntIndexData
{
	int m_value;
};

///Serialized 16-bit index, padded to 4 bytes.
struct b3ShortIntIndexData
{
	short m_value;
	char m_pad[2];
};

///Serialized triangle made of three 16-bit indices, padded to 8 bytes.
struct b3ShortIntIndexTripletData
{
	short m_values[3];
	char m_pad[2];
};

///Serialized triangle made of three 8-bit indices, padded to 4 bytes.
struct b3CharIndexTripletData
{
	unsigned char m_values[3];
	char m_pad;
};

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
struct b3MeshPartData
{
	b3Vector3FloatData* m_vertices3f;
	b3Vector3DoubleData* m_vertices3d;

	b3IntIndexData* m_indices32;
	b3ShortIntIndexTripletData* m_3indices16;
	b3CharIndexTripletData* m_3indices8;

	b3ShortIntIndexData* m_indices16;  //backwards compatibility

	int m_numTriangles;  //length of m_indices = m_numTriangles
	int m_numVertices;
};

///do not change those serialization structures, it requires an updated sBulletDNAstr/sBulletDNAstr64
struct b3StridingMeshInterfaceData
{
	b3MeshPartData* m_meshPartsPtr;
	b3Vector3FloatData m_scaling;
	int m_numMeshParts;
	char m_padding[4];
};

///Size in bytes of the serialized header structure.
B3_FORCE_INLINE int b3StridingMeshInterface::calculateSerializeBufferSize() const
{
	return sizeof(b3StridingMeshInterfaceData);
}

#endif  //B3_STRIDING_MESHINTERFACE_H
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h
b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h deleted file mode 100644 index 9ca1e22949..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3SupportMappings.h +++ /dev/null @@ -1,34 +0,0 @@ - -#ifndef B3_SUPPORT_MAPPINGS_H -#define B3_SUPPORT_MAPPINGS_H - -#include "Bullet3Common/b3Transform.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "b3VectorFloat4.h" - -struct b3GjkPairDetector; - -inline b3Vector3 localGetSupportVertexWithMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray<b3Vector3>& verticesA, b3Scalar margin) -{ - b3Vector3 supVec = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Scalar maxDot = b3Scalar(-B3_LARGE_FLOAT); - - // Here we take advantage of dot(a, b*c) = dot(a*b, c). Note: This is true mathematically, but not numerically. - if (0 < hull->m_numVertices) - { - const b3Vector3 scaled = supportVec; - int index = (int)scaled.maxDot(&verticesA[hull->m_vertexOffset], hull->m_numVertices, maxDot); - return verticesA[hull->m_vertexOffset + index]; - } - - return supVec; -} - -inline b3Vector3 localGetSupportVertexWithoutMargin(const float4& supportVec, const struct b3ConvexPolyhedronData* hull, - const b3AlignedObjectArray<b3Vector3>& verticesA) -{ - return localGetSupportVertexWithMargin(supportVec, hull, verticesA, 0.f); -} - -#endif //B3_SUPPORT_MAPPINGS_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp deleted file mode 100644 index 3908c6de89..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. 
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#include "b3TriangleCallback.h"

//Out-of-line definitions of the virtual destructors declared in b3TriangleCallback.h.
//Both are empty: the callback interfaces declare no data members.
b3TriangleCallback::~b3TriangleCallback()
{
}

b3InternalTriangleIndexCallback::~b3InternalTriangleIndexCallback()
{
}
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h
deleted file mode 100644
index a0fd3e7ac7..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleCallback.h
+++ /dev/null
@@ -1,37 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/

#ifndef B3_TRIANGLE_CALLBACK_H
#define B3_TRIANGLE_CALLBACK_H

#include "Bullet3Common/b3Vector3.h"

///The b3TriangleCallback provides a callback for each overlapping triangle when calling processAllTriangles.
///This callback is called by processAllTriangles for all b3ConcaveShape derived class, such as  b3BvhTriangleMeshShape, b3StaticPlaneShape and b3HeightfieldTerrainShape.
class b3TriangleCallback
{
public:
	virtual ~b3TriangleCallback();
	///triangle points to the three vertices; partId and triangleIndex identify the triangle.
	virtual void processTriangle(b3Vector3* triangle, int partId, int triangleIndex) = 0;
};

///Callback interface with the same shape as b3TriangleCallback, under a separate method name.
///It is the type b3StridingMeshInterface::InternalProcessAllTriangles takes: it is called once per triangle
///with the three scaled vertices, the sub-part number as partId and the triangle number within that part.
class b3InternalTriangleIndexCallback
{
public:
	virtual ~b3InternalTriangleIndexCallback();
	virtual void internalProcessTriangleIndex(b3Vector3* triangle, int partId, int triangleIndex) = 0;
};

#endif  //B3_TRIANGLE_CALLBACK_H
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp
deleted file mode 100644
index 73faadbdd0..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.cpp
+++ /dev/null
@@ -1,90 +0,0 @@
/*
Bullet Continuous Collision Detection and Physics Library
Copyright (c) 2003-2009 Erwin Coumans  http://bulletphysics.org

This software is provided 'as-is', without any express or implied warranty.
In no event will the authors be held liable for any damages arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it freely,
subject to the following restrictions:

1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software.
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#include "b3TriangleIndexVertexArray.h" - -b3TriangleIndexVertexArray::b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride) - : m_hasAabb(0) -{ - b3IndexedMesh mesh; - - mesh.m_numTriangles = numTriangles; - mesh.m_triangleIndexBase = (const unsigned char*)triangleIndexBase; - mesh.m_triangleIndexStride = triangleIndexStride; - mesh.m_numVertices = numVertices; - mesh.m_vertexBase = (const unsigned char*)vertexBase; - mesh.m_vertexStride = vertexStride; - - addIndexedMesh(mesh); -} - -b3TriangleIndexVertexArray::~b3TriangleIndexVertexArray() -{ -} - -void b3TriangleIndexVertexArray::getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart) -{ - b3Assert(subpart < getNumSubParts()); - - b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - numverts = mesh.m_numVertices; - (*vertexbase) = (unsigned char*)mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - - (*indexbase) = (unsigned char*)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -void b3TriangleIndexVertexArray::getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart) const -{ - const b3IndexedMesh& mesh = m_indexedMeshes[subpart]; - - 
numverts = mesh.m_numVertices; - (*vertexbase) = (const unsigned char*)mesh.m_vertexBase; - - type = mesh.m_vertexType; - - vertexStride = mesh.m_vertexStride; - - numfaces = mesh.m_numTriangles; - (*indexbase) = (const unsigned char*)mesh.m_triangleIndexBase; - indexstride = mesh.m_triangleIndexStride; - indicestype = mesh.m_indexType; -} - -bool b3TriangleIndexVertexArray::hasPremadeAabb() const -{ - return (m_hasAabb == 1); -} - -void b3TriangleIndexVertexArray::setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const -{ - m_aabbMin = aabbMin; - m_aabbMax = aabbMax; - m_hasAabb = 1; // this is intentionally an int see notes in header -} - -void b3TriangleIndexVertexArray::getPremadeAabb(b3Vector3* aabbMin, b3Vector3* aabbMax) const -{ - *aabbMin = m_aabbMin; - *aabbMax = m_aabbMax; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h deleted file mode 100644 index 57cbf03dc2..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2009 Erwin Coumans http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_TRIANGLE_INDEX_VERTEX_ARRAY_H -#define B3_TRIANGLE_INDEX_VERTEX_ARRAY_H - -#include "b3StridingMeshInterface.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Scalar.h" - -///The b3IndexedMesh indexes a single vertex and index array. Multiple b3IndexedMesh objects can be passed into a b3TriangleIndexVertexArray using addIndexedMesh. -///Instead of the number of indices, we pass the number of triangles. -B3_ATTRIBUTE_ALIGNED16(struct) -b3IndexedMesh -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numTriangles; - const unsigned char* m_triangleIndexBase; - // Size in byte of the indices for one triangle (3*sizeof(index_type) if the indices are tightly packed) - int m_triangleIndexStride; - int m_numVertices; - const unsigned char* m_vertexBase; - // Size of a vertex, in bytes - int m_vertexStride; - - // The index type is set when adding an indexed mesh to the - // b3TriangleIndexVertexArray, do not set it manually - PHY_ScalarType m_indexType; - - // The vertex type has a default type similar to Bullet's precision mode (float or double) - // but can be set manually if you for example run Bullet with double precision but have - // mesh data in single precision.. - PHY_ScalarType m_vertexType; - - b3IndexedMesh() - : m_indexType(PHY_INTEGER), -#ifdef B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_DOUBLE) -#else // B3_USE_DOUBLE_PRECISION - m_vertexType(PHY_FLOAT) -#endif // B3_USE_DOUBLE_PRECISION - { - } -}; - -typedef b3AlignedObjectArray<b3IndexedMesh> IndexedMeshArray; - -///The b3TriangleIndexVertexArray allows to access multiple triangle meshes, by indexing into existing triangle/index arrays. 
-///Additional meshes can be added using addIndexedMesh -///No duplcate is made of the vertex/index data, it only indexes into external vertex/index arrays. -///So keep those arrays around during the lifetime of this b3TriangleIndexVertexArray. -B3_ATTRIBUTE_ALIGNED16(class) -b3TriangleIndexVertexArray : public b3StridingMeshInterface -{ -protected: - IndexedMeshArray m_indexedMeshes; - int m_pad[2]; - mutable int m_hasAabb; // using int instead of bool to maintain alignment - mutable b3Vector3 m_aabbMin; - mutable b3Vector3 m_aabbMax; - -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3TriangleIndexVertexArray() : m_hasAabb(0) - { - } - - virtual ~b3TriangleIndexVertexArray(); - - //just to be backwards compatible - b3TriangleIndexVertexArray(int numTriangles, int* triangleIndexBase, int triangleIndexStride, int numVertices, b3Scalar* vertexBase, int vertexStride); - - void addIndexedMesh(const b3IndexedMesh& mesh, PHY_ScalarType indexType = PHY_INTEGER) - { - m_indexedMeshes.push_back(mesh); - m_indexedMeshes[m_indexedMeshes.size() - 1].m_indexType = indexType; - } - - virtual void getLockedVertexIndexBase(unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0); - - virtual void getLockedReadOnlyVertexIndexBase(const unsigned char** vertexbase, int& numverts, PHY_ScalarType& type, int& vertexStride, const unsigned char** indexbase, int& indexstride, int& numfaces, PHY_ScalarType& indicestype, int subpart = 0) const; - - /// unLockVertexBase finishes the access to a subpart of the triangle mesh - /// make a call to unLockVertexBase when the read and write access (using getLockedVertexIndexBase) is finished - virtual void unLockVertexBase(int subpart) { (void)subpart; } - - virtual void unLockReadOnlyVertexBase(int subpart) const { (void)subpart; } - - /// getNumSubParts returns the number of separate subparts - /// each subpart has a 
continuous array of vertices and indices - virtual int getNumSubParts() const - { - return (int)m_indexedMeshes.size(); - } - - IndexedMeshArray& getIndexedMeshArray() - { - return m_indexedMeshes; - } - - const IndexedMeshArray& getIndexedMeshArray() const - { - return m_indexedMeshes; - } - - virtual void preallocateVertices(int numverts) { (void)numverts; } - virtual void preallocateIndices(int numindices) { (void)numindices; } - - virtual bool hasPremadeAabb() const; - virtual void setPremadeAabb(const b3Vector3& aabbMin, const b3Vector3& aabbMax) const; - virtual void getPremadeAabb(b3Vector3 * aabbMin, b3Vector3 * aabbMax) const; -}; - -#endif //B3_TRIANGLE_INDEX_VERTEX_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h deleted file mode 100644 index 5cc4b5a626..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VectorFloat4.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef B3_VECTOR_FLOAT4_H -#define B3_VECTOR_FLOAT4_H - -#include "Bullet3Common/b3Transform.h" - -//#define cross3(a,b) (a.cross(b)) -#define float4 b3Vector3 -//#define make_float4(x,y,z,w) b3Vector4(x,y,z,w) - -#endif //B3_VECTOR_FLOAT4_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp deleted file mode 100644 index 8b0a834efe..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.cpp +++ /dev/null @@ -1,574 +0,0 @@ - -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. - - Elsevier CDROM license agreements grants nonexclusive license to use the software - for any purpose, commercial or non-commercial as long as the following credit is included - identifying the original source of the software: - - Parts of the source are "from the book Real-Time Collision Detection by - Christer Ericson, published by Morgan Kaufmann Publishers, - (c) 2005 Elsevier Inc." 
- -*/ - -#include "b3VoronoiSimplexSolver.h" - -#define VERTA 0 -#define VERTB 1 -#define VERTC 2 -#define VERTD 3 - -#define B3_CATCH_DEGENERATE_TETRAHEDRON 1 -void b3VoronoiSimplexSolver::removeVertex(int index) -{ - b3Assert(m_numVertices > 0); - m_numVertices--; - m_simplexVectorW[index] = m_simplexVectorW[m_numVertices]; - m_simplexPointsP[index] = m_simplexPointsP[m_numVertices]; - m_simplexPointsQ[index] = m_simplexPointsQ[m_numVertices]; -} - -void b3VoronoiSimplexSolver::reduceVertices(const b3UsageBitfield& usedVerts) -{ - if ((numVertices() >= 4) && (!usedVerts.usedVertexD)) - removeVertex(3); - - if ((numVertices() >= 3) && (!usedVerts.usedVertexC)) - removeVertex(2); - - if ((numVertices() >= 2) && (!usedVerts.usedVertexB)) - removeVertex(1); - - if ((numVertices() >= 1) && (!usedVerts.usedVertexA)) - removeVertex(0); -} - -//clear the simplex, remove all the vertices -void b3VoronoiSimplexSolver::reset() -{ - m_cachedValidClosest = false; - m_numVertices = 0; - m_needsUpdate = true; - m_lastW = b3MakeVector3(b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT), b3Scalar(B3_LARGE_FLOAT)); - m_cachedBC.reset(); -} - -//add a vertex -void b3VoronoiSimplexSolver::addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q) -{ - m_lastW = w; - m_needsUpdate = true; - - m_simplexVectorW[m_numVertices] = w; - m_simplexPointsP[m_numVertices] = p; - m_simplexPointsQ[m_numVertices] = q; - - m_numVertices++; -} - -bool b3VoronoiSimplexSolver::updateClosestVectorAndPoints() -{ - if (m_needsUpdate) - { - m_cachedBC.reset(); - - m_needsUpdate = false; - - switch (numVertices()) - { - case 0: - m_cachedValidClosest = false; - break; - case 1: - { - m_cachedP1 = m_simplexPointsP[0]; - m_cachedP2 = m_simplexPointsQ[0]; - m_cachedV = m_cachedP1 - m_cachedP2; //== m_simplexVectorW[0] - m_cachedBC.reset(); - m_cachedBC.setBarycentricCoordinates(b3Scalar(1.), b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - m_cachedValidClosest = m_cachedBC.isValid(); - break; - }; - 
case 2: - { - //closest point origin from line segment - const b3Vector3& from = m_simplexVectorW[0]; - const b3Vector3& to = m_simplexVectorW[1]; - b3Vector3 nearest; - - b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - b3Vector3 diff = p - from; - b3Vector3 v = to - from; - b3Scalar t = v.dot(diff); - - if (t > 0) - { - b3Scalar dotVV = v.dot(v); - if (t < dotVV) - { - t /= dotVV; - diff -= t * v; - m_cachedBC.m_usedVertices.usedVertexA = true; - m_cachedBC.m_usedVertices.usedVertexB = true; - } - else - { - t = 1; - diff -= v; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexB = true; - } - } - else - { - t = 0; - //reduce to 1 point - m_cachedBC.m_usedVertices.usedVertexA = true; - } - m_cachedBC.setBarycentricCoordinates(1 - t, t); - nearest = from + t * v; - - m_cachedP1 = m_simplexPointsP[0] + t * (m_simplexPointsP[1] - m_simplexPointsP[0]); - m_cachedP2 = m_simplexPointsQ[0] + t * (m_simplexPointsQ[1] - m_simplexPointsQ[0]); - m_cachedV = m_cachedP1 - m_cachedP2; - - reduceVertices(m_cachedBC.m_usedVertices); - - m_cachedValidClosest = m_cachedBC.isValid(); - break; - } - case 3: - { - //closest point origin from triangle - b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - - closestPtPointTriangle(p, a, b, c, m_cachedBC); - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2]; - - m_cachedV = m_cachedP1 - m_cachedP2; - - reduceVertices(m_cachedBC.m_usedVertices); - m_cachedValidClosest = m_cachedBC.isValid(); - - break; - } - case 4: - { - 
b3Vector3 p = b3MakeVector3(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - - const b3Vector3& a = m_simplexVectorW[0]; - const b3Vector3& b = m_simplexVectorW[1]; - const b3Vector3& c = m_simplexVectorW[2]; - const b3Vector3& d = m_simplexVectorW[3]; - - bool hasSeparation = closestPtPointTetrahedron(p, a, b, c, d, m_cachedBC); - - if (hasSeparation) - { - m_cachedP1 = m_simplexPointsP[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsP[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsP[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsP[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedP2 = m_simplexPointsQ[0] * m_cachedBC.m_barycentricCoords[0] + - m_simplexPointsQ[1] * m_cachedBC.m_barycentricCoords[1] + - m_simplexPointsQ[2] * m_cachedBC.m_barycentricCoords[2] + - m_simplexPointsQ[3] * m_cachedBC.m_barycentricCoords[3]; - - m_cachedV = m_cachedP1 - m_cachedP2; - reduceVertices(m_cachedBC.m_usedVertices); - } - else - { - // printf("sub distance got penetration\n"); - - if (m_cachedBC.m_degenerate) - { - m_cachedValidClosest = false; - } - else - { - m_cachedValidClosest = true; - //degenerate case == false, penetration = true + zero - m_cachedV.setValue(b3Scalar(0.), b3Scalar(0.), b3Scalar(0.)); - } - break; - } - - m_cachedValidClosest = m_cachedBC.isValid(); - - //closest point origin from tetrahedron - break; - } - default: - { - m_cachedValidClosest = false; - } - }; - } - - return m_cachedValidClosest; -} - -//return/calculate the closest vertex -bool b3VoronoiSimplexSolver::closest(b3Vector3& v) -{ - bool succes = updateClosestVectorAndPoints(); - v = m_cachedV; - return succes; -} - -b3Scalar b3VoronoiSimplexSolver::maxVertex() -{ - int i, numverts = numVertices(); - b3Scalar maxV = b3Scalar(0.); - for (i = 0; i < numverts; i++) - { - b3Scalar curLen2 = m_simplexVectorW[i].length2(); - if (maxV < curLen2) - maxV = curLen2; - } - return maxV; -} - -//return the current simplex -int b3VoronoiSimplexSolver::getSimplex(b3Vector3* 
pBuf, b3Vector3* qBuf, b3Vector3* yBuf) const -{ - int i; - for (i = 0; i < numVertices(); i++) - { - yBuf[i] = m_simplexVectorW[i]; - pBuf[i] = m_simplexPointsP[i]; - qBuf[i] = m_simplexPointsQ[i]; - } - return numVertices(); -} - -bool b3VoronoiSimplexSolver::inSimplex(const b3Vector3& w) -{ - bool found = false; - int i, numverts = numVertices(); - //b3Scalar maxV = b3Scalar(0.); - - //w is in the current (reduced) simplex - for (i = 0; i < numverts; i++) - { -#ifdef BT_USE_EQUAL_VERTEX_THRESHOLD - if (m_simplexVectorW[i].distance2(w) <= m_equalVertexThreshold) -#else - if (m_simplexVectorW[i] == w) -#endif - found = true; - } - - //check in case lastW is already removed - if (w == m_lastW) - return true; - - return found; -} - -void b3VoronoiSimplexSolver::backup_closest(b3Vector3& v) -{ - v = m_cachedV; -} - -bool b3VoronoiSimplexSolver::emptySimplex() const -{ - return (numVertices() == 0); -} - -void b3VoronoiSimplexSolver::compute_points(b3Vector3& p1, b3Vector3& p2) -{ - updateClosestVectorAndPoints(); - p1 = m_cachedP1; - p2 = m_cachedP2; -} - -bool b3VoronoiSimplexSolver::closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result) -{ - result.m_usedVertices.reset(); - - // Check if P in vertex region outside A - b3Vector3 ab = b - a; - b3Vector3 ac = c - a; - b3Vector3 ap = p - a; - b3Scalar d1 = ab.dot(ap); - b3Scalar d2 = ac.dot(ap); - if (d1 <= b3Scalar(0.0) && d2 <= b3Scalar(0.0)) - { - result.m_closestPointOnSimplex = a; - result.m_usedVertices.usedVertexA = true; - result.setBarycentricCoordinates(1, 0, 0); - return true; // a; // barycentric coordinates (1,0,0) - } - - // Check if P in vertex region outside B - b3Vector3 bp = p - b; - b3Scalar d3 = ab.dot(bp); - b3Scalar d4 = ac.dot(bp); - if (d3 >= b3Scalar(0.0) && d4 <= d3) - { - result.m_closestPointOnSimplex = b; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(0, 1, 0); - - return true; 
// b; // barycentric coordinates (0,1,0) - } - // Check if P in edge region of AB, if so return projection of P onto AB - b3Scalar vc = d1 * d4 - d3 * d2; - if (vc <= b3Scalar(0.0) && d1 >= b3Scalar(0.0) && d3 <= b3Scalar(0.0)) - { - b3Scalar v = d1 / (d1 - d3); - result.m_closestPointOnSimplex = a + v * ab; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.setBarycentricCoordinates(1 - v, v, 0); - return true; - //return a + v * ab; // barycentric coordinates (1-v,v,0) - } - - // Check if P in vertex region outside C - b3Vector3 cp = p - c; - b3Scalar d5 = ab.dot(cp); - b3Scalar d6 = ac.dot(cp); - if (d6 >= b3Scalar(0.0) && d5 <= d6) - { - result.m_closestPointOnSimplex = c; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0, 0, 1); - return true; //c; // barycentric coordinates (0,0,1) - } - - // Check if P in edge region of AC, if so return projection of P onto AC - b3Scalar vb = d5 * d2 - d1 * d6; - if (vb <= b3Scalar(0.0) && d2 >= b3Scalar(0.0) && d6 <= b3Scalar(0.0)) - { - b3Scalar w = d2 / (d2 - d6); - result.m_closestPointOnSimplex = a + w * ac; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1 - w, 0, w); - return true; - //return a + w * ac; // barycentric coordinates (1-w,0,w) - } - - // Check if P in edge region of BC, if so return projection of P onto BC - b3Scalar va = d3 * d6 - d5 * d4; - if (va <= b3Scalar(0.0) && (d4 - d3) >= b3Scalar(0.0) && (d5 - d6) >= b3Scalar(0.0)) - { - b3Scalar w = (d4 - d3) / ((d4 - d3) + (d5 - d6)); - - result.m_closestPointOnSimplex = b + w * (c - b); - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(0, 1 - w, w); - return true; - // return b + w * (c - b); // barycentric coordinates (0,1-w,w) - } - - // P inside face region. 
Compute Q through its barycentric coordinates (u,v,w) - b3Scalar denom = b3Scalar(1.0) / (va + vb + vc); - b3Scalar v = vb * denom; - b3Scalar w = vc * denom; - - result.m_closestPointOnSimplex = a + ab * v + ac * w; - result.m_usedVertices.usedVertexA = true; - result.m_usedVertices.usedVertexB = true; - result.m_usedVertices.usedVertexC = true; - result.setBarycentricCoordinates(1 - v - w, v, w); - - return true; - // return a + ab * v + ac * w; // = u*a + v*b + w*c, u = va * denom = b3Scalar(1.0) - v - w -} - -/// Test if point p and d lie on opposite sides of plane through abc -int b3VoronoiSimplexSolver::pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d) -{ - b3Vector3 normal = (b - a).cross(c - a); - - b3Scalar signp = (p - a).dot(normal); // [AP AB AC] - b3Scalar signd = (d - a).dot(normal); // [AD AB AC] - -#ifdef B3_CATCH_DEGENERATE_TETRAHEDRON -#ifdef BT_USE_DOUBLE_PRECISION - if (signd * signd < (b3Scalar(1e-8) * b3Scalar(1e-8))) - { - return -1; - } -#else - if (signd * signd < (b3Scalar(1e-4) * b3Scalar(1e-4))) - { - // printf("affine dependent/degenerate\n");// - return -1; - } -#endif - -#endif - // Points on opposite sides if expression signs are opposite - return signp * signd < b3Scalar(0.); -} - -bool b3VoronoiSimplexSolver::closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult) -{ - b3SubSimplexClosestResult tempResult; - - // Start out assuming point inside all halfspaces, so closest to itself - finalResult.m_closestPointOnSimplex = p; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = true; - finalResult.m_usedVertices.usedVertexB = true; - finalResult.m_usedVertices.usedVertexC = true; - finalResult.m_usedVertices.usedVertexD = true; - - int pointOutsideABC = pointOutsideOfPlane(p, a, b, c, d); - int pointOutsideACD = 
pointOutsideOfPlane(p, a, c, d, b); - int pointOutsideADB = pointOutsideOfPlane(p, a, d, b, c); - int pointOutsideBDC = pointOutsideOfPlane(p, b, d, c, a); - - if (pointOutsideABC < 0 || pointOutsideACD < 0 || pointOutsideADB < 0 || pointOutsideBDC < 0) - { - finalResult.m_degenerate = true; - return false; - } - - if (!pointOutsideABC && !pointOutsideACD && !pointOutsideADB && !pointOutsideBDC) - { - return false; - } - - b3Scalar bestSqDist = FLT_MAX; - // If point outside face abc then compute closest point on abc - if (pointOutsideABC) - { - closestPtPointTriangle(p, a, b, c, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - - b3Scalar sqDist = (q - p).dot(q - p); - // Update best closest point if (squared) distance is less than current best - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - //convert result bitmask! - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC], - 0); - } - } - - // Repeat test for face acd - if (pointOutsideACD) - { - closestPtPointTriangle(p, a, c, d, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! 
- - b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexB; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexC; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - 0, - tempResult.m_barycentricCoords[VERTB], - tempResult.m_barycentricCoords[VERTC]); - } - } - // Repeat test for face adb - - if (pointOutsideADB) - { - closestPtPointTriangle(p, a, d, b, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! - - b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - finalResult.m_usedVertices.usedVertexA = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexC; - - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - finalResult.setBarycentricCoordinates( - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - 0, - tempResult.m_barycentricCoords[VERTB]); - } - } - // Repeat test for face bdc - - if (pointOutsideBDC) - { - closestPtPointTriangle(p, b, d, c, tempResult); - b3Vector3 q = tempResult.m_closestPointOnSimplex; - //convert result bitmask! 
- b3Scalar sqDist = (q - p).dot(q - p); - if (sqDist < bestSqDist) - { - bestSqDist = sqDist; - finalResult.m_closestPointOnSimplex = q; - finalResult.m_usedVertices.reset(); - // - finalResult.m_usedVertices.usedVertexB = tempResult.m_usedVertices.usedVertexA; - finalResult.m_usedVertices.usedVertexC = tempResult.m_usedVertices.usedVertexC; - finalResult.m_usedVertices.usedVertexD = tempResult.m_usedVertices.usedVertexB; - - finalResult.setBarycentricCoordinates( - 0, - tempResult.m_barycentricCoords[VERTA], - tempResult.m_barycentricCoords[VERTC], - tempResult.m_barycentricCoords[VERTB]); - } - } - - //help! we ended up full ! - - if (finalResult.m_usedVertices.usedVertexA && - finalResult.m_usedVertices.usedVertexB && - finalResult.m_usedVertices.usedVertexC && - finalResult.m_usedVertices.usedVertexD) - { - return true; - } - - return true; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h deleted file mode 100644 index b40b169978..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3VoronoiSimplexSolver.h +++ /dev/null @@ -1,164 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2003-2006 Erwin Coumans http://continuousphysics.com/Bullet/ - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_VORONOI_SIMPLEX_SOLVER_H -#define B3_VORONOI_SIMPLEX_SOLVER_H - -#include "Bullet3Common/b3Vector3.h" - -#define VORONOI_SIMPLEX_MAX_VERTS 5 - -///disable next define, or use defaultCollisionConfiguration->getSimplexSolver()->setEqualVertexThreshold(0.f) to disable/configure -//#define BT_USE_EQUAL_VERTEX_THRESHOLD -#define VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD 0.0001f - -struct b3UsageBitfield -{ - b3UsageBitfield() - { - reset(); - } - - void reset() - { - usedVertexA = false; - usedVertexB = false; - usedVertexC = false; - usedVertexD = false; - } - unsigned short usedVertexA : 1; - unsigned short usedVertexB : 1; - unsigned short usedVertexC : 1; - unsigned short usedVertexD : 1; - unsigned short unused1 : 1; - unsigned short unused2 : 1; - unsigned short unused3 : 1; - unsigned short unused4 : 1; -}; - -struct b3SubSimplexClosestResult -{ - b3Vector3 m_closestPointOnSimplex; - //MASK for m_usedVertices - //stores the simplex vertex-usage, using the MASK, - // if m_usedVertices & MASK then the related vertex is used - b3UsageBitfield m_usedVertices; - b3Scalar m_barycentricCoords[4]; - bool m_degenerate; - - void reset() - { - m_degenerate = false; - setBarycentricCoordinates(); - m_usedVertices.reset(); - } - bool isValid() - { - bool valid = (m_barycentricCoords[0] >= b3Scalar(0.)) && - (m_barycentricCoords[1] >= b3Scalar(0.)) && - (m_barycentricCoords[2] >= b3Scalar(0.)) && - (m_barycentricCoords[3] >= b3Scalar(0.)); - - return valid; - } - void setBarycentricCoordinates(b3Scalar a = b3Scalar(0.), b3Scalar b = b3Scalar(0.), b3Scalar c = b3Scalar(0.), b3Scalar d = b3Scalar(0.)) - { - m_barycentricCoords[0] = a; - m_barycentricCoords[1] = b; - m_barycentricCoords[2] = c; - m_barycentricCoords[3] = d; - } -}; - -/// b3VoronoiSimplexSolver is an 
implementation of the closest point distance algorithm from a 1-4 points simplex to the origin. -/// Can be used with GJK, as an alternative to Johnson distance algorithm. - -B3_ATTRIBUTE_ALIGNED16(class) -b3VoronoiSimplexSolver -{ -public: - B3_DECLARE_ALIGNED_ALLOCATOR(); - - int m_numVertices; - - b3Vector3 m_simplexVectorW[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsP[VORONOI_SIMPLEX_MAX_VERTS]; - b3Vector3 m_simplexPointsQ[VORONOI_SIMPLEX_MAX_VERTS]; - - b3Vector3 m_cachedP1; - b3Vector3 m_cachedP2; - b3Vector3 m_cachedV; - b3Vector3 m_lastW; - - b3Scalar m_equalVertexThreshold; - bool m_cachedValidClosest; - - b3SubSimplexClosestResult m_cachedBC; - - bool m_needsUpdate; - - void removeVertex(int index); - void reduceVertices(const b3UsageBitfield& usedVerts); - bool updateClosestVectorAndPoints(); - - bool closestPtPointTetrahedron(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d, b3SubSimplexClosestResult& finalResult); - int pointOutsideOfPlane(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, const b3Vector3& d); - bool closestPtPointTriangle(const b3Vector3& p, const b3Vector3& a, const b3Vector3& b, const b3Vector3& c, b3SubSimplexClosestResult& result); - -public: - b3VoronoiSimplexSolver() - : m_equalVertexThreshold(VORONOI_DEFAULT_EQUAL_VERTEX_THRESHOLD) - { - } - void reset(); - - void addVertex(const b3Vector3& w, const b3Vector3& p, const b3Vector3& q); - - void setEqualVertexThreshold(b3Scalar threshold) - { - m_equalVertexThreshold = threshold; - } - - b3Scalar getEqualVertexThreshold() const - { - return m_equalVertexThreshold; - } - - bool closest(b3Vector3 & v); - - b3Scalar maxVertex(); - - bool fullSimplex() const - { - return (m_numVertices == 4); - } - - int getSimplex(b3Vector3 * pBuf, b3Vector3 * qBuf, b3Vector3 * yBuf) const; - - bool inSimplex(const b3Vector3& w); - - void backup_closest(b3Vector3 & v); - - bool emptySimplex() const; - - void 
compute_points(b3Vector3 & p1, b3Vector3 & p2); - - int numVertices() const - { - return m_numVertices; - } -}; - -#endif //B3_VORONOI_SIMPLEX_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl deleted file mode 100644 index faa413441c..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.cl +++ /dev/null @@ -1,283 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - -typedef unsigned int u32; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///btQuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} btQuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - -int getTriangleIndex(const btQuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeaf(const btQuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0; -} - -int getEscapeIndex(const btQuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} btBvhSubtreeInfo; - -///keep this in sync with btCollidable.h -typedef struct -{ - int m_numChildShapes; - int blaat2; - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -int testQuantizedAabbAgainstQuantizedAabb( - const unsigned short int* aabbMin1, - const unsigned short int* aabbMax1, - const unsigned short int* aabbMin2, - const unsigned short int* aabbMax2) -{ - //int overlap = 1; - if (aabbMin1[0] > aabbMax2[0]) - return 0; - if (aabbMax1[0] < aabbMin2[0]) - return 0; - if (aabbMin1[1] > aabbMax2[1]) - return 0; - if (aabbMax1[1] < aabbMin2[1]) - return 0; - if (aabbMin1[2] > aabbMax2[2]) - return 0; - if (aabbMax1[2] < aabbMin2[2]) - return 0; - return 1; - //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap; - //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap; - //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 
0 : overlap; - //return overlap; -} - - -void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization) -{ - float4 clampedPoint = max(point2,bvhAabbMin); - clampedPoint = min (clampedPoint, bvhAabbMax); - - float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization; - if (isMax) - { - out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1)); - out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1)); - out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1)); - } else - { - out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe)); - out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe)); - out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe)); - } - -} - - -// work-in-progress -__kernel void bvhTraversalKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global btAabbCL* aabbs, - __global int4* concavePairsOut, - __global volatile int* numConcavePairsOut, - __global const btBvhSubtreeInfo* subtreeHeadersRoot, - __global const btQuantizedBvhNode* quantizedNodesRoot, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumConcavePairsCapacity) -{ - int id = get_global_id(0); - if (id>=numPairs) - return; - - int bodyIndexA = pairs[id].x; - int bodyIndexB = pairs[id].y; - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - return; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - if (shapeTypeB!=SHAPE_CONVEX_HULL && - shapeTypeB!=SHAPE_SPHERE && - shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS - ) - return; - - b3BvhInfo bvhInfo = 
bvhInfos[collidables[collidableIndexA].m_numChildShapes]; - - float4 bvhAabbMin = bvhInfo.m_aabbMin; - float4 bvhAabbMax = bvhInfo.m_aabbMax; - float4 bvhQuantization = bvhInfo.m_quantization; - int numSubtreeHeaders = bvhInfo.m_numSubTrees; - __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset]; - __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset]; - - - unsigned short int quantizedQueryAabbMin[3]; - unsigned short int quantizedQueryAabbMax[3]; - quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization); - quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization); - - for (int i=0;i<numSubtreeHeaders;i++) - { - btBvhSubtreeInfo subtree = subtreeHeaders[i]; - - int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax); - if (overlap != 0) - { - int startNodeIndex = subtree.m_rootNodeIndex; - int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize; - int curIndex = startNodeIndex; - int escapeIndex; - int isLeafNode; - int aabbOverlap; - while (curIndex < endNodeIndex) - { - btQuantizedBvhNode rootNode = quantizedNodes[curIndex]; - aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax); - isLeafNode = isLeaf(&rootNode); - if (aabbOverlap) - { - if (isLeafNode) - { - int triangleIndex = getTriangleIndex(&rootNode); - if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - int pairIdx = atomic_add(numConcavePairsOut,numChildrenB); - for (int b=0;b<numChildrenB;b++) - { - if ((pairIdx+b)<maxNumConcavePairsCapacity) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int4 newPair = 
(int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB); - concavePairsOut[pairIdx+b] = newPair; - } - } - } else - { - int pairIdx = atomic_inc(numConcavePairsOut); - if (pairIdx<maxNumConcavePairsCapacity) - { - int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0); - concavePairsOut[pairIdx] = newPair; - } - } - } - curIndex++; - } else - { - if (isLeafNode) - { - curIndex++; - } else - { - escapeIndex = getEscapeIndex(&rootNode); - curIndex += escapeIndex; - } - } - } - } - } - -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h deleted file mode 100644 index f1df8a6970..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/bvhTraversal.h +++ /dev/null @@ -1,257 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* bvhTraversalKernelCL = - "//keep this enum in sync with the CPU version (in btCollidable.h)\n" - "//written by Erwin Coumans\n" - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define TRIANGLE_NUM_CONVEX_FACES 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define SHAPE_SPHERE 7\n" - "typedef unsigned int u32;\n" - "#define MAX_NUM_PARTS_IN_BITS 10\n" - "///btQuantizedBvhNode is a compressed aabb node, 16 bytes.\n" - "///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes\n" - " int m_escapeIndexOrTriangleIndex;\n" - "} btQuantizedBvhNode;\n" - "typedef struct\n" - "{\n" - " float4 m_aabbMin;\n" - " float4 m_aabbMax;\n" - " float4 m_quantization;\n" - " int m_numNodes;\n" - " int m_numSubTrees;\n" - " int m_nodeOffset;\n" - " int m_subTreeOffset;\n" - "} b3BvhInfo;\n" - "int getTriangleIndex(const btQuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeaf(const btQuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return 
(rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const btQuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} btBvhSubtreeInfo;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " int m_numChildShapes;\n" - " int blaat2;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "int testQuantizedAabbAgainstQuantizedAabb(\n" - " const unsigned short int* aabbMin1,\n" - " const unsigned short int* aabbMax1,\n" - " const unsigned short int* aabbMin2,\n" - " const unsigned short int* aabbMax2)\n" - "{\n" - " //int overlap = 1;\n" - " if (aabbMin1[0] > aabbMax2[0])\n" - " return 0;\n" - " if (aabbMax1[0] < aabbMin2[0])\n" - " return 0;\n" - " if (aabbMin1[1] > aabbMax2[1])\n" - " return 0;\n" - " if (aabbMax1[1] < aabbMin2[1])\n" - " return 0;\n" - " if (aabbMin1[2] > aabbMax2[2])\n" - " return 0;\n" - " if 
(aabbMax1[2] < aabbMin2[2])\n" - " return 0;\n" - " return 1;\n" - " //overlap = ((aabbMin1[0] > aabbMax2[0]) || (aabbMax1[0] < aabbMin2[0])) ? 0 : overlap;\n" - " //overlap = ((aabbMin1[2] > aabbMax2[2]) || (aabbMax1[2] < aabbMin2[2])) ? 0 : overlap;\n" - " //overlap = ((aabbMin1[1] > aabbMax2[1]) || (aabbMax1[1] < aabbMin2[1])) ? 0 : overlap;\n" - " //return overlap;\n" - "}\n" - "void quantizeWithClamp(unsigned short* out, float4 point2,int isMax, float4 bvhAabbMin, float4 bvhAabbMax, float4 bvhQuantization)\n" - "{\n" - " float4 clampedPoint = max(point2,bvhAabbMin);\n" - " clampedPoint = min (clampedPoint, bvhAabbMax);\n" - " float4 v = (clampedPoint - bvhAabbMin) * bvhQuantization;\n" - " if (isMax)\n" - " {\n" - " out[0] = (unsigned short) (((unsigned short)(v.x+1.f) | 1));\n" - " out[1] = (unsigned short) (((unsigned short)(v.y+1.f) | 1));\n" - " out[2] = (unsigned short) (((unsigned short)(v.z+1.f) | 1));\n" - " } else\n" - " {\n" - " out[0] = (unsigned short) (((unsigned short)(v.x) & 0xfffe));\n" - " out[1] = (unsigned short) (((unsigned short)(v.y) & 0xfffe));\n" - " out[2] = (unsigned short) (((unsigned short)(v.z) & 0xfffe));\n" - " }\n" - "}\n" - "// work-in-progress\n" - "__kernel void bvhTraversalKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global btAabbCL* aabbs,\n" - " __global int4* concavePairsOut,\n" - " __global volatile int* numConcavePairsOut,\n" - " __global const btBvhSubtreeInfo* subtreeHeadersRoot,\n" - " __global const btQuantizedBvhNode* quantizedNodesRoot,\n" - " __global const b3BvhInfo* bvhInfos,\n" - " int numPairs,\n" - " int maxNumConcavePairsCapacity)\n" - "{\n" - " int id = get_global_id(0);\n" - " if (id>=numPairs)\n" - " return;\n" - " \n" - " int bodyIndexA = pairs[id].x;\n" - " int bodyIndexB = pairs[id].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = 
rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " return;\n" - " }\n" - " \n" - " if (collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH)\n" - " return;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " \n" - " if (shapeTypeB!=SHAPE_CONVEX_HULL &&\n" - " shapeTypeB!=SHAPE_SPHERE &&\n" - " shapeTypeB!=SHAPE_COMPOUND_OF_CONVEX_HULLS\n" - " )\n" - " return;\n" - " b3BvhInfo bvhInfo = bvhInfos[collidables[collidableIndexA].m_numChildShapes];\n" - " float4 bvhAabbMin = bvhInfo.m_aabbMin;\n" - " float4 bvhAabbMax = bvhInfo.m_aabbMax;\n" - " float4 bvhQuantization = bvhInfo.m_quantization;\n" - " int numSubtreeHeaders = bvhInfo.m_numSubTrees;\n" - " __global const btBvhSubtreeInfo* subtreeHeaders = &subtreeHeadersRoot[bvhInfo.m_subTreeOffset];\n" - " __global const btQuantizedBvhNode* quantizedNodes = &quantizedNodesRoot[bvhInfo.m_nodeOffset];\n" - " \n" - " unsigned short int quantizedQueryAabbMin[3];\n" - " unsigned short int quantizedQueryAabbMax[3];\n" - " quantizeWithClamp(quantizedQueryAabbMin,aabbs[bodyIndexB].m_min,false,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" - " quantizeWithClamp(quantizedQueryAabbMax,aabbs[bodyIndexB].m_max,true ,bvhAabbMin, bvhAabbMax,bvhQuantization);\n" - " \n" - " for (int i=0;i<numSubtreeHeaders;i++)\n" - " {\n" - " btBvhSubtreeInfo subtree = subtreeHeaders[i];\n" - " \n" - " int overlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,subtree.m_quantizedAabbMin,subtree.m_quantizedAabbMax);\n" - " if (overlap != 0)\n" - " {\n" - " int startNodeIndex = subtree.m_rootNodeIndex;\n" - " int endNodeIndex = subtree.m_rootNodeIndex+subtree.m_subtreeSize;\n" - " int curIndex = startNodeIndex;\n" - " int escapeIndex;\n" - " int isLeafNode;\n" - " int aabbOverlap;\n" - " while (curIndex < 
endNodeIndex)\n" - " {\n" - " btQuantizedBvhNode rootNode = quantizedNodes[curIndex];\n" - " aabbOverlap = testQuantizedAabbAgainstQuantizedAabb(quantizedQueryAabbMin,quantizedQueryAabbMax,rootNode.m_quantizedAabbMin,rootNode.m_quantizedAabbMax);\n" - " isLeafNode = isLeaf(&rootNode);\n" - " if (aabbOverlap)\n" - " {\n" - " if (isLeafNode)\n" - " {\n" - " int triangleIndex = getTriangleIndex(&rootNode);\n" - " if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" - " int pairIdx = atomic_add(numConcavePairsOut,numChildrenB);\n" - " for (int b=0;b<numChildrenB;b++)\n" - " {\n" - " if ((pairIdx+b)<maxNumConcavePairsCapacity)\n" - " {\n" - " int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" - " int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,childShapeIndexB);\n" - " concavePairsOut[pairIdx+b] = newPair;\n" - " }\n" - " }\n" - " } else\n" - " {\n" - " int pairIdx = atomic_inc(numConcavePairsOut);\n" - " if (pairIdx<maxNumConcavePairsCapacity)\n" - " {\n" - " int4 newPair = (int4)(bodyIndexA,bodyIndexB,triangleIndex,0);\n" - " concavePairsOut[pairIdx] = newPair;\n" - " }\n" - " }\n" - " } \n" - " curIndex++;\n" - " } else\n" - " {\n" - " if (isLeafNode)\n" - " {\n" - " curIndex++;\n" - " } else\n" - " {\n" - " escapeIndex = getEscapeIndex(&rootNode);\n" - " curIndex += escapeIndex;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl deleted file mode 100644 index e754f4e1da..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mpr.cl +++ /dev/null @@ -1,311 +0,0 @@ - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3MprPenetration.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#define AppendInc(x, out) out = atomic_inc(x) -#define 
GET_NPOINTS(x) (x).m_worldNormalOnB.w -#ifdef cl_ext_atomic_counters_32 - #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else - #define counter32_t volatile __global int* -#endif - - -__kernel void mprPenetrationKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numPairs) -{ - int i = get_global_id(0); - int pairIndex = i; - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - return; - } - - float depthOut; - b3Float4 dirOut; - b3Float4 posOut; - - - int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut); - - - - - - if (res==0) - { - //add a contact - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - pairs[pairIndex].z = dstIdx; - __global struct b3Contact4Data* c = globalContactsOut + dstIdx; - c->m_worldNormalOnB = -dirOut;//normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - 
c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - //for (int i=0;i<nContacts;i++) - posOut.w = -depthOut; - c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]]; - GET_NPOINTS(*c) = 1;//nContacts; - } - } - - } -} - -typedef float4 Quaternion; -#define make_float4 (float4) - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - -inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) 
- min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - - -bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i<numUnitSphereDirections;i++) - { - - float4 crossje; - crossje = unitSphereDirections[i]; - - if (dot3F4(DeltaC2,crossje)>0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? 
d0:d1; - result = true; - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* unitSphereDirections, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global float* dmins, - int numUnitSphereDirections, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = dmins[i]; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = separatingNormals[i]; - - int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; - if (numEdgeEdgeDirections>numUnitSphereDirections) - { - bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - 
vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - } //if (hasSeparatingAxis[i]) - }//(i<numPairs) -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h deleted file mode 100644 index 74959a931c..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/mprKernels.h +++ /dev/null @@ -1,1445 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* mprKernelsCL = - "/***\n" - " * ---------------------------------\n" - " * Copyright (c)2012 Daniel Fiser <danfis@danfis.cz>\n" - " *\n" - " * This file was ported from mpr.c file, part of libccd.\n" - " * The Minkoski Portal Refinement implementation was ported \n" - " * to OpenCL by Erwin Coumans for the Bullet 3 Physics library.\n" - " * at http://github.com/erwincoumans/bullet3\n" - " *\n" - " * Distributed under the OSI-approved BSD License (the \"License\");\n" - " * see <http://www.opensource.org/licenses/bsd-license.php>.\n" - " * This software is distributed WITHOUT ANY WARRANTY; without even the\n" - " * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n" - " * See the License for more information.\n" - " */\n" - "#ifndef B3_MPR_PENETRATION_H\n" - "#define B3_MPR_PENETRATION_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs 
fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - 
"#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, 
b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = 
(b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - 
"#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" - "#define B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "typedef struct b3GpuFace b3GpuFace_t;\n" - "struct b3GpuFace\n" - "{\n" - " b3Float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - " int m_unusedPadding1;\n" - " int m_unusedPadding2;\n" - "};\n" - "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" - "struct b3ConvexPolyhedronData\n" - "{\n" - " b3Float4 m_localCenter;\n" - " b3Float4 m_extents;\n" - " b3Float4 mC;\n" - " b3Float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "};\n" - "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " 
SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "struct b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define B3_MPR_SQRT sqrt\n" - "#endif\n" - "#define B3_MPR_FMIN(x, y) ((x) < (y) ? (x) : (y))\n" - "#define B3_MPR_FABS fabs\n" - "#define B3_MPR_TOLERANCE 1E-6f\n" - "#define B3_MPR_MAX_ITERATIONS 1000\n" - "struct _b3MprSupport_t \n" - "{\n" - " b3Float4 v; //!< Support point in minkowski sum\n" - " b3Float4 v1; //!< Support point in obj1\n" - " b3Float4 v2; //!< Support point in obj2\n" - "};\n" - "typedef struct _b3MprSupport_t b3MprSupport_t;\n" - "struct _b3MprSimplex_t \n" - "{\n" - " b3MprSupport_t ps[4];\n" - " int last; //!< index of last added point\n" - "};\n" - "typedef struct _b3MprSimplex_t b3MprSimplex_t;\n" - "inline b3MprSupport_t* b3MprSimplexPointW(b3MprSimplex_t *s, int idx)\n" - "{\n" - " return &s->ps[idx];\n" - "}\n" - "inline void b3MprSimplexSetSize(b3MprSimplex_t *s, int size)\n" - "{\n" - " s->last = size - 1;\n" - "}\n" - "inline int b3MprSimplexSize(const b3MprSimplex_t *s)\n" - "{\n" - " return s->last + 1;\n" - "}\n" - "inline const b3MprSupport_t* b3MprSimplexPoint(const b3MprSimplex_t* s, int idx)\n" - "{\n" - " // here is no check on boundaries\n" - " return 
&s->ps[idx];\n" - "}\n" - "inline void b3MprSupportCopy(b3MprSupport_t *d, const b3MprSupport_t *s)\n" - "{\n" - " *d = *s;\n" - "}\n" - "inline void b3MprSimplexSet(b3MprSimplex_t *s, size_t pos, const b3MprSupport_t *a)\n" - "{\n" - " b3MprSupportCopy(s->ps + pos, a);\n" - "}\n" - "inline void b3MprSimplexSwap(b3MprSimplex_t *s, size_t pos1, size_t pos2)\n" - "{\n" - " b3MprSupport_t supp;\n" - " b3MprSupportCopy(&supp, &s->ps[pos1]);\n" - " b3MprSupportCopy(&s->ps[pos1], &s->ps[pos2]);\n" - " b3MprSupportCopy(&s->ps[pos2], &supp);\n" - "}\n" - "inline int b3MprIsZero(float val)\n" - "{\n" - " return B3_MPR_FABS(val) < FLT_EPSILON;\n" - "}\n" - "inline int b3MprEq(float _a, float _b)\n" - "{\n" - " float ab;\n" - " float a, b;\n" - " ab = B3_MPR_FABS(_a - _b);\n" - " if (B3_MPR_FABS(ab) < FLT_EPSILON)\n" - " return 1;\n" - " a = B3_MPR_FABS(_a);\n" - " b = B3_MPR_FABS(_b);\n" - " if (b > a){\n" - " return ab < FLT_EPSILON * b;\n" - " }else{\n" - " return ab < FLT_EPSILON * a;\n" - " }\n" - "}\n" - "inline int b3MprVec3Eq(const b3Float4* a, const b3Float4 *b)\n" - "{\n" - " return b3MprEq((*a).x, (*b).x)\n" - " && b3MprEq((*a).y, (*b).y)\n" - " && b3MprEq((*a).z, (*b).z);\n" - "}\n" - "inline b3Float4 b3LocalGetSupportVertex(b3Float4ConstArg supportVec,__global const b3ConvexPolyhedronData_t* hull, b3ConstArray(b3Float4) verticesA)\n" - "{\n" - " b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" - " float maxDot = -B3_LARGE_FLOAT;\n" - " if( 0 < hull->m_numVertices )\n" - " {\n" - " const b3Float4 scaled = supportVec;\n" - " int index = b3MaxDot(scaled, &verticesA[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" - " return verticesA[hull->m_vertexOffset+index];\n" - " }\n" - " return supVec;\n" - "}\n" - "B3_STATIC void b3MprConvexSupport(int pairIndex,int bodyIndex, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) 
cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " const b3Float4* _dir, b3Float4* outp, int logme)\n" - "{\n" - " //dir is in worldspace, move to local space\n" - " \n" - " b3Float4 pos = cpuBodyBuf[bodyIndex].m_pos;\n" - " b3Quat orn = cpuBodyBuf[bodyIndex].m_quat;\n" - " \n" - " b3Float4 dir = b3MakeFloat4((*_dir).x,(*_dir).y,(*_dir).z,0.f);\n" - " \n" - " const b3Float4 localDir = b3QuatRotate(b3QuatInverse(orn),dir);\n" - " \n" - " //find local support vertex\n" - " int colIndex = cpuBodyBuf[bodyIndex].m_collidableIdx;\n" - " \n" - " b3Assert(cpuCollidables[colIndex].m_shapeType==SHAPE_CONVEX_HULL);\n" - " __global const b3ConvexPolyhedronData_t* hull = &cpuConvexData[cpuCollidables[colIndex].m_shapeIndex];\n" - " \n" - " b3Float4 pInA;\n" - " if (logme)\n" - " {\n" - " b3Float4 supVec = b3MakeFloat4(0,0,0,0);\n" - " float maxDot = -B3_LARGE_FLOAT;\n" - " if( 0 < hull->m_numVertices )\n" - " {\n" - " const b3Float4 scaled = localDir;\n" - " int index = b3MaxDot(scaled, &cpuVertices[hull->m_vertexOffset], hull->m_numVertices, &maxDot);\n" - " pInA = cpuVertices[hull->m_vertexOffset+index];\n" - " \n" - " }\n" - " } else\n" - " {\n" - " pInA = b3LocalGetSupportVertex(localDir,hull,cpuVertices);\n" - " }\n" - " //move vertex to world space\n" - " *outp = b3TransformPoint(pInA,pos,orn);\n" - " \n" - "}\n" - "inline void b3MprSupport(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " const b3Float4* _dir, b3MprSupport_t *supp)\n" - "{\n" - " b3Float4 dir;\n" - " dir = *_dir;\n" - " b3MprConvexSupport(pairIndex,bodyIndexA,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v1,0);\n" - " dir = *_dir*-1.f;\n" - " 
b3MprConvexSupport(pairIndex,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,&dir, &supp->v2,0);\n" - " supp->v = supp->v1 - supp->v2;\n" - "}\n" - "inline void b3FindOrigin(int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, b3MprSupport_t *center)\n" - "{\n" - " center->v1 = cpuBodyBuf[bodyIndexA].m_pos;\n" - " center->v2 = cpuBodyBuf[bodyIndexB].m_pos;\n" - " center->v = center->v1 - center->v2;\n" - "}\n" - "inline void b3MprVec3Set(b3Float4 *v, float x, float y, float z)\n" - "{\n" - " (*v).x = x;\n" - " (*v).y = y;\n" - " (*v).z = z;\n" - " (*v).w = 0.f;\n" - "}\n" - "inline void b3MprVec3Add(b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " (*v).x += (*w).x;\n" - " (*v).y += (*w).y;\n" - " (*v).z += (*w).z;\n" - "}\n" - "inline void b3MprVec3Copy(b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " *v = *w;\n" - "}\n" - "inline void b3MprVec3Scale(b3Float4 *d, float k)\n" - "{\n" - " *d *= k;\n" - "}\n" - "inline float b3MprVec3Dot(const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " float dot;\n" - " dot = b3Dot3F4(*a,*b);\n" - " return dot;\n" - "}\n" - "inline float b3MprVec3Len2(const b3Float4 *v)\n" - "{\n" - " return b3MprVec3Dot(v, v);\n" - "}\n" - "inline void b3MprVec3Normalize(b3Float4 *d)\n" - "{\n" - " float k = 1.f / B3_MPR_SQRT(b3MprVec3Len2(d));\n" - " b3MprVec3Scale(d, k);\n" - "}\n" - "inline void b3MprVec3Cross(b3Float4 *d, const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " *d = b3Cross3(*a,*b);\n" - " \n" - "}\n" - "inline void b3MprVec3Sub2(b3Float4 *d, const b3Float4 *v, const b3Float4 *w)\n" - "{\n" - " *d = *v - *w;\n" - "}\n" - "inline void b3PortalDir(const b3MprSimplex_t *portal, b3Float4 *dir)\n" - "{\n" - " b3Float4 v2v1, v3v1;\n" - " b3MprVec3Sub2(&v2v1, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b3MprVec3Sub2(&v3v1, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b3MprVec3Cross(dir, &v2v1, &v3v1);\n" - " 
b3MprVec3Normalize(dir);\n" - "}\n" - "inline int portalEncapsulesOrigin(const b3MprSimplex_t *portal,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dot;\n" - " dot = b3MprVec3Dot(dir, &b3MprSimplexPoint(portal, 1)->v);\n" - " return b3MprIsZero(dot) || dot > 0.f;\n" - "}\n" - "inline int portalReachTolerance(const b3MprSimplex_t *portal,\n" - " const b3MprSupport_t *v4,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dv1, dv2, dv3, dv4;\n" - " float dot1, dot2, dot3;\n" - " // find the smallest dot product of dir and {v1-v4, v2-v4, v3-v4}\n" - " dv1 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, dir);\n" - " dv2 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, dir);\n" - " dv3 = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, dir);\n" - " dv4 = b3MprVec3Dot(&v4->v, dir);\n" - " dot1 = dv4 - dv1;\n" - " dot2 = dv4 - dv2;\n" - " dot3 = dv4 - dv3;\n" - " dot1 = B3_MPR_FMIN(dot1, dot2);\n" - " dot1 = B3_MPR_FMIN(dot1, dot3);\n" - " return b3MprEq(dot1, B3_MPR_TOLERANCE) || dot1 < B3_MPR_TOLERANCE;\n" - "}\n" - "inline int portalCanEncapsuleOrigin(const b3MprSimplex_t *portal, \n" - " const b3MprSupport_t *v4,\n" - " const b3Float4 *dir)\n" - "{\n" - " float dot;\n" - " dot = b3MprVec3Dot(&v4->v, dir);\n" - " return b3MprIsZero(dot) || dot > 0.f;\n" - "}\n" - "inline void b3ExpandPortal(b3MprSimplex_t *portal,\n" - " const b3MprSupport_t *v4)\n" - "{\n" - " float dot;\n" - " b3Float4 v4v0;\n" - " b3MprVec3Cross(&v4v0, &v4->v, &b3MprSimplexPoint(portal, 0)->v);\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSet(portal, 1, v4);\n" - " }else{\n" - " b3MprSimplexSet(portal, 3, v4);\n" - " }\n" - " }else{\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &v4v0);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSet(portal, 2, v4);\n" - " }else{\n" - " b3MprSimplexSet(portal, 1, v4);\n" - " }\n" - " }\n" - "}\n" - 
"B3_STATIC int b3DiscoverPortal(int pairIndex, int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " __global int* hasSepAxis,\n" - " b3MprSimplex_t *portal)\n" - "{\n" - " b3Float4 dir, va, vb;\n" - " float dot;\n" - " int cont;\n" - " \n" - " \n" - " // vertex 0 is center of portal\n" - " b3FindOrigin(bodyIndexA,bodyIndexB,cpuBodyBuf, b3MprSimplexPointW(portal, 0));\n" - " // vertex 0 is center of portal\n" - " b3MprSimplexSetSize(portal, 1);\n" - " \n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 0)->v, b3mpr_vec3_origin)){\n" - " // Portal's center lies on origin (0,0,0) => we know that objects\n" - " // intersect but we would need to know penetration info.\n" - " // So move center little bit...\n" - " b3MprVec3Set(&va, FLT_EPSILON * 10.f, 0.f, 0.f);\n" - " b3MprVec3Add(&b3MprSimplexPointW(portal, 0)->v, &va);\n" - " }\n" - " // vertex 1 = support in direction of origin\n" - " b3MprVec3Copy(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Scale(&dir, -1.f);\n" - " b3MprVec3Normalize(&dir);\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 1));\n" - " b3MprSimplexSetSize(portal, 2);\n" - " // test if origin isn't outside of v1\n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 1)->v, &dir);\n" - " \n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " // vertex 2\n" - " b3MprVec3Cross(&dir, &b3MprSimplexPoint(portal, 0)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " if (b3MprIsZero(b3MprVec3Len2(&dir))){\n" - " if (b3MprVec3Eq(&b3MprSimplexPoint(portal, 1)->v, b3mpr_vec3_origin)){\n" - " // origin lies on v1\n" - " return 1;\n" - " 
}else{\n" - " // origin lies on v0-v1 segment\n" - " return 2;\n" - " }\n" - " }\n" - " b3MprVec3Normalize(&dir);\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 2));\n" - " \n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 2)->v, &dir);\n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " b3MprSimplexSetSize(portal, 3);\n" - " // vertex 3 direction\n" - " b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&dir, &va, &vb);\n" - " b3MprVec3Normalize(&dir);\n" - " // it is better to form portal faces to be oriented \"outside\" origin\n" - " dot = b3MprVec3Dot(&dir, &b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot > 0.f){\n" - " b3MprSimplexSwap(portal, 1, 2);\n" - " b3MprVec3Scale(&dir, -1.f);\n" - " }\n" - " while (b3MprSimplexSize(portal) < 4){\n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, b3MprSimplexPointW(portal, 3));\n" - " \n" - " dot = b3MprVec3Dot(&b3MprSimplexPoint(portal, 3)->v, &dir);\n" - " if (b3MprIsZero(dot) || dot < 0.f)\n" - " return -1;\n" - " cont = 0;\n" - " // test if origin is outside (v1, v0, v3) - set v2 as v3 and\n" - " // continue\n" - " b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 3)->v);\n" - " dot = b3MprVec3Dot(&va, &b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot < 0.f && !b3MprIsZero(dot)){\n" - " b3MprSimplexSet(portal, 2, b3MprSimplexPoint(portal, 3));\n" - " cont = 1;\n" - " }\n" - " if (!cont){\n" - " // test if origin is outside (v3, v0, v2) - set v1 as v3 and\n" - " // continue\n" - " b3MprVec3Cross(&va, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " dot = b3MprVec3Dot(&va, 
&b3MprSimplexPoint(portal, 0)->v);\n" - " if (dot < 0.f && !b3MprIsZero(dot)){\n" - " b3MprSimplexSet(portal, 1, b3MprSimplexPoint(portal, 3));\n" - " cont = 1;\n" - " }\n" - " }\n" - " if (cont){\n" - " b3MprVec3Sub2(&va, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Sub2(&vb, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&dir, &va, &vb);\n" - " b3MprVec3Normalize(&dir);\n" - " }else{\n" - " b3MprSimplexSetSize(portal, 4);\n" - " }\n" - " }\n" - " return 0;\n" - "}\n" - "B3_STATIC int b3RefinePortal(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " b3MprSimplex_t *portal)\n" - "{\n" - " b3Float4 dir;\n" - " b3MprSupport_t v4;\n" - " for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" - " //while (1)\n" - " {\n" - " // compute direction outside the portal (from v0 throught v1,v2,v3\n" - " // face)\n" - " b3PortalDir(portal, &dir);\n" - " // test if origin is inside the portal\n" - " if (portalEncapsulesOrigin(portal, &dir))\n" - " return 0;\n" - " // get next support point\n" - " \n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" - " // test if v4 can expand portal to contain origin and if portal\n" - " // expanding doesn't reach given tolerance\n" - " if (!portalCanEncapsuleOrigin(portal, &v4, &dir)\n" - " || portalReachTolerance(portal, &v4, &dir))\n" - " {\n" - " return -1;\n" - " }\n" - " // v1-v2-v3 triangle must be rearranged to face outside Minkowski\n" - " // difference (direction from v0).\n" - " b3ExpandPortal(portal, &v4);\n" - " }\n" - " return -1;\n" - "}\n" - "B3_STATIC void b3FindPos(const b3MprSimplex_t *portal, b3Float4 *pos)\n" - "{\n" - " 
b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " b3Float4 dir;\n" - " size_t i;\n" - " float b[4], sum, inv;\n" - " b3Float4 vec, p1, p2;\n" - " b3PortalDir(portal, &dir);\n" - " // use barycentric coordinates of tetrahedron to find origin\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[0] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[1] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 0)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[2] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 3)->v);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[3] = b3MprVec3Dot(&vec, &b3MprSimplexPoint(portal, 0)->v);\n" - " sum = b[0] + b[1] + b[2] + b[3];\n" - " if (b3MprIsZero(sum) || sum < 0.f){\n" - " b[0] = 0.f;\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 2)->v,\n" - " &b3MprSimplexPoint(portal, 3)->v);\n" - " b[1] = b3MprVec3Dot(&vec, &dir);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 3)->v,\n" - " &b3MprSimplexPoint(portal, 1)->v);\n" - " b[2] = b3MprVec3Dot(&vec, &dir);\n" - " b3MprVec3Cross(&vec, &b3MprSimplexPoint(portal, 1)->v,\n" - " &b3MprSimplexPoint(portal, 2)->v);\n" - " b[3] = b3MprVec3Dot(&vec, &dir);\n" - " sum = b[1] + b[2] + b[3];\n" - " }\n" - " inv = 1.f / sum;\n" - " b3MprVec3Copy(&p1, b3mpr_vec3_origin);\n" - " b3MprVec3Copy(&p2, b3mpr_vec3_origin);\n" - " for (i = 0; i < 4; i++){\n" - " b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v1);\n" - " b3MprVec3Scale(&vec, b[i]);\n" - " b3MprVec3Add(&p1, &vec);\n" - " b3MprVec3Copy(&vec, &b3MprSimplexPoint(portal, i)->v2);\n" - " b3MprVec3Scale(&vec, b[i]);\n" - " b3MprVec3Add(&p2, &vec);\n" - " }\n" - " b3MprVec3Scale(&p1, inv);\n" - 
" b3MprVec3Scale(&p2, inv);\n" - " b3MprVec3Copy(pos, &p1);\n" - " b3MprVec3Add(pos, &p2);\n" - " b3MprVec3Scale(pos, 0.5);\n" - "}\n" - "inline float b3MprVec3Dist2(const b3Float4 *a, const b3Float4 *b)\n" - "{\n" - " b3Float4 ab;\n" - " b3MprVec3Sub2(&ab, a, b);\n" - " return b3MprVec3Len2(&ab);\n" - "}\n" - "inline float _b3MprVec3PointSegmentDist2(const b3Float4 *P,\n" - " const b3Float4 *x0,\n" - " const b3Float4 *b,\n" - " b3Float4 *witness)\n" - "{\n" - " // The computation comes from solving equation of segment:\n" - " // S(t) = x0 + t.d\n" - " // where - x0 is initial point of segment\n" - " // - d is direction of segment from x0 (|d| > 0)\n" - " // - t belongs to <0, 1> interval\n" - " // \n" - " // Than, distance from a segment to some point P can be expressed:\n" - " // D(t) = |x0 + t.d - P|^2\n" - " // which is distance from any point on segment. Minimization\n" - " // of this function brings distance from P to segment.\n" - " // Minimization of D(t) leads to simple quadratic equation that's\n" - " // solving is straightforward.\n" - " //\n" - " // Bonus of this method is witness point for free.\n" - " float dist, t;\n" - " b3Float4 d, a;\n" - " // direction of segment\n" - " b3MprVec3Sub2(&d, b, x0);\n" - " // precompute vector from P to x0\n" - " b3MprVec3Sub2(&a, x0, P);\n" - " t = -1.f * b3MprVec3Dot(&a, &d);\n" - " t /= b3MprVec3Len2(&d);\n" - " if (t < 0.f || b3MprIsZero(t)){\n" - " dist = b3MprVec3Dist2(x0, P);\n" - " if (witness)\n" - " b3MprVec3Copy(witness, x0);\n" - " }else if (t > 1.f || b3MprEq(t, 1.f)){\n" - " dist = b3MprVec3Dist2(b, P);\n" - " if (witness)\n" - " b3MprVec3Copy(witness, b);\n" - " }else{\n" - " if (witness){\n" - " b3MprVec3Copy(witness, &d);\n" - " b3MprVec3Scale(witness, t);\n" - " b3MprVec3Add(witness, x0);\n" - " dist = b3MprVec3Dist2(witness, P);\n" - " }else{\n" - " // recycling variables\n" - " b3MprVec3Scale(&d, t);\n" - " b3MprVec3Add(&d, &a);\n" - " dist = b3MprVec3Len2(&d);\n" - " }\n" - " }\n" - " return 
dist;\n" - "}\n" - "inline float b3MprVec3PointTriDist2(const b3Float4 *P,\n" - " const b3Float4 *x0, const b3Float4 *B,\n" - " const b3Float4 *C,\n" - " b3Float4 *witness)\n" - "{\n" - " // Computation comes from analytic expression for triangle (x0, B, C)\n" - " // T(s, t) = x0 + s.d1 + t.d2, where d1 = B - x0 and d2 = C - x0 and\n" - " // Then equation for distance is:\n" - " // D(s, t) = | T(s, t) - P |^2\n" - " // This leads to minimization of quadratic function of two variables.\n" - " // The solution from is taken only if s is between 0 and 1, t is\n" - " // between 0 and 1 and t + s < 1, otherwise distance from segment is\n" - " // computed.\n" - " b3Float4 d1, d2, a;\n" - " float u, v, w, p, q, r;\n" - " float s, t, dist, dist2;\n" - " b3Float4 witness2;\n" - " b3MprVec3Sub2(&d1, B, x0);\n" - " b3MprVec3Sub2(&d2, C, x0);\n" - " b3MprVec3Sub2(&a, x0, P);\n" - " u = b3MprVec3Dot(&a, &a);\n" - " v = b3MprVec3Dot(&d1, &d1);\n" - " w = b3MprVec3Dot(&d2, &d2);\n" - " p = b3MprVec3Dot(&a, &d1);\n" - " q = b3MprVec3Dot(&a, &d2);\n" - " r = b3MprVec3Dot(&d1, &d2);\n" - " s = (q * r - w * p) / (w * v - r * r);\n" - " t = (-s * r - q) / w;\n" - " if ((b3MprIsZero(s) || s > 0.f)\n" - " && (b3MprEq(s, 1.f) || s < 1.f)\n" - " && (b3MprIsZero(t) || t > 0.f)\n" - " && (b3MprEq(t, 1.f) || t < 1.f)\n" - " && (b3MprEq(t + s, 1.f) || t + s < 1.f)){\n" - " if (witness){\n" - " b3MprVec3Scale(&d1, s);\n" - " b3MprVec3Scale(&d2, t);\n" - " b3MprVec3Copy(witness, x0);\n" - " b3MprVec3Add(witness, &d1);\n" - " b3MprVec3Add(witness, &d2);\n" - " dist = b3MprVec3Dist2(witness, P);\n" - " }else{\n" - " dist = s * s * v;\n" - " dist += t * t * w;\n" - " dist += 2.f * s * t * r;\n" - " dist += 2.f * s * p;\n" - " dist += 2.f * t * q;\n" - " dist += u;\n" - " }\n" - " }else{\n" - " dist = _b3MprVec3PointSegmentDist2(P, x0, B, witness);\n" - " dist2 = _b3MprVec3PointSegmentDist2(P, x0, C, &witness2);\n" - " if (dist2 < dist){\n" - " dist = dist2;\n" - " if (witness)\n" - " 
b3MprVec3Copy(witness, &witness2);\n" - " }\n" - " dist2 = _b3MprVec3PointSegmentDist2(P, B, C, &witness2);\n" - " if (dist2 < dist){\n" - " dist = dist2;\n" - " if (witness)\n" - " b3MprVec3Copy(witness, &witness2);\n" - " }\n" - " }\n" - " return dist;\n" - "}\n" - "B3_STATIC void b3FindPenetr(int pairIndex,int bodyIndexA, int bodyIndexB, b3ConstArray(b3RigidBodyData_t) cpuBodyBuf, \n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " b3MprSimplex_t *portal,\n" - " float *depth, b3Float4 *pdir, b3Float4 *pos)\n" - "{\n" - " b3Float4 dir;\n" - " b3MprSupport_t v4;\n" - " unsigned long iterations;\n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " iterations = 1UL;\n" - " for (int i=0;i<B3_MPR_MAX_ITERATIONS;i++)\n" - " //while (1)\n" - " {\n" - " // compute portal direction and obtain next support point\n" - " b3PortalDir(portal, &dir);\n" - " \n" - " b3MprSupport(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&dir, &v4);\n" - " // reached tolerance -> find penetration info\n" - " if (portalReachTolerance(portal, &v4, &dir)\n" - " || iterations ==B3_MPR_MAX_ITERATIONS)\n" - " {\n" - " *depth = b3MprVec3PointTriDist2(b3mpr_vec3_origin,&b3MprSimplexPoint(portal, 1)->v,&b3MprSimplexPoint(portal, 2)->v,&b3MprSimplexPoint(portal, 3)->v,pdir);\n" - " *depth = B3_MPR_SQRT(*depth);\n" - " \n" - " if (b3MprIsZero((*pdir).x) && b3MprIsZero((*pdir).y) && b3MprIsZero((*pdir).z))\n" - " {\n" - " \n" - " *pdir = dir;\n" - " } \n" - " b3MprVec3Normalize(pdir);\n" - " \n" - " // barycentric coordinates:\n" - " b3FindPos(portal, pos);\n" - " return;\n" - " }\n" - " b3ExpandPortal(portal, &v4);\n" - " iterations++;\n" - " }\n" - "}\n" - "B3_STATIC void b3FindPenetrTouch(b3MprSimplex_t *portal,float *depth, b3Float4 *dir, b3Float4 *pos)\n" - "{\n" - 
" // Touching contact on portal's v1 - so depth is zero and direction\n" - " // is unimportant and pos can be guessed\n" - " *depth = 0.f;\n" - " b3Float4 zero = b3MakeFloat4(0,0,0,0);\n" - " b3Float4* b3mpr_vec3_origin = &zero;\n" - " b3MprVec3Copy(dir, b3mpr_vec3_origin);\n" - " b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" - " b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" - " b3MprVec3Scale(pos, 0.5);\n" - "}\n" - "B3_STATIC void b3FindPenetrSegment(b3MprSimplex_t *portal,\n" - " float *depth, b3Float4 *dir, b3Float4 *pos)\n" - "{\n" - " \n" - " // Origin lies on v0-v1 segment.\n" - " // Depth is distance to v1, direction also and position must be\n" - " // computed\n" - " b3MprVec3Copy(pos, &b3MprSimplexPoint(portal, 1)->v1);\n" - " b3MprVec3Add(pos, &b3MprSimplexPoint(portal, 1)->v2);\n" - " b3MprVec3Scale(pos, 0.5f);\n" - " \n" - " b3MprVec3Copy(dir, &b3MprSimplexPoint(portal, 1)->v);\n" - " *depth = B3_MPR_SQRT(b3MprVec3Len2(dir));\n" - " b3MprVec3Normalize(dir);\n" - "}\n" - "inline int b3MprPenetration(int pairIndex, int bodyIndexA, int bodyIndexB,\n" - " b3ConstArray(b3RigidBodyData_t) cpuBodyBuf,\n" - " b3ConstArray(b3ConvexPolyhedronData_t) cpuConvexData, \n" - " b3ConstArray(b3Collidable_t) cpuCollidables,\n" - " b3ConstArray(b3Float4) cpuVertices,\n" - " __global b3Float4* sepAxis,\n" - " __global int* hasSepAxis,\n" - " float *depthOut, b3Float4* dirOut, b3Float4* posOut)\n" - "{\n" - " \n" - " b3MprSimplex_t portal;\n" - " \n" - "// if (!hasSepAxis[pairIndex])\n" - " // return -1;\n" - " \n" - " hasSepAxis[pairIndex] = 0;\n" - " int res;\n" - " // Phase 1: Portal discovery\n" - " res = b3DiscoverPortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices,sepAxis,hasSepAxis, &portal);\n" - " \n" - " \n" - " //sepAxis[pairIndex] = *pdir;//or -dir?\n" - " switch (res)\n" - " {\n" - " case 0:\n" - " {\n" - " // Phase 2: Portal refinement\n" - " \n" - " res = 
b3RefinePortal(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal);\n" - " if (res < 0)\n" - " return -1;\n" - " // Phase 3. Penetration info\n" - " b3FindPenetr(pairIndex,bodyIndexA,bodyIndexB,cpuBodyBuf,cpuConvexData,cpuCollidables,cpuVertices, sepAxis,&portal, depthOut, dirOut, posOut);\n" - " hasSepAxis[pairIndex] = 1;\n" - " sepAxis[pairIndex] = -*dirOut;\n" - " break;\n" - " }\n" - " case 1:\n" - " {\n" - " // Touching contact on portal's v1.\n" - " b3FindPenetrTouch(&portal, depthOut, dirOut, posOut);\n" - " break;\n" - " }\n" - " case 2:\n" - " {\n" - " \n" - " b3FindPenetrSegment( &portal, depthOut, dirOut, posOut);\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " hasSepAxis[pairIndex]=0;\n" - " //if (res < 0)\n" - " //{\n" - " // Origin isn't inside portal - no collision.\n" - " return -1;\n" - " //}\n" - " }\n" - " };\n" - " \n" - " return 0;\n" - "};\n" - "#endif //B3_MPR_PENETRATION_H\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - 
"};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "#ifdef cl_ext_atomic_counters_32\n" - " #pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - " #define counter32_t volatile __global int*\n" - "#endif\n" - "__kernel void mprPenetrationKernel( __global int4* pairs,\n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global float4* separatingNormals,\n" - " __global int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int contactCapacity,\n" - " int numPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " if (i<numPairs)\n" - " {\n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " return;\n" - " }\n" - " \n" - " if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" - " {\n" - " return;\n" - " }\n" - " float depthOut;\n" - " b3Float4 dirOut;\n" - " b3Float4 posOut;\n" - " int res = b3MprPenetration(pairIndex, bodyIndexA, bodyIndexB,rigidBodies,convexShapes,collidables,vertices,separatingNormals,hasSeparatingAxis,&depthOut, &dirOut, &posOut);\n" - " \n" - " \n" - " \n" - " \n" - " if (res==0)\n" 
- " {\n" - " //add a contact\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx<contactCapacity)\n" - " {\n" - " pairs[pairIndex].z = dstIdx;\n" - " __global struct b3Contact4Data* c = globalContactsOut + dstIdx;\n" - " c->m_worldNormalOnB = -dirOut;//normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " //for (int i=0;i<nContacts;i++)\n" - " posOut.w = -depthOut;\n" - " c->m_worldPosB[0] = posOut;//localPoints[contactIdx[i]];\n" - " GET_NPOINTS(*c) = 1;//nContacts;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "typedef float4 Quaternion;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* 
orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "inline void project(__global const b3ConvexPolyhedronData_t* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;i<numVerts;i++)\n" - " {\n" - " float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "bool findSeparatingAxisUnitSphere( __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " int numUnitSphereDirections,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test unit sphere directions\n" - " for (int i=0;i<numUnitSphereDirections;i++)\n" - " {\n" - " float4 crossje;\n" - " crossje = unitSphereDirections[i]; \n" - " if (dot3F4(DeltaC2,crossje)>0)\n" - " crossje *= -1.f;\n" - " {\n" - " float dist;\n" - " bool result = true;\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - 
" project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " \n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " dist = d0<d1 ? d0:d1;\n" - " result = true;\n" - " \n" - " if(dist<*dmin)\n" - " {\n" - " *dmin = dist;\n" - " *sep = crossje;\n" - " }\n" - " }\n" - " }\n" - " \n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "__kernel void findSeparatingAxisUnitSphereKernel( __global const int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " __global float4* separatingNormals,\n" - " __global int* hasSeparatingAxis,\n" - " __global float* dmins,\n" - " int numUnitSphereDirections,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " \n" - " float dmin = dmins[i];\n" - " \n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = 
convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal = separatingNormals[i];\n" - " \n" - " int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" - " if (numEdgeEdgeDirections>numUnitSphereDirections)\n" - " {\n" - " bool sepEE = findSeparatingAxisUnitSphere( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,unitSphereDirections,numUnitSphereDirections,&sepNormal,&dmin);\n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " } //if (hasSeparatingAxis[i])\n" - " }//(i<numPairs)\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl deleted file mode 100644 index 9c9e920f13..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.cl +++ /dev/null @@ -1,1374 +0,0 @@ -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_PLANE 4 -#define SHAPE_CONCAVE_TRIMESH 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define GET_GROUP_IDX 
get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - -#define max2 max -#define min2 min - -typedef unsigned int u32; - - - - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -///keep this in sync with btCollidable.h -typedef struct -{ - int m_numChildShapes; - float m_radius; - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; - -} ConvexPolyhedronCL; - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 
(int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -//#define dot3F4 dot - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* 
translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - -void trMul(float4 translationA, Quaternion orientationA, - float4 translationB, Quaternion orientationB, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtMul(orientationA,orientationB); - *translationOut = transform(&translationB,&translationA,&orientationA); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - - -__inline float4 lerp3(const float4 a,const float4 b, float t) -{ - return make_float4( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - - -float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace) -{ - float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0); - float dist = dot3F4(n, point) + planeEqn.w; - *closestPointOnFace = point - dist * n; - return dist; -} - - - -inline bool IsPointInPolygon(float4 p, - const btGpuFace* face, - __global const float4* baseVertex, - __global const int* convexIndices, - float4* out) -{ - float4 a; - float4 b; - float4 ab; - float4 ap; - float4 v; - - float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f); - - if (face->m_numIndices<2) - return false; - - - float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]]; - - b = v0; - - for(unsigned i=0; i != face->m_numIndices; ++i) - { - a = b; - float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]]; - b = vi; - ab = b-a; - ap = p-a; - v = cross3(ab,plane); - - if (dot(ap, v) > 0.f) - { - float ab_m2 = dot(ab, ab); - float rt = ab_m2 != 0.f ? 
dot(ab, ap) / ab_m2 : 0.f; - if (rt <= 0.f) - { - *out = a; - } - else if (rt >= 1.f) - { - *out = b; - } - else - { - float s = 1.f - rt; - out[0].x = s * a.x + rt * b.x; - out[0].y = s * a.y + rt * b.y; - out[0].z = s * a.z + rt * b.z; - } - return false; - } - } - return true; -} - - - - -void computeContactSphereConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* convexVertices, - __global const int* convexIndices, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - int numFaces = convexShapes[shapeIndex].m_numFaces; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = true; - - for ( int f = 0; f < numFaces; f++ ) - { - btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f]; - - // set up a plane equation - float4 planeEqn; - float4 n1 = face.m_plane; - n1.w = 0.f; - planeEqn = n1; - planeEqn.w = face.m_plane.w; - - - // compute a signed distance from the vertex in cloth to the face of rigidbody. - float4 pntReturn; - float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, &pntReturn); - - // If the distance is positive, the plane is a separating plane. 
- if ( dist > radius ) - { - bCollide = false; - break; - } - - - if (dist>0) - { - //might hit an edge or vertex - float4 out; - float4 zeroPos = make_float4(0,0,0,0); - - bool isInPoly = IsPointInPolygon(spherePos, - &face, - &convexVertices[convexShapes[shapeIndex].m_vertexOffset], - convexIndices, - &out); - if (isInPoly) - { - if (dist>minDist) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld = planeEqn; - - } - } else - { - float4 tmp = spherePos-out; - float l2 = dot(tmp,tmp); - if (l2<radius*radius) - { - dist = sqrt(l2); - if (dist>minDist) - { - minDist = dist; - closestPnt = out; - hitNormalWorld = tmp/dist; - - } - - } else - { - bCollide = false; - break; - } - } - } else - { - if ( dist > minDist ) - { - minDist = dist; - closestPnt = pntReturn; - hitNormalWorld.xyz = planeEqn.xyz; - } - } - - } - - - - if (bCollide && minDist > -10000) - { - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - - float actualDepth = minDist-radius; - if (actualDepth<=0.f) - { - - - pOnB1.w = actualDepth; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - - if (1)//dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - GET_NPOINTS(*c) = 1; - } - - } - }//if (hasCollision) - -} - - - -int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0.f); - { 
- - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - -#define MAX_PLANE_CONVEX_POINTS 64 - -int computeContactPlaneConvex(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu*collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* convexVertices, - __global const int* convexIndices, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 posB, - Quaternion ornB - ) -{ - int resultIndex=-1; - - int shapeIndex = collidables[collidableIndexB].m_shapeIndex; - __global const ConvexPolyhedronCL* hullB 
= &convexShapes[shapeIndex]; - - float4 posA; - posA = rigidBodies[bodyIndexA].m_pos; - Quaternion ornA; - ornA = rigidBodies[bodyIndexA].m_quat; - - int numContactsOut = 0; - int numWorldVertsB1= 0; - - float4 planeEq; - planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); - float4 planeNormalWorld; - planeNormalWorld = qtRotate(ornA,planeNormal); - float planeConstant = planeEq.w; - - float4 invPosA;Quaternion invOrnA; - float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; - { - - trInverse(posA,ornA,&invPosA,&invOrnA); - trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - } - float4 invPosB;Quaternion invOrnB; - float4 planeInConvexPos1; Quaternion planeInConvexOrn1; - { - - trInverse(posB,ornB,&invPosB,&invOrnB); - trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); - } - - - float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal); - float maxDot = -1e30; - int hitVertex=-1; - float4 hitVtx; - - - - float4 contactPoints[MAX_PLANE_CONVEX_POINTS]; - int numPoints = 0; - - int4 contactIdx; - contactIdx=make_int4(0,1,2,3); - - - for (int i=0;i<hullB->m_numVertices;i++) - { - float4 vtx = convexVertices[hullB->m_vertexOffset+i]; - float curDot = dot(vtx,planeNormalInConvex); - - - if (curDot>maxDot) - { - hitVertex=i; - maxDot=curDot; - hitVtx = vtx; - //make sure the deepest points is always included - if (numPoints==MAX_PLANE_CONVEX_POINTS) - numPoints--; - } - - if (numPoints<MAX_PLANE_CONVEX_POINTS) - { - float4 vtxWorld = transform(&vtx, &posB, &ornB); - float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld; - float dist = dot(planeNormal,vtxInPlane)-planeConstant; - if (dist<0.f) - { - vtxWorld.w = dist; - contactPoints[numPoints] = vtxWorld; - numPoints++; - } - } - - } - - int numReducedPoints = numPoints; - if (numPoints>4) - { - numReducedPoints = 
extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx); - } - - if (numReducedPoints>0) - { - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - resultIndex = dstIdx; - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -planeNormalWorld; - //c->setFrictionCoeff(0.7); - //c->setRestituitionCoeff(0.f); - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - switch (numReducedPoints) - { - case 4: - c->m_worldPosB[3] = contactPoints[contactIdx.w]; - case 3: - c->m_worldPosB[2] = contactPoints[contactIdx.z]; - case 2: - c->m_worldPosB[1] = contactPoints[contactIdx.y]; - case 1: - c->m_worldPosB[0] = contactPoints[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = numReducedPoints; - }//if (dstIdx < numPairs) - } - - return resultIndex; -} - - -void computeContactPlaneSphere(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const btGpuFace* faces, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity) -{ - float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane; - float radius = collidables[collidableIndexB].m_radius; - float4 posA1 = rigidBodies[bodyIndexA].m_pos; - float4 ornA1 = rigidBodies[bodyIndexA].m_quat; - float4 posB1 = rigidBodies[bodyIndexB].m_pos; - float4 ornB1 = rigidBodies[bodyIndexB].m_quat; - - bool hasCollision = false; - float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f); - float planeConstant = 
planeEq.w; - float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1; - { - float4 invPosA;Quaternion invOrnA; - trInverse(posA1,ornA1,&invPosA,&invOrnA); - trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - } - float4 planeInConvexPos1; Quaternion planeInConvexOrn1; - { - float4 invPosB;Quaternion invOrnB; - trInverse(posB1,ornB1,&invPosB,&invOrnB); - trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); - } - float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius; - float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1); - float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant; - hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold(); - if (hasCollision) - { - float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1; - float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1); - float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1); - float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance; - pOnB1.w = distance; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if (hasCollision) -} - - -__kernel void primitiveContactsKernel( __global int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global 
const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, int maxContactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 posB; - posB = rigidBodies[bodyIndexB].m_pos; - Quaternion ornB; - ornB = rigidBodies[bodyIndexB].m_quat; - int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB); - if (contactIndex>=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - float4 posA; - posA = rigidBodies[bodyIndexA].m_pos; - Quaternion ornA; - ornA = rigidBodies[bodyIndexA].m_quat; - - - int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - - if (contactIndex>=0) - pairs[pairIndex].z = contactIndex; - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE && - collidables[collidableIndexB].m_shapeType == 
SHAPE_SPHERE) - { - computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - return; - } - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_PLANE) - { - - - computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity); - - return; - } - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = rigidBodies[bodyIndexB].m_pos; - float4 convexOrn = rigidBodies[bodyIndexB].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - return; - } - - - - - - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == 
SHAPE_SPHERE) - { - //sphere-sphere - float radiusA = collidables[collidableIndexA].m_radius; - float radiusB = collidables[collidableIndexB].m_radius; - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - float4 diff = posA-posB; - float len = length(diff); - - ///iff distance positive, don't generate a new contact - if ( len <= (radiusA+radiusB)) - { - ///distance (negative means penetration) - float dist = len - (radiusA+radiusB); - float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); - if (len > 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - - return; - }//SHAPE_SPHERE SHAPE_SPHERE - - }// if (i<numPairs) - -} - - -// work-in-progress -__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global const btGpuChildShape* gpuChildShapes, - __global struct b3Contact4Data* restrict globalContactsOut, - 
counter32_t nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity - ) -{ - - int i = get_global_id(0); - if (i<numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - int pairIndex = i; - if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, - 
rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE)) - { - - computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices, - faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA); - return; - } - - if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE)) - { - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = posA; - float4 convexOrn = ornA; - - computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , collidableIndexB,collidableIndexA, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - - if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL)) - { - - float4 spherePos = rigidBodies[bodyIndexA].m_pos; - float sphereRadius = collidables[collidableIndexA].m_radius; - float4 convexPos = posB; - float4 convexOrn = ornB; - - - computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, - rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn); - - return; - } - }// if (i<numCompoundPairs) -} - - -bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p ) -{ - - const float4* p1 = &vertices[0]; - const float4* p2 = &vertices[1]; - const float4* p3 = &vertices[2]; - - float4 edge1; edge1 = (*p2 - *p1); - float4 edge2; edge2 = ( *p3 - *p2 ); - float4 edge3; edge3 = ( *p1 - *p3 ); - - - float4 p1_to_p; p1_to_p = ( *p - *p1 ); - float4 p2_to_p; p2_to_p = ( *p - *p2 ); - float4 p3_to_p; 
p3_to_p = ( *p - *p3 ); - - float4 edge1_normal; edge1_normal = ( cross(edge1,*normal)); - float4 edge2_normal; edge2_normal = ( cross(edge2,*normal)); - float4 edge3_normal; edge3_normal = ( cross(edge3,*normal)); - - - - float r1, r2, r3; - r1 = dot(edge1_normal,p1_to_p ); - r2 = dot(edge2_normal,p2_to_p ); - r3 = dot(edge3_normal,p3_to_p ); - - if ( r1 > 0 && r2 > 0 && r3 > 0 ) - return true; - if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) - return true; - return false; - -} - - -float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) -{ - float4 diff = p - from; - float4 v = to - from; - float t = dot(v,diff); - - if (t > 0) - { - float dotVV = dot(v,v); - if (t < dotVV) - { - t /= dotVV; - diff -= t*v; - } else - { - t = 1; - diff -= v; - } - } else - { - t = 0; - } - *nearest = from + t*v; - return dot(diff,diff); -} - - -void computeContactSphereTriangle(int pairIndex, - int bodyIndexA, int bodyIndexB, - int collidableIndexA, int collidableIndexB, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - const float4* triangleVertices, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int maxContactCapacity, - float4 spherePos2, - float radius, - float4 pos, - float4 quat, - int faceIndex - ) -{ - - float4 invPos; - float4 invOrn; - - trInverse(pos,quat, &invPos,&invOrn); - float4 spherePos = transform(&spherePos2,&invPos,&invOrn); - int numFaces = 3; - float4 closestPnt = (float4)(0, 0, 0, 0); - float4 hitNormalWorld = (float4)(0, 0, 0, 0); - float minDist = -1000000.f; - bool bCollide = false; - - - ////////////////////////////////////// - - float4 sphereCenter; - sphereCenter = spherePos; - - const float4* vertices = triangleVertices; - float contactBreakingThreshold = 0.f;//todo? - float radiusWithThreshold = radius + contactBreakingThreshold; - float4 edge10; - edge10 = vertices[1]-vertices[0]; - edge10.w = 0.f;//is this needed? 
- float4 edge20; - edge20 = vertices[2]-vertices[0]; - edge20.w = 0.f;//is this needed? - float4 normal = cross3(edge10,edge20); - normal = normalize(normal); - float4 p1ToCenter; - p1ToCenter = sphereCenter - vertices[0]; - - float distanceFromPlane = dot(p1ToCenter,normal); - - if (distanceFromPlane < 0.f) - { - //triangle facing the other way - distanceFromPlane *= -1.f; - normal *= -1.f; - } - hitNormalWorld = normal; - - bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold; - - // Check for contact / intersection - bool hasContact = false; - float4 contactPoint; - if (isInsideContactPlane) - { - - if (pointInTriangle(vertices,&normal, &sphereCenter)) - { - // Inside the contact wedge - touches a point on the shell plane - hasContact = true; - contactPoint = sphereCenter - normal*distanceFromPlane; - - } else { - // Could be inside one of the contact capsules - float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold; - float4 nearestOnEdge; - int numEdges = 3; - for (int i = 0; i < numEdges; i++) - { - float4 pa =vertices[i]; - float4 pb = vertices[(i+1)%3]; - - float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge); - if (distanceSqr < contactCapsuleRadiusSqr) - { - // Yep, we're inside a capsule - hasContact = true; - contactPoint = nearestOnEdge; - - } - - } - } - } - - if (hasContact) - { - - closestPnt = contactPoint; - float4 contactToCenter = sphereCenter - contactPoint; - minDist = length(contactToCenter); - if (minDist>FLT_EPSILON) - { - hitNormalWorld = normalize(contactToCenter);//*(1./minDist); - bCollide = true; - } - - } - - - ///////////////////////////////////// - - if (bCollide && minDist > -10000) - { - - float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld); - float4 pOnB1 = transform(&closestPnt,&pos,&quat); - float actualDepth = minDist-radius; - - - if (actualDepth<=0.f) - { - pOnB1.w = actualDepth; - int dstIdx; - - - float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1); - if 
(lenSqr>FLT_EPSILON) - { - AppendInc( nGlobalContactsOut, dstIdx ); - - if (dstIdx < maxContactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB1; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB; - c->m_worldPosB[0] = pOnB1; - - c->m_childIndexA = -1; - c->m_childIndexB = faceIndex; - - GET_NPOINTS(*c) = 1; - } - } - - } - }//if (hasCollision) - -} - - - -// work-in-progress -__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numConcavePairs, int maxContactCapacity - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE) - { - int f = concavePairs[i].z; - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - 
verticesA[i] = vert; - } - - float4 spherePos = rigidBodies[bodyIndexB].m_pos; - float sphereRadius = collidables[collidableIndexB].m_radius; - float4 convexPos = rigidBodies[bodyIndexA].m_pos; - float4 convexOrn = rigidBodies[bodyIndexA].m_quat; - - computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, - rigidBodies,collidables, - verticesA, - globalContactsOut, nGlobalContactsOut,maxContactCapacity, - spherePos,sphereRadius,convexPos,convexOrn, f); - - return; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h deleted file mode 100644 index b2e0a2dd47..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/primitiveContacts.h +++ /dev/null @@ -1,1288 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* primitiveContactsKernelsCL = - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - 
"{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_PLANE 4\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define SHAPE_SPHERE 7\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION 
cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define max2 max\n" - "#define min2 min\n" - "typedef unsigned int u32;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " int m_numChildShapes;\n" - " float m_radius;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx; \n" - " float m_invMass;\n" - " float 
m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " \n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "//#define dot3F4 dot\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion 
qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "void trMul(float4 translationA, Quaternion orientationA,\n" - " float4 translationB, Quaternion orientationB,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtMul(orientationA,orientationB);\n" - " *translationOut = transform(&translationB,&translationA,&orientationA);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" - "{\n" - " return make_float4( a.x + (b.x - a.x) * t,\n" - " a.y + (b.y - a.y) * t,\n" - " a.z + (b.z - a.z) * 
t,\n" - " 0.f);\n" - "}\n" - "float signedDistanceFromPointToPlane(float4 point, float4 planeEqn, float4* closestPointOnFace)\n" - "{\n" - " float4 n = (float4)(planeEqn.x, planeEqn.y, planeEqn.z, 0);\n" - " float dist = dot3F4(n, point) + planeEqn.w;\n" - " *closestPointOnFace = point - dist * n;\n" - " return dist;\n" - "}\n" - "inline bool IsPointInPolygon(float4 p, \n" - " const btGpuFace* face,\n" - " __global const float4* baseVertex,\n" - " __global const int* convexIndices,\n" - " float4* out)\n" - "{\n" - " float4 a;\n" - " float4 b;\n" - " float4 ab;\n" - " float4 ap;\n" - " float4 v;\n" - " float4 plane = make_float4(face->m_plane.x,face->m_plane.y,face->m_plane.z,0.f);\n" - " \n" - " if (face->m_numIndices<2)\n" - " return false;\n" - " \n" - " float4 v0 = baseVertex[convexIndices[face->m_indexOffset + face->m_numIndices-1]];\n" - " \n" - " b = v0;\n" - " for(unsigned i=0; i != face->m_numIndices; ++i)\n" - " {\n" - " a = b;\n" - " float4 vi = baseVertex[convexIndices[face->m_indexOffset + i]];\n" - " b = vi;\n" - " ab = b-a;\n" - " ap = p-a;\n" - " v = cross3(ab,plane);\n" - " if (dot(ap, v) > 0.f)\n" - " {\n" - " float ab_m2 = dot(ab, ab);\n" - " float rt = ab_m2 != 0.f ? 
dot(ab, ap) / ab_m2 : 0.f;\n" - " if (rt <= 0.f)\n" - " {\n" - " *out = a;\n" - " }\n" - " else if (rt >= 1.f) \n" - " {\n" - " *out = b;\n" - " }\n" - " else\n" - " {\n" - " float s = 1.f - rt;\n" - " out[0].x = s * a.x + rt * b.x;\n" - " out[0].y = s * a.y + rt * b.y;\n" - " out[0].z = s * a.z + rt * b.z;\n" - " }\n" - " return false;\n" - " }\n" - " }\n" - " return true;\n" - "}\n" - "void computeContactSphereConvex(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB, \n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* convexVertices,\n" - " __global const int* convexIndices,\n" - " __global const btGpuFace* faces,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity,\n" - " float4 spherePos2,\n" - " float radius,\n" - " float4 pos,\n" - " float4 quat\n" - " )\n" - "{\n" - " float4 invPos;\n" - " float4 invOrn;\n" - " trInverse(pos,quat, &invPos,&invOrn);\n" - " float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" - " int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" - " int numFaces = convexShapes[shapeIndex].m_numFaces;\n" - " float4 closestPnt = (float4)(0, 0, 0, 0);\n" - " float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" - " float minDist = -1000000.f;\n" - " bool bCollide = true;\n" - " for ( int f = 0; f < numFaces; f++ )\n" - " {\n" - " btGpuFace face = faces[convexShapes[shapeIndex].m_faceOffset+f];\n" - " // set up a plane equation \n" - " float4 planeEqn;\n" - " float4 n1 = face.m_plane;\n" - " n1.w = 0.f;\n" - " planeEqn = n1;\n" - " planeEqn.w = face.m_plane.w;\n" - " \n" - " \n" - " // compute a signed distance from the vertex in cloth to the face of rigidbody.\n" - " float4 pntReturn;\n" - " float dist = signedDistanceFromPointToPlane(spherePos, planeEqn, 
&pntReturn);\n" - " // If the distance is positive, the plane is a separating plane. \n" - " if ( dist > radius )\n" - " {\n" - " bCollide = false;\n" - " break;\n" - " }\n" - " if (dist>0)\n" - " {\n" - " //might hit an edge or vertex\n" - " float4 out;\n" - " float4 zeroPos = make_float4(0,0,0,0);\n" - " bool isInPoly = IsPointInPolygon(spherePos,\n" - " &face,\n" - " &convexVertices[convexShapes[shapeIndex].m_vertexOffset],\n" - " convexIndices,\n" - " &out);\n" - " if (isInPoly)\n" - " {\n" - " if (dist>minDist)\n" - " {\n" - " minDist = dist;\n" - " closestPnt = pntReturn;\n" - " hitNormalWorld = planeEqn;\n" - " \n" - " }\n" - " } else\n" - " {\n" - " float4 tmp = spherePos-out;\n" - " float l2 = dot(tmp,tmp);\n" - " if (l2<radius*radius)\n" - " {\n" - " dist = sqrt(l2);\n" - " if (dist>minDist)\n" - " {\n" - " minDist = dist;\n" - " closestPnt = out;\n" - " hitNormalWorld = tmp/dist;\n" - " \n" - " }\n" - " \n" - " } else\n" - " {\n" - " bCollide = false;\n" - " break;\n" - " }\n" - " }\n" - " } else\n" - " {\n" - " if ( dist > minDist )\n" - " {\n" - " minDist = dist;\n" - " closestPnt = pntReturn;\n" - " hitNormalWorld.xyz = planeEqn.xyz;\n" - " }\n" - " }\n" - " \n" - " }\n" - " \n" - " if (bCollide && minDist > -10000)\n" - " {\n" - " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" - " float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" - " \n" - " float actualDepth = minDist-radius;\n" - " if (actualDepth<=0.f)\n" - " {\n" - " \n" - " pOnB1.w = actualDepth;\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " \n" - " if (1)//dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " 
c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " } \n" - " }\n" - " }//if (hasCollision)\n" - "}\n" - " \n" - "int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" - "{\n" - " if( nPoints == 0 )\n" - " return 0;\n" - " \n" - " if (nPoints <=4)\n" - " return nPoints;\n" - " \n" - " \n" - " if (nPoints >64)\n" - " nPoints = 64;\n" - " \n" - " float4 center = make_float4(0.f);\n" - " {\n" - " \n" - " for (int i=0;i<nPoints;i++)\n" - " center += p[i];\n" - " center /= (float)nPoints;\n" - " }\n" - " \n" - " \n" - " \n" - " // sample 4 directions\n" - " \n" - " float4 aVector = p[0] - center;\n" - " float4 u = cross3( nearNormal, aVector );\n" - " float4 v = cross3( nearNormal, u );\n" - " u = normalize3( u );\n" - " v = normalize3( v );\n" - " \n" - " \n" - " //keep point with deepest penetration\n" - " float minW= FLT_MAX;\n" - " \n" - " int minIndex=-1;\n" - " \n" - " float4 maxDots;\n" - " maxDots.x = FLT_MIN;\n" - " maxDots.y = FLT_MIN;\n" - " maxDots.z = FLT_MIN;\n" - " maxDots.w = FLT_MIN;\n" - " \n" - " // idx, distance\n" - " for(int ie = 0; ie<nPoints; ie++ )\n" - " {\n" - " if (p[ie].w<minW)\n" - " {\n" - " minW = p[ie].w;\n" - " minIndex=ie;\n" - " }\n" - " float f;\n" - " float4 r = p[ie]-center;\n" - " f = dot3F4( u, r );\n" - " if (f<maxDots.x)\n" - " {\n" - " maxDots.x = f;\n" - " contactIdx[0].x = ie;\n" - " }\n" - " \n" - " f = dot3F4( -u, r );\n" - " if (f<maxDots.y)\n" - " {\n" - " maxDots.y = f;\n" - " contactIdx[0].y = ie;\n" - " }\n" - " \n" - " \n" - " f = dot3F4( v, r );\n" - " if (f<maxDots.z)\n" - " {\n" - " maxDots.z = f;\n" - " contactIdx[0].z = ie;\n" - " }\n" - " \n" - " f = dot3F4( -v, r );\n" - " if (f<maxDots.w)\n" - " {\n" - " maxDots.w = f;\n" - " contactIdx[0].w = ie;\n" - " }\n" - " \n" - " }\n" - " \n" - " if 
(contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" - " {\n" - " //replace the first contact with minimum (todo: replace contact with least penetration)\n" - " contactIdx[0].x = minIndex;\n" - " }\n" - " \n" - " return 4;\n" - " \n" - "}\n" - "#define MAX_PLANE_CONVEX_POINTS 64\n" - "int computeContactPlaneConvex(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB, \n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu*collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* convexVertices,\n" - " __global const int* convexIndices,\n" - " __global const btGpuFace* faces,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity,\n" - " float4 posB,\n" - " Quaternion ornB\n" - " )\n" - "{\n" - " int resultIndex=-1;\n" - " int shapeIndex = collidables[collidableIndexB].m_shapeIndex;\n" - " __global const ConvexPolyhedronCL* hullB = &convexShapes[shapeIndex];\n" - " \n" - " float4 posA;\n" - " posA = rigidBodies[bodyIndexA].m_pos;\n" - " Quaternion ornA;\n" - " ornA = rigidBodies[bodyIndexA].m_quat;\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " float4 planeEq;\n" - " planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" - " float4 planeNormal = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" - " float4 planeNormalWorld;\n" - " planeNormalWorld = qtRotate(ornA,planeNormal);\n" - " float planeConstant = planeEq.w;\n" - " \n" - " float4 invPosA;Quaternion invOrnA;\n" - " float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" - " {\n" - " \n" - " trInverse(posA,ornA,&invPosA,&invOrnA);\n" - " trMul(invPosA,invOrnA,posB,ornB,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" - " }\n" - " float4 invPosB;Quaternion invOrnB;\n" - " float4 
planeInConvexPos1; Quaternion planeInConvexOrn1;\n" - " {\n" - " \n" - " trInverse(posB,ornB,&invPosB,&invOrnB);\n" - " trMul(invPosB,invOrnB,posA,ornA,&planeInConvexPos1,&planeInConvexOrn1); \n" - " }\n" - " \n" - " float4 planeNormalInConvex = qtRotate(planeInConvexOrn1,-planeNormal);\n" - " float maxDot = -1e30;\n" - " int hitVertex=-1;\n" - " float4 hitVtx;\n" - " float4 contactPoints[MAX_PLANE_CONVEX_POINTS];\n" - " int numPoints = 0;\n" - " int4 contactIdx;\n" - " contactIdx=make_int4(0,1,2,3);\n" - " \n" - " \n" - " for (int i=0;i<hullB->m_numVertices;i++)\n" - " {\n" - " float4 vtx = convexVertices[hullB->m_vertexOffset+i];\n" - " float curDot = dot(vtx,planeNormalInConvex);\n" - " if (curDot>maxDot)\n" - " {\n" - " hitVertex=i;\n" - " maxDot=curDot;\n" - " hitVtx = vtx;\n" - " //make sure the deepest points is always included\n" - " if (numPoints==MAX_PLANE_CONVEX_POINTS)\n" - " numPoints--;\n" - " }\n" - " if (numPoints<MAX_PLANE_CONVEX_POINTS)\n" - " {\n" - " float4 vtxWorld = transform(&vtx, &posB, &ornB);\n" - " float4 vtxInPlane = transform(&vtxWorld, &invPosA, &invOrnA);//oplaneTransform.inverse()*vtxWorld;\n" - " float dist = dot(planeNormal,vtxInPlane)-planeConstant;\n" - " if (dist<0.f)\n" - " {\n" - " vtxWorld.w = dist;\n" - " contactPoints[numPoints] = vtxWorld;\n" - " numPoints++;\n" - " }\n" - " }\n" - " }\n" - " int numReducedPoints = numPoints;\n" - " if (numPoints>4)\n" - " {\n" - " numReducedPoints = extractManifoldSequential( contactPoints, numPoints, planeNormalInConvex, &contactIdx);\n" - " }\n" - " if (numReducedPoints>0)\n" - " {\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " resultIndex = dstIdx;\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -planeNormalWorld;\n" - " //c->setFrictionCoeff(0.7);\n" - " //c->setRestituitionCoeff(0.f);\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = 
(0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " switch (numReducedPoints)\n" - " {\n" - " case 4:\n" - " c->m_worldPosB[3] = contactPoints[contactIdx.w];\n" - " case 3:\n" - " c->m_worldPosB[2] = contactPoints[contactIdx.z];\n" - " case 2:\n" - " c->m_worldPosB[1] = contactPoints[contactIdx.y];\n" - " case 1:\n" - " c->m_worldPosB[0] = contactPoints[contactIdx.x];\n" - " default:\n" - " {\n" - " }\n" - " };\n" - " \n" - " GET_NPOINTS(*c) = numReducedPoints;\n" - " }//if (dstIdx < numPairs)\n" - " } \n" - " return resultIndex;\n" - "}\n" - "void computeContactPlaneSphere(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB, \n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const btGpuFace* faces,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity)\n" - "{\n" - " float4 planeEq = faces[collidables[collidableIndexA].m_shapeIndex].m_plane;\n" - " float radius = collidables[collidableIndexB].m_radius;\n" - " float4 posA1 = rigidBodies[bodyIndexA].m_pos;\n" - " float4 ornA1 = rigidBodies[bodyIndexA].m_quat;\n" - " float4 posB1 = rigidBodies[bodyIndexB].m_pos;\n" - " float4 ornB1 = rigidBodies[bodyIndexB].m_quat;\n" - " \n" - " bool hasCollision = false;\n" - " float4 planeNormal1 = make_float4(planeEq.x,planeEq.y,planeEq.z,0.f);\n" - " float planeConstant = planeEq.w;\n" - " float4 convexInPlaneTransPos1; Quaternion convexInPlaneTransOrn1;\n" - " {\n" - " float4 invPosA;Quaternion invOrnA;\n" - " trInverse(posA1,ornA1,&invPosA,&invOrnA);\n" - " 
trMul(invPosA,invOrnA,posB1,ornB1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" - " }\n" - " float4 planeInConvexPos1; Quaternion planeInConvexOrn1;\n" - " {\n" - " float4 invPosB;Quaternion invOrnB;\n" - " trInverse(posB1,ornB1,&invPosB,&invOrnB);\n" - " trMul(invPosB,invOrnB,posA1,ornA1,&planeInConvexPos1,&planeInConvexOrn1); \n" - " }\n" - " float4 vtx1 = qtRotate(planeInConvexOrn1,-planeNormal1)*radius;\n" - " float4 vtxInPlane1 = transform(&vtx1,&convexInPlaneTransPos1,&convexInPlaneTransOrn1);\n" - " float distance = dot3F4(planeNormal1,vtxInPlane1) - planeConstant;\n" - " hasCollision = distance < 0.f;//m_manifoldPtr->getContactBreakingThreshold();\n" - " if (hasCollision)\n" - " {\n" - " float4 vtxInPlaneProjected1 = vtxInPlane1 - distance*planeNormal1;\n" - " float4 vtxInPlaneWorld1 = transform(&vtxInPlaneProjected1,&posA1,&ornA1);\n" - " float4 normalOnSurfaceB1 = qtRotate(ornA1,planeNormal1);\n" - " float4 pOnB1 = vtxInPlaneWorld1+normalOnSurfaceB1*distance;\n" - " pOnB1.w = distance;\n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if (hasCollision)\n" - "}\n" - "__kernel void primitiveContactsKernel( __global int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " 
__global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numPairs, int maxContactCapacity)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i<numPairs)\n" - " {\n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " float4 posB;\n" - " posB = rigidBodies[bodyIndexB].m_pos;\n" - " Quaternion ornB;\n" - " ornB = rigidBodies[bodyIndexB].m_quat;\n" - " int contactIndex = computeContactPlaneConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity, posB,ornB);\n" - " if (contactIndex>=0)\n" - " pairs[pairIndex].z = contactIndex;\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" - " {\n" - " float4 posA;\n" - " posA = rigidBodies[bodyIndexA].m_pos;\n" - " Quaternion ornA;\n" - " ornA = rigidBodies[bodyIndexA].m_quat;\n" - " int contactIndex = computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " 
rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" - " if (contactIndex>=0)\n" - " pairs[pairIndex].z = contactIndex;\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_PLANE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " computeContactPlaneSphere(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_PLANE)\n" - " {\n" - " computeContactPlaneSphere( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity);\n" - " return;\n" - " }\n" - " \n" - " \n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " \n" - " float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" - " float sphereRadius = collidables[collidableIndexA].m_radius;\n" - " float4 convexPos = rigidBodies[bodyIndexB].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexB].m_quat;\n" - " computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " return;\n" - " }\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_CONVEX_HULL &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " \n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " 
float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" - " computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " return;\n" - " }\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " \n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " //sphere-sphere\n" - " float radiusA = collidables[collidableIndexA].m_radius;\n" - " float radiusB = collidables[collidableIndexB].m_radius;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " float4 diff = posA-posB;\n" - " float len = length(diff);\n" - " \n" - " ///iff distance positive, don't generate a new contact\n" - " if ( len <= (radiusA+radiusB))\n" - " {\n" - " ///distance (negative means penetration)\n" - " float dist = len - (radiusA+radiusB);\n" - " float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" - " if (len > 0.00001)\n" - " {\n" - " normalOnSurfaceB = diff / len;\n" - " }\n" - " float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" - " contactPosB.w = dist;\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = normalOnSurfaceB;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " 
c->m_worldPosB[0] = contactPosB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if ( len <= (radiusA+radiusB))\n" - " return;\n" - " }//SHAPE_SPHERE SHAPE_SPHERE\n" - " }// if (i<numPairs)\n" - "}\n" - "// work-in-progress\n" - "__kernel void processCompoundPairsPrimitivesKernel( __global const int4* gpuCompoundPairs,\n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numCompoundPairs, int maxContactCapacity\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i<numCompoundPairs)\n" - " {\n" - " int bodyIndexA = gpuCompoundPairs[i].x;\n" - " int bodyIndexB = gpuCompoundPairs[i].y;\n" - " int childShapeIndexA = gpuCompoundPairs[i].z;\n" - " int childShapeIndexB = gpuCompoundPairs[i].w;\n" - " \n" - " int collidableIndexA = -1;\n" - " int collidableIndexB = -1;\n" - " \n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " \n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " \n" - " if (childShapeIndexA >= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" 
- " {\n" - " collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " int pairIndex = i;\n" - " if ((shapeTypeA == SHAPE_PLANE) && (shapeTypeB==SHAPE_CONVEX_HULL))\n" - " {\n" - " computeContactPlaneConvex( pairIndex, bodyIndexA,bodyIndexB, collidableIndexA,collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posB,ornB);\n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB==SHAPE_PLANE))\n" - " {\n" - " computeContactPlaneConvex( pairIndex, bodyIndexB,bodyIndexA, collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,\n" - " faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,posA,ornA);\n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_CONVEX_HULL) && (shapeTypeB == SHAPE_SPHERE))\n" - " {\n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " float4 convexPos = posA;\n" - " float4 convexOrn = ornA;\n" - " \n" - " computeContactSphereConvex(pairIndex, bodyIndexB, bodyIndexA , 
collidableIndexB,collidableIndexA, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " \n" - " return;\n" - " }\n" - " if ((shapeTypeA == SHAPE_SPHERE) && (shapeTypeB == SHAPE_CONVEX_HULL))\n" - " {\n" - " float4 spherePos = rigidBodies[bodyIndexA].m_pos;\n" - " float sphereRadius = collidables[collidableIndexA].m_radius;\n" - " float4 convexPos = posB;\n" - " float4 convexOrn = ornB;\n" - " \n" - " computeContactSphereConvex(pairIndex, bodyIndexA, bodyIndexB, collidableIndexA, collidableIndexB, \n" - " rigidBodies,collidables,convexShapes,vertices,indices,faces, globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn);\n" - " \n" - " return;\n" - " }\n" - " }// if (i<numCompoundPairs)\n" - "}\n" - "bool pointInTriangle(const float4* vertices, const float4* normal, float4 *p )\n" - "{\n" - " const float4* p1 = &vertices[0];\n" - " const float4* p2 = &vertices[1];\n" - " const float4* p3 = &vertices[2];\n" - " float4 edge1; edge1 = (*p2 - *p1);\n" - " float4 edge2; edge2 = ( *p3 - *p2 );\n" - " float4 edge3; edge3 = ( *p1 - *p3 );\n" - " \n" - " float4 p1_to_p; p1_to_p = ( *p - *p1 );\n" - " float4 p2_to_p; p2_to_p = ( *p - *p2 );\n" - " float4 p3_to_p; p3_to_p = ( *p - *p3 );\n" - " float4 edge1_normal; edge1_normal = ( cross(edge1,*normal));\n" - " float4 edge2_normal; edge2_normal = ( cross(edge2,*normal));\n" - " float4 edge3_normal; edge3_normal = ( cross(edge3,*normal));\n" - " \n" - " \n" - " float r1, r2, r3;\n" - " r1 = dot(edge1_normal,p1_to_p );\n" - " r2 = dot(edge2_normal,p2_to_p );\n" - " r3 = dot(edge3_normal,p3_to_p );\n" - " \n" - " if ( r1 > 0 && r2 > 0 && r3 > 0 )\n" - " return true;\n" - " if ( r1 <= 0 && r2 <= 0 && r3 <= 0 ) \n" - " return true;\n" - " return false;\n" - "}\n" - "float segmentSqrDistance(float4 from, float4 to,float4 p, float4* nearest) \n" - 
"{\n" - " float4 diff = p - from;\n" - " float4 v = to - from;\n" - " float t = dot(v,diff);\n" - " \n" - " if (t > 0) \n" - " {\n" - " float dotVV = dot(v,v);\n" - " if (t < dotVV) \n" - " {\n" - " t /= dotVV;\n" - " diff -= t*v;\n" - " } else \n" - " {\n" - " t = 1;\n" - " diff -= v;\n" - " }\n" - " } else\n" - " {\n" - " t = 0;\n" - " }\n" - " *nearest = from + t*v;\n" - " return dot(diff,diff); \n" - "}\n" - "void computeContactSphereTriangle(int pairIndex,\n" - " int bodyIndexA, int bodyIndexB,\n" - " int collidableIndexA, int collidableIndexB, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " const float4* triangleVertices,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int maxContactCapacity,\n" - " float4 spherePos2,\n" - " float radius,\n" - " float4 pos,\n" - " float4 quat,\n" - " int faceIndex\n" - " )\n" - "{\n" - " float4 invPos;\n" - " float4 invOrn;\n" - " trInverse(pos,quat, &invPos,&invOrn);\n" - " float4 spherePos = transform(&spherePos2,&invPos,&invOrn);\n" - " int numFaces = 3;\n" - " float4 closestPnt = (float4)(0, 0, 0, 0);\n" - " float4 hitNormalWorld = (float4)(0, 0, 0, 0);\n" - " float minDist = -1000000.f;\n" - " bool bCollide = false;\n" - " \n" - " //////////////////////////////////////\n" - " float4 sphereCenter;\n" - " sphereCenter = spherePos;\n" - " const float4* vertices = triangleVertices;\n" - " float contactBreakingThreshold = 0.f;//todo?\n" - " float radiusWithThreshold = radius + contactBreakingThreshold;\n" - " float4 edge10;\n" - " edge10 = vertices[1]-vertices[0];\n" - " edge10.w = 0.f;//is this needed?\n" - " float4 edge20;\n" - " edge20 = vertices[2]-vertices[0];\n" - " edge20.w = 0.f;//is this needed?\n" - " float4 normal = cross3(edge10,edge20);\n" - " normal = normalize(normal);\n" - " float4 p1ToCenter;\n" - " p1ToCenter = sphereCenter - vertices[0];\n" - " \n" - " float distanceFromPlane = 
dot(p1ToCenter,normal);\n" - " if (distanceFromPlane < 0.f)\n" - " {\n" - " //triangle facing the other way\n" - " distanceFromPlane *= -1.f;\n" - " normal *= -1.f;\n" - " }\n" - " hitNormalWorld = normal;\n" - " bool isInsideContactPlane = distanceFromPlane < radiusWithThreshold;\n" - " \n" - " // Check for contact / intersection\n" - " bool hasContact = false;\n" - " float4 contactPoint;\n" - " if (isInsideContactPlane) \n" - " {\n" - " \n" - " if (pointInTriangle(vertices,&normal, &sphereCenter)) \n" - " {\n" - " // Inside the contact wedge - touches a point on the shell plane\n" - " hasContact = true;\n" - " contactPoint = sphereCenter - normal*distanceFromPlane;\n" - " \n" - " } else {\n" - " // Could be inside one of the contact capsules\n" - " float contactCapsuleRadiusSqr = radiusWithThreshold*radiusWithThreshold;\n" - " float4 nearestOnEdge;\n" - " int numEdges = 3;\n" - " for (int i = 0; i < numEdges; i++) \n" - " {\n" - " float4 pa =vertices[i];\n" - " float4 pb = vertices[(i+1)%3];\n" - " float distanceSqr = segmentSqrDistance(pa,pb,sphereCenter, &nearestOnEdge);\n" - " if (distanceSqr < contactCapsuleRadiusSqr) \n" - " {\n" - " // Yep, we're inside a capsule\n" - " hasContact = true;\n" - " contactPoint = nearestOnEdge;\n" - " \n" - " }\n" - " \n" - " }\n" - " }\n" - " }\n" - " if (hasContact) \n" - " {\n" - " closestPnt = contactPoint;\n" - " float4 contactToCenter = sphereCenter - contactPoint;\n" - " minDist = length(contactToCenter);\n" - " if (minDist>FLT_EPSILON)\n" - " {\n" - " hitNormalWorld = normalize(contactToCenter);//*(1./minDist);\n" - " bCollide = true;\n" - " }\n" - " \n" - " }\n" - " /////////////////////////////////////\n" - " if (bCollide && minDist > -10000)\n" - " {\n" - " \n" - " float4 normalOnSurfaceB1 = qtRotate(quat,-hitNormalWorld);\n" - " float4 pOnB1 = transform(&closestPnt,&pos,&quat);\n" - " float actualDepth = minDist-radius;\n" - " \n" - " if (actualDepth<=0.f)\n" - " {\n" - " pOnB1.w = actualDepth;\n" - " int 
dstIdx;\n" - " \n" - " float lenSqr = dot3F4(normalOnSurfaceB1,normalOnSurfaceB1);\n" - " if (lenSqr>FLT_EPSILON)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " \n" - " if (dstIdx < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB1;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyIndexA].m_invMass==0?-bodyIndexA:bodyIndexA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyIndexB].m_invMass==0?-bodyIndexB:bodyIndexB;\n" - " c->m_worldPosB[0] = pOnB1;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = faceIndex;\n" - " GET_NPOINTS(*c) = 1;\n" - " } \n" - " }\n" - " }\n" - " }//if (hasCollision)\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSphereContactsKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numConcavePairs, int maxContactCapacity\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " int pairIdx = i;\n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " if 
(collidables[collidableIndexB].m_shapeType==SHAPE_SPHERE)\n" - " {\n" - " int f = concavePairs[i].z;\n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " }\n" - " float4 spherePos = rigidBodies[bodyIndexB].m_pos;\n" - " float sphereRadius = collidables[collidableIndexB].m_radius;\n" - " float4 convexPos = rigidBodies[bodyIndexA].m_pos;\n" - " float4 convexOrn = rigidBodies[bodyIndexA].m_quat;\n" - " computeContactSphereTriangle(i, bodyIndexB, bodyIndexA, collidableIndexB, collidableIndexA, \n" - " rigidBodies,collidables,\n" - " verticesA,\n" - " globalContactsOut, nGlobalContactsOut,maxContactCapacity,\n" - " spherePos,sphereRadius,convexPos,convexOrn, f);\n" - " return;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl deleted file mode 100644 index a6565fd6fa..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/sat.cl +++ /dev/null @@ -1,2018 +0,0 @@ -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 - -#define B3_MAX_STACK_DEPTH 256 - - -typedef unsigned int u32; - -///keep this in sync with btCollidable.h -typedef struct -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - union - { - float m_radius; - int m_compoundBvhIndex; - }; - - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. 
Leafnodes can point to 32-bit triangle index (non-negative range). -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} b3QuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - - -int getTriangleIndex(const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - 
-__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int 
i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? 
d0:d1; - return true; -} - - - - -inline bool IsAlmostZero(const float4 v) -{ - if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = 
hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - result = false; - - 
float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, __global const float4* vertices,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? d0:d1; - return true; -} - - -bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = faces[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - - curPlaneTests++; - - float d; - if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d)) - return false; - - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - - return true; -} - - - - -bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const 
ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - float4* sep, - float* dmin) -{ - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test unit sphere directions - for (int i=0;i<numUnitSphereDirections;i++) - { - - float4 crossje; - crossje = unitSphereDirections[i]; - - if (dot3F4(DeltaC2,crossje)>0) - crossje *= -1.f; - { - float dist; - bool result = true; - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje 
= cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje*=-1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - -// work-in-progress -__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global const btGpuChildShape* gpuChildShapes, - __global volatile float4* gpuCompoundSepNormalsOut, - __global volatile int* gpuHasCompoundSepNormalsOut, - int numCompoundPairs - ) -{ - - int i = get_global_id(0); - if (i<numCompoundPairs) - { - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = 
gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - gpuHasCompoundSepNormalsOut[i] = 0; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int shapeTypeA = collidables[collidableIndexA].m_shapeType; - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - - - if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL)) - { - return; - } - - int hasSeparatingAxis = 5; - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - posA.w = 0.f; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = make_float4(1,0,0,0); - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - - if (!sepB) - { - 
hasSeparatingAxis = 0; - } else//(!sepB) - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin); - if (sepEE) - { - gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal); - gpuHasCompoundSepNormalsOut[i] = 1; - }//sepEE - }//(!sepB) - }//(!sepA) - - - } - -} - - -inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - -inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin) -{ - b3Float4 vecOut; - vecOut = b3MakeFloat4( - (float)(vecIn[0]) / (quantization.x), - (float)(vecIn[1]) / (quantization.y), - (float)(vecIn[2]) / (quantization.z), - 0.f); - - vecOut += bvhAabbMin; - return vecOut; -} - - -// work-in-progress -__kernel void findCompoundPairsKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global b3Aabb_t* aabbLocalSpace, - __global const btGpuChildShape* gpuChildShapes, - __global volatile int4* gpuCompoundPairsOut, - __global volatile int* numCompoundPairsOut, - __global const b3BvhSubtreeInfo* subtrees, - __global const b3QuantizedBvhNode* quantizedNodes, - __global const b3BvhInfo* bvhInfos, - int numPairs, - int maxNumCompoundPairsCapacity - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - 
int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int bvhA = collidables[collidableIndexA].m_compoundBvhIndex; - int bvhB = collidables[collidableIndexB].m_compoundBvhIndex; - int numSubTreesA = bvhInfos[bvhA].m_numSubTrees; - int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset; - int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset; - - - int numSubTreesB = bvhInfos[bvhB].m_numSubTrees; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - b3Quat ornA = rigidBodies[bodyIndexA].m_quat; - - b3Quat ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - - for (int p=0;p<numSubTreesA;p++) - { - b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p]; - //bvhInfos[bvhA].m_quantization - b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - b3Float4 treeAmaxLocal = MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - - b3Float4 aabbAMinOut,aabbAMaxOut; - float margin=0.f; - b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); - - for (int q=0;q<numSubTreesB;q++) - { - b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q]; - - b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - - b3Float4 
aabbBMinOut,aabbBMaxOut; - float margin=0.f; - b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); - - - - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (aabbOverlap) - { - - int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset; - int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize; - - int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset; - int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize; - - - b3Int2 nodeStack[B3_MAX_STACK_DEPTH]; - b3Int2 node0; - node0.x = startNodeIndexA; - node0.y = startNodeIndexB; - int maxStackDepth = B3_MAX_STACK_DEPTH; - int depth=0; - nodeStack[depth++]=node0; - - do - { - b3Int2 node = nodeStack[--depth]; - - b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin); - - b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin); - - float margin=0.f; - b3Float4 aabbAMinOut,aabbAMaxOut; - b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut); - - b3Float4 aabbBMinOut,aabbBMaxOut; - b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut); - - - bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut); - if (nodeOverlap) - { - bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]); - bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]); - bool isInternalA = !isLeafA; - bool isInternalB = !isLeafB; - - //fail, even though it might hit two leaf nodes - if 
(depth+4>maxStackDepth && !(isLeafA && isLeafB)) - { - //printf("Error: traversal exceeded maxStackDepth"); - continue; - } - - if(isInternalA) - { - int nodeAleftChild = node.x+1; - bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]); - int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]); - - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild); - nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild); - } - else - { - nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y); - nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y); - } - } - else - { - if(isInternalB) - { - int nodeBleftChild = node.y+1; - bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]); - int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild); - nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild); - } - else - { - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]); - int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]); - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - } - } - } - } while (depth); - } - } - } - - return; - } - - - - - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - - if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - - int numChildrenA = collidables[collidableIndexA].m_numChildShapes; - for (int c=0;c<numChildrenA;c++) - { - int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c; - int childColIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - - int shapeIndexA = collidables[childColIndexA].m_shapeIndex; - b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA]; - float margin = 0.f; - - b3Float4 aabbAMinWS; - b3Float4 aabbAMaxWS; - - b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin, - newPosA, - newOrnA, - &aabbAMinWS,&aabbAMaxWS); - - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int 
childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB]; - - b3Float4 aabbBMinWS; - b3Float4 aabbBMaxWS; - - b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin, - newPosB, - newOrnB, - &aabbBMinWS,&aabbBMaxWS); - - - - bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS); - if (aabbOverlap) - { - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - {// - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB); - } - }// - }//fi(1) - } //for (int b=0 - }//if (collidables[collidableIndexB]. 
- else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - if (1) - { - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = newPosA; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = newOrnA; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - { - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1); - }//if (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) - && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - { - int numChildrenB = collidables[collidableIndexB].m_numChildShapes; - for (int b=0;b<numChildrenB;b++) - { - int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = qtRotate(ornB,childPosB)+posB; - float4 newOrnB = qtMul(ornB,childOrnB); - - int shapeIndexB = collidables[childColIndexB].m_shapeIndex; - - - ////////////////////////////////////// - - if (1) - { - int numFacesA = 
convexShapes[shapeIndexA].m_numFaces; - float dmin = FLT_MAX; - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = newPosB; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =newOrnB; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - {// - int compoundPairIdx = atomic_inc(numCompoundPairsOut); - if (compoundPairIdx<maxNumCompoundPairsCapacity) - { - gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB); - }//fi (compoundPairIdx<maxNumCompoundPairsCapacity) - }// - }//fi (1) - }//for (int b=0;b<numChildrenB;b++) - return; - }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - return; - }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)) - }//i<numPairs -} - -// work-in-progress -__kernel void findSeparatingAxisKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global volatile float4* separatingNormals, - __global volatile int* hasSeparatingAxis, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - 
//once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - hasSeparatingAxis[i] = 0; - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - hasSeparatingAxis[i] = 0; - return; - } - - if ((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH)) - { - hasSeparatingAxis[i] = 0; - return; - } - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = FLT_MAX; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal; - - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - hasSeparatingAxis[i] = 4; - if (!sepA) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (!sepB) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - } - - } - -} - - -__kernel void findSeparatingAxisVertexFaceKernel( 
__global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global volatile float4* separatingNormals, - __global volatile int* hasSeparatingAxis, - __global float* dmins, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - hasSeparatingAxis[i] = 0; - - //once the broadphase avoids static-static pairs, we can remove this test - if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0)) - { - return; - } - - - if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL)) - { - return; - } - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = FLT_MAX; - - dmins[i] = dmin; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal; - - bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - 
indices,&sepNormal,&dmin); - hasSeparatingAxis[i] = 4; - if (!sepA) - { - hasSeparatingAxis[i] = 0; - } else - { - bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (sepB) - { - dmins[i] = dmin; - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - - } - -} - - -__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global btAabbCL* aabbs, - __global float4* separatingNormals, - __global int* hasSeparatingAxis, - __global float* dmins, - __global const float4* unitSphereDirections, - int numUnitSphereDirections, - int numPairs - ) -{ - - int i = get_global_id(0); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - - float dmin = dmins[i]; - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 c0local = convexShapes[shapeIndexA].m_localCenter; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - float4 sepNormal = 
separatingNormals[i]; - - - - bool sepEE = false; - int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges; - if (numEdgeEdgeDirections<=numUnitSphereDirections) - { - sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,uniqueEdges,faces, - indices,&sepNormal,&dmin); - - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - /* - ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy - else - { - sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA, - posB,ornB, - DeltaC2, - vertices,unitSphereDirections,numUnitSphereDirections, - &sepNormal,&dmin); - if (!sepEE) - { - hasSeparatingAxis[i] = 0; - } else - { - hasSeparatingAxis[i] = 1; - separatingNormals[i] = sepNormal; - } - } - */ - } //if (hasSeparatingAxis[i]) - }//(i<numPairs) -} - - - - - -inline int findClippingFaces(const float4 separatingNormal, - const ConvexPolyhedronCL* hullA, - __global const ConvexPolyhedronCL* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=0; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); 
- const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - - for(int e0=0;e0<numVertices;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - - for(int e0=0;e0<numVerticesA;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const 
ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] 
= vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool 
addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - bool sepA = findSeparatingAxisLocalA( 
&convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, - posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces,indices, - verticesA,uniqueEdgesA,facesA,indicesA, - &sepAxis,&dmin); - - if (!sepB) - { - hasSeparatingAxis = 0; - } else - { - bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - - if (!sepEE) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - } - - if (hasSeparatingAxis) - { - sepAxis.w = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - - - findClippingFaces(sepAxis, - &convexPolyhedronA, - &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsA1GPU, - worldNormalsAGPU, - worldVertsB1GPU, - vertexFaceCapacity, - minDist, maxDist, - verticesA, - facesA, - indicesA, - vertices, - faces, - indices, - clippingFacesOut, pairIdx); - - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - - concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts -} - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl deleted file mode 100644 index f433971741..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl +++ /dev/null @@ -1,1888 +0,0 @@ - 
-#define TRIANGLE_NUM_CONVEX_FACES 5 - - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - -#define max2 max -#define min2 min - -typedef unsigned int u32; - - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - - - -#define GET_NPOINTS(x) (x).m_worldNormalOnB.w - - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 
fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -//#define dot3F4 dot - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - - -__inline float4 lerp3(const float4 a,const float4 b, float t) -{ - return make_float4( a.x + (b.x - a.x) * t, - a.y + (b.y - a.y) * t, - a.z + (b.z - a.z) * t, - 0.f); -} - - 
- -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; - //double-check next test - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - - -// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut -int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) -{ - - int ve; - float ds, de; - int numVertsOut = 0; -//double-check next test - if (numVertsIn < 2) - return 0; - - float4 firstVertex=pVtxIn[numVertsIn-1]; - float4 endVertex = pVtxIn[0]; - - ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; - - for (ve = 0; ve < numVertsIn; ve++) - { - endVertex=pVtxIn[ve]; - - de = dot3F4(planeNormalWS,endVertex)+planeEqWS; - - if (ds<0) - { - if (de<0) - { - // Start < 0, end < 0, so output endVertex - ppVtxOut[numVertsOut++] = endVertex; - } - else - { - // Start < 0, end >= 0, so output intersection - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - } - } - else - { - 
if (de<0) - { - // Start >= 0, end < 0 so output intersection and end - ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); - ppVtxOut[numVertsOut++] = endVertex; - } - } - firstVertex = endVertex; - ds = de; - } - return numVertsOut; -} - - -int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, - const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]]; - const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = 
qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int i=0;i<numVertsIn;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - } - - return numContactsOut; -} - - - -int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, - const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, - float4* worldVertsB2, int capacityWorldVertsB2, - const float minDist, float maxDist, - const float4* verticesA, - const b3GpuFace_t* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - float4* contactsOut, - int contactCapacity) -{ - int numContactsOut = 0; - - float4* pVtxIn = worldVertsB1; - float4* pVtxOut = worldVertsB2; - - int numVertsIn = numWorldVertsB1; - int numVertsOut = 0; - - int closestFaceA=-1; - { - float 
dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z,0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - } - } - } - if (closestFaceA<0) - return numContactsOut; - - b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA]; - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - int numVerticesA = polyA.m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; - const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; - const float4 edge0 = a - b; - const float4 WorldEdge0 = qtRotate(ornA,edge0); - float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); - - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = transform(&a,&posA,&ornA); - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - - //clip face - //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); - numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); - - //btSwap(pVtxIn,pVtxOut); - float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsIn = numVertsOut; - numVertsOut = 0; - } - - - // only keep points that are behind the witness face - { - float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); - float localPlaneEq = polyA.m_plane.w; - float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); - float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); - for (int 
i=0;i<numVertsIn;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - //resultOut.addContactPoint(separatingNormal,point,depth); - contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - } - - return numContactsOut; -} - -int clipHullAgainstHull(const float4 separatingNormal, - __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - float4* localContactsOut, - int localContactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, - faces, - indices,localContactsOut,localContactCapacity); - } - - 
return numContactsOut; -} - - -int clipHullAgainstHullLocalA(const float4 separatingNormal, - const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const b3GpuFace_t* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const b3GpuFace_t* facesB, - __global const int* indicesB, - float4* localContactsOut, - int localContactCapacity) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int e0=0;e0<numVertices;e0++) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - if (closestFaceB>=0) - { - numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, - posA,ornA, - worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, - verticesA,facesA,indicesA, - verticesB,facesB,indicesB, - localContactsOut,localContactCapacity); - } - - return numContactsOut; -} - -#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; -#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} -#define REDUCE_MAX(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = 
(v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } -#define REDUCE_MIN(v, n) {int i=0;\ -for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } - -int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) -{ - if( nPoints == 0 ) - return 0; - - if (nPoints <=4) - return nPoints; - - - if (nPoints >64) - nPoints = 64; - - float4 center = make_float4(0.f); - { - - for (int i=0;i<nPoints;i++) - center += p[i]; - center /= (float)nPoints; - } - - - - // sample 4 directions - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - - //keep point with deepest penetration - float minW= FLT_MAX; - - int minIndex=-1; - - float4 maxDots; - maxDots.x = FLT_MIN; - maxDots.y = FLT_MIN; - maxDots.z = FLT_MIN; - maxDots.w = FLT_MIN; - - // idx, distance - for(int ie = 0; ie<nPoints; ie++ ) - { - if (p[ie].w<minW) - { - minW = p[ie].w; - minIndex=ie; - } - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - if (f<maxDots.x) - { - maxDots.x = f; - contactIdx[0].x = ie; - } - - f = dot3F4( -u, r ); - if (f<maxDots.y) - { - maxDots.y = f; - contactIdx[0].y = ie; - } - - - f = dot3F4( v, r ); - if (f<maxDots.z) - { - maxDots.z = f; - contactIdx[0].z = ie; - } - - f = dot3F4( -v, r ); - if (f<maxDots.w) - { - maxDots.w = f; - contactIdx[0].w = ie; - } - - } - - if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) - { - //replace the first contact with minimum (todo: replace contact with least penetration) - contactIdx[0].x = minIndex; - } - - return 4; - -} - - -int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx) -{ - contactIdx[0] = 0; - contactIdx[1] = 1; - contactIdx[2] = 2; - contactIdx[3] = 3; - - if( nPoints == 0 ) return 0; - - nPoints = min2( 
nPoints, 4 ); - return nPoints; - -} - - - -int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx) -{ - if( nPoints == 0 ) return 0; - - nPoints = min2( nPoints, 64 ); - - float4 center = make_float4(0.f); - { - float4 v[64]; - for (int i=0;i<nPoints;i++) - v[i] = p[i]; - //memcpy( v, p, nPoints*sizeof(float4) ); - PARALLEL_SUM( v, nPoints ); - center = v[0]/(float)nPoints; - } - - - - { // sample 4 directions - if( nPoints < 4 ) - { - for(int i=0; i<nPoints; i++) - contactIdx[i] = i; - return nPoints; - } - - float4 aVector = p[0] - center; - float4 u = cross3( nearNormal, aVector ); - float4 v = cross3( nearNormal, u ); - u = normalize3( u ); - v = normalize3( v ); - - int idx[4]; - - float2 max00 = make_float2(0,FLT_MAX); - { - // idx, distance - { - { - int4 a[64]; - for(int ie = 0; ie<nPoints; ie++ ) - { - - - float f; - float4 r = p[ie]-center; - f = dot3F4( u, r ); - a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( -u, r ); - a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( v, r ); - a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - - f = dot3F4( -v, r ); - a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); - } - - for(int ie=0; ie<nPoints; ie++) - { - a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x; - a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; - a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; - a[0].w = (a[0].w > a[ie].w )? 
a[0].w: a[ie].w; - } - - idx[0] = (int)a[0].x & 0xff; - idx[1] = (int)a[0].y & 0xff; - idx[2] = (int)a[0].z & 0xff; - idx[3] = (int)a[0].w & 0xff; - } - } - - { - float2 h[64]; - PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); - REDUCE_MIN( h, nPoints ); - max00 = h[0]; - } - } - - contactIdx[0] = idx[0]; - contactIdx[1] = idx[1]; - contactIdx[2] = idx[2]; - contactIdx[3] = idx[3]; - - - return 4; - } -} - - - -__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* closestPointsWorld, - __global const float4* separatingNormalsWorld, - __global const int* contactCounts, - __global const int* contactOffsets, - __global struct b3Contact4Data* restrict contactsOut, - counter32_t nContactsOut, - int contactCapacity, - int numPairs, - int pairIndex - ) -{ - int idx = get_global_id(0); - - if (idx<numPairs) - { - float4 normal = separatingNormalsWorld[idx]; - int nPoints = contactCounts[idx]; - __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]]; - float4 localPoints[64]; - for (int i=0;i<nPoints;i++) - { - localPoints[i] = pointsIn[i]; - } - - int contactIdx[4];// = {-1,-1,-1,-1}; - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - __global struct b3Contact4Data* c = contactsOut + dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = idx; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? 
-bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - for (int i=0;i<nContacts;i++) - { - c->m_worldPosB[i] = localPoints[contactIdx[i]]; - } - GET_NPOINTS(*c) = nContacts; - } - } -} - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - -void trMul(float4 translationA, Quaternion orientationA, - float4 translationB, Quaternion orientationB, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtMul(orientationA,orientationB); - *translationOut = transform(&translationB,&translationA,&orientationA); -} - - - - -__kernel void clipHullHullKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numPairs, - int contactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (hasSeparatingAxis[i]) - { - - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - - - int numLocalContactsOut = 
clipHullAgainstHull(separatingNormals[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, - rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } - - if (dstIdx<contactCapacity) - { - pairs[pairIndex].z = dstIdx; - - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - for (int i=0;i<nReducedContacts;i++) - { - //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact - if (i>0||(mprContactIndex<0)) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - -__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const 
b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const b3GpuChildShape_t* gpuChildShapes, - __global const float4* gpuCompoundSepNormalsOut, - __global const int* gpuHasCompoundSepNormalsOut, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int numCompoundPairs, int maxContactCapacity) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numCompoundPairs) - { - - if (gpuHasCompoundSepNormalsOut[i]) - { - - int bodyIndexA = gpuCompoundPairs[i].x; - int bodyIndexB = gpuCompoundPairs[i].y; - - int childShapeIndexA = gpuCompoundPairs[i].z; - int childShapeIndexB = gpuCompoundPairs[i].w; - - int collidableIndexA = -1; - int collidableIndexB = -1; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 posA = rigidBodies[bodyIndexA].m_pos; - - float4 ornB = rigidBodies[bodyIndexB].m_quat; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - if (childShapeIndexA >= 0) - { - collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; - float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; - float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; - float4 newPosA = qtRotate(ornA,childPosA)+posA; - float4 newOrnA = qtMul(ornA,childOrnA); - posA = newPosA; - ornA = newOrnA; - } else - { - collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - } - - if (childShapeIndexB>=0) - { - collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = 
transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - } else - { - collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - } - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -gpuCompoundSepNormalsOut[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if ((dstIdx+nReducedContacts) < maxContactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = gpuCompoundPairs[pairIndex].x; - int bodyB = gpuCompoundPairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (gpuHasCompoundSepNormalsOut[i]) - }// if (i<numCompoundPairs) - -} - - - -__kernel void sphereSphereCollisionKernel( __global const int4* pairs, - __global const 
b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numPairs) -{ - - int i = get_global_id(0); - int pairIndex = i; - - if (i<numPairs) - { - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && - collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) - { - //sphere-sphere - float radiusA = collidables[collidableIndexA].m_radius; - float radiusB = collidables[collidableIndexB].m_radius; - float4 posA = rigidBodies[bodyIndexA].m_pos; - float4 posB = rigidBodies[bodyIndexB].m_pos; - - float4 diff = posA-posB; - float len = length(diff); - - ///iff distance positive, don't generate a new contact - if ( len <= (radiusA+radiusB)) - { - ///distance (negative means penetration) - float dist = len - (radiusA+radiusB); - float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); - if (len > 0.00001) - { - normalOnSurfaceB = diff / len; - } - float4 contactPosB = posB + normalOnSurfaceB*radiusB; - contactPosB.w = dist; - - int dstIdx; - AppendInc( nGlobalContactsOut, dstIdx ); - if (dstIdx < contactCapacity) - { - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normalOnSurfaceB; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_worldPosB[0] = contactPosB; - c->m_childIndexA = -1; - c->m_childIndexB = -1; - - 
GET_NPOINTS(*c) = 1; - }//if (dstIdx < numPairs) - }//if ( len <= (radiusA+radiusB)) - }//SHAPE_SPHERE SHAPE_SPHERE - }//if (i<numPairs) -} - -__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const b3GpuChildShape_t* gpuChildShapes, - __global const float4* separatingNormals, - __global struct b3Contact4Data* restrict globalContactsOut, - counter32_t nGlobalContactsOut, - int contactCapacity, - int numConcavePairs) -{ - - int i = get_global_id(0); - int pairIndex = i; - - float4 worldVertsB1[64]; - float4 worldVertsB2[64]; - int capacityWorldVerts = 64; - - float4 localContactsOut[64]; - int localContactCapacity=64; - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numConcavePairs) - { - //negative value means that the pair is invalid - if (concavePairsIn[i].w<0) - return; - - int bodyIndexA = concavePairsIn[i].x; - int bodyIndexB = concavePairsIn[i].y; - int f = concavePairsIn[i].z; - int childShapeIndexA = f; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - /////////////////////////////////////////////////////////////// - - - bool overlap = false; - - b3ConvexPolyhedronData_t convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = 
indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - } - - float dmin = FLT_MAX; - - int localCC=0; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot3F4(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot3F4(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - 
facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - float4 sepAxis = separatingNormals[i]; - - int shapeTypeB = collidables[collidableIndexB].m_shapeType; - int childShapeIndexB =-1; - if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - /////////////////// - ///compound shape support - - childShapeIndexB = concavePairsIn[pairIndex].w; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - - } - - //////////////////////////////////////// - - - - int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis, - &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsB1,worldVertsB2,capacityWorldVerts, - minDist, maxDist, - &verticesA,&facesA,&indicesA, - vertices,faces,indices, - localContactsOut,localContactCapacity); - - if (numLocalContactsOut>0) - { - float4 normal = -separatingNormals[i]; - int nPoints = numLocalContactsOut; - float4* pointsIn = localContactsOut; - int contactIdx[4];// = {-1,-1,-1,-1}; - - contactIdx[0] = -1; - contactIdx[1] = -1; - contactIdx[2] = -1; - contactIdx[3] = -1; - - int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); - - int dstIdx; - AppendInc( 
nGlobalContactsOut, dstIdx ); - if (dstIdx<contactCapacity) - { - __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = concavePairsIn[pairIndex].x; - int bodyB = concavePairsIn[pairIndex].y; - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA = childShapeIndexA; - c->m_childIndexB = childShapeIndexB; - for (int i=0;i<nReducedContacts;i++) - { - c->m_worldPosB[i] = pointsIn[contactIdx[i]]; - } - GET_NPOINTS(*c) = nReducedContacts; - } - - }// if (numContactsOut>0) - }// if (i<numPairs) -} - - - - - - -int findClippingFaces(const float4 separatingNormal, - __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - __global const float4* vertices, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=-1; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, - faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; - const int numVertices = polyB.m_numIndices; - for(int 
e0=0;e0<numVertices;e0++) - { - const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - - int closestFaceA=-1; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const float4 Normal = make_float4( - faces[hullA->m_faceOffset+face].m_plane.x, - faces[hullA->m_faceOffset+face].m_plane.y, - faces[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; - for(int e0=0;e0<numVerticesA;e0++) - { - const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - -int clipFaces(__global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - __global float4* worldVertsB2, - int capacityWorldVertsB2, - const float minDist, float maxDist, - __global int4* clippingFaces, - int pairIndex) -{ - int numContactsOut = 0; - - int closestFaceA = clippingFaces[pairIndex].x; - int closestFaceB = clippingFaces[pairIndex].y; - int numVertsInA = clippingFaces[pairIndex].z; - int numVertsInB = clippingFaces[pairIndex].w; - - int numVertsOut = 0; - - if (closestFaceA<0) - return numContactsOut; - - __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; - __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; - - - - // clip polygon 
to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e0<numVertsInA;e0++) - { - const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; - const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; - const float4 WorldEdge0 = aw - bw; - float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = aw; - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); - __global float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsInB = numVertsOut; - numVertsOut = 0; - } - - //float4 planeNormalWS = worldNormalsA1[pairIndex]; - //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - - - /*for (int i=0;i<numVertsInB;i++) - { - pVtxOut[i] = pVtxIn[i]; - }*/ - - - - - //numVertsInB=0; - - float4 planeNormalWS = worldNormalsA1[pairIndex]; - float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - for (int i=0;i<numVertsInB;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - - clippingFaces[pairIndex].w =numContactsOut; - - - return numContactsOut; - -} - - - - -__kernel void findClippingFacesKernel( __global const int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const b3Collidable_t* collidables, - __global const b3ConvexPolyhedronData_t* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const b3GpuFace_t* faces, - __global const int* indices, - __global const float4* separatingNormals, - 
__global const int* hasSeparatingAxis, - __global int4* clippingFacesOut, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - int numPairs - ) -{ - - int i = get_global_id(0); - int pairIndex = i; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - int bodyIndexA = pairs[i].x; - int bodyIndexB = pairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - - int numLocalContactsOut = findClippingFaces(separatingNormals[i], - &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], - rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, - rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, - worldVertsA1, - worldNormalsA1, - worldVertsB1,capacityWorldVerts, - minDist, maxDist, - vertices,faces,indices, - clippingFacesOut,i); - - - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - - - -__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, - __global const int* hasSeparatingAxis, - __global int4* clippingFacesOut, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - __global float4* worldVertsB2, - int vertexFaceCapacity, - int numPairs, - int debugMode - ) -{ - int i = get_global_id(0); - int pairIndex = i; - - - float minDist = -1e30f; - float maxDist = 0.02f; - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - -// int bodyIndexA = pairs[i].x; - // int bodyIndexB = pairs[i].y; - - int numLocalContactsOut = 0; - - int capacityWorldVertsB2 = vertexFaceCapacity; - - __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; - __global float4* pVtxOut = 
&worldVertsB2[pairIndex*capacityWorldVertsB2]; - - - { - __global int4* clippingFaces = clippingFacesOut; - - - int closestFaceA = clippingFaces[pairIndex].x; - int closestFaceB = clippingFaces[pairIndex].y; - int numVertsInA = clippingFaces[pairIndex].z; - int numVertsInB = clippingFaces[pairIndex].w; - - int numVertsOut = 0; - - if (closestFaceA>=0) - { - - - - // clip polygon to back of planes of all faces of hull A that are adjacent to witness face - - for(int e0=0;e0<numVertsInA;e0++) - { - const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; - const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; - const float4 WorldEdge0 = aw - bw; - float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; - float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); - float4 worldA1 = aw; - float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); - float4 planeNormalWS = planeNormalWS1; - float planeEqWS=planeEqWS1; - numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); - __global float4* tmp = pVtxOut; - pVtxOut = pVtxIn; - pVtxIn = tmp; - numVertsInB = numVertsOut; - numVertsOut = 0; - } - - float4 planeNormalWS = worldNormalsA1[pairIndex]; - float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); - - for (int i=0;i<numVertsInB;i++) - { - float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; - if (depth <=minDist) - { - depth = minDist; - } - - if (depth <=maxDist) - { - float4 pointInWorld = pVtxIn[i]; - pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); - } - } - - } - clippingFaces[pairIndex].w =numLocalContactsOut; - - - } - - for (int i=0;i<numLocalContactsOut;i++) - pVtxIn[i] = pVtxOut[i]; - - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - -} - - - - - -__kernel void newContactReductionKernel( __global int4* pairs, - __global const b3RigidBodyData_t* rigidBodies, - __global const float4* separatingNormals, - 
__global const int* hasSeparatingAxis, - __global struct b3Contact4Data* globalContactsOut, - __global int4* clippingFaces, - __global float4* worldVertsB2, - volatile __global int* nGlobalContactsOut, - int vertexFaceCapacity, - int contactCapacity, - int numPairs - ) -{ - int i = get_global_id(0); - int pairIndex = i; - - int4 contactIdx; - contactIdx=make_int4(0,1,2,3); - - if (i<numPairs) - { - - if (hasSeparatingAxis[i]) - { - - - - - int nPoints = clippingFaces[pairIndex].w; - - if (nPoints>0) - { - - __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; - float4 normal = -separatingNormals[i]; - - int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); - - int mprContactIndex = pairs[pairIndex].z; - - int dstIdx = mprContactIndex; - - if (dstIdx<0) - { - AppendInc( nGlobalContactsOut, dstIdx ); - } -//#if 0 - - if (dstIdx < contactCapacity) - { - - __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; - c->m_worldNormalOnB = -normal; - c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); - c->m_batchIdx = pairIndex; - int bodyA = pairs[pairIndex].x; - int bodyB = pairs[pairIndex].y; - - pairs[pairIndex].w = dstIdx; - - c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; - c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; - c->m_childIndexA =-1; - c->m_childIndexB =-1; - - switch (nReducedContacts) - { - case 4: - c->m_worldPosB[3] = pointsIn[contactIdx.w]; - case 3: - c->m_worldPosB[2] = pointsIn[contactIdx.z]; - case 2: - c->m_worldPosB[1] = pointsIn[contactIdx.y]; - case 1: - if (mprContactIndex<0)//test - c->m_worldPosB[0] = pointsIn[contactIdx.x]; - default: - { - } - }; - - GET_NPOINTS(*c) = nReducedContacts; - - } - - -//#endif - - }// if (numContactsOut>0) - }// if (hasSeparatingAxis[i]) - }// if (i<numPairs) - - - -} diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h 
b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h deleted file mode 100644 index 907809d8bd..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.h +++ /dev/null @@ -1,2098 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satClipKernelsCL = - "#define TRIANGLE_NUM_CONVEX_FACES 5\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define max2 max\n" - "#define min2 min\n" - "typedef unsigned int u32;\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef 
__cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 
m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#ifndef B3_CONVEX_POLYHEDRON_DATA_H\n" - "#define B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = 
a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "typedef struct b3GpuFace b3GpuFace_t;\n" - "struct b3GpuFace\n" - "{\n" - " b3Float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - " int m_unusedPadding1;\n" - " int m_unusedPadding2;\n" - "};\n" - "typedef struct b3ConvexPolyhedronData b3ConvexPolyhedronData_t;\n" - "struct b3ConvexPolyhedronData\n" - "{\n" - " b3Float4 m_localCenter;\n" - " b3Float4 m_extents;\n" - " b3Float4 mC;\n" - " b3Float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "};\n" - "#endif //B3_CONVEX_POLYHEDRON_DATA_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - 
"#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "struct b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, 
quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 
mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#define GET_NPOINTS(x) (x).m_worldNormalOnB.w\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - 
"#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "//#define dot3F4 dot\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - 
"__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "__inline float4 lerp3(const float4 a,const float4 b, float t)\n" - "{\n" - " return make_float4( a.x + (b.x - a.x) * t,\n" - " a.y + (b.y - a.y) * t,\n" - " a.z + (b.z - a.z) * t,\n" - " 0.f);\n" - "}\n" - "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" - "int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut)\n" - "{\n" - " \n" - " int ve;\n" - " float ds, de;\n" - " int numVertsOut = 0;\n" - " //double-check next test\n" - " if (numVertsIn < 2)\n" - " return 0;\n" - " \n" - " float4 firstVertex=pVtxIn[numVertsIn-1];\n" - " float4 endVertex = pVtxIn[0];\n" - " \n" - " ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" - " \n" - " for (ve = 0; ve < numVertsIn; ve++)\n" - " {\n" - " endVertex=pVtxIn[ve];\n" - " de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" - " if (ds<0)\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start < 0, end < 0, so output endVertex\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " else\n" - " {\n" - " // Start < 0, end >= 0, so output intersection\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start >= 0, end < 0 so output intersection and end\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 
1.f/(ds - de)) );\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " }\n" - " firstVertex = endVertex;\n" - " ds = de;\n" - " }\n" - " return numVertsOut;\n" - "}\n" - "// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut\n" - "int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut)\n" - "{\n" - " \n" - " int ve;\n" - " float ds, de;\n" - " int numVertsOut = 0;\n" - "//double-check next test\n" - " if (numVertsIn < 2)\n" - " return 0;\n" - " float4 firstVertex=pVtxIn[numVertsIn-1];\n" - " float4 endVertex = pVtxIn[0];\n" - " \n" - " ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS;\n" - " for (ve = 0; ve < numVertsIn; ve++)\n" - " {\n" - " endVertex=pVtxIn[ve];\n" - " de = dot3F4(planeNormalWS,endVertex)+planeEqWS;\n" - " if (ds<0)\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start < 0, end < 0, so output endVertex\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " else\n" - " {\n" - " // Start < 0, end >= 0, so output intersection\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if (de<0)\n" - " {\n" - " // Start >= 0, end < 0 so output intersection and end\n" - " ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) );\n" - " ppVtxOut[numVertsOut++] = endVertex;\n" - " }\n" - " }\n" - " firstVertex = endVertex;\n" - " ds = de;\n" - " }\n" - " return numVertsOut;\n" - "}\n" - "int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, \n" - " const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" - " float4* worldVertsB2, int capacityWorldVertsB2,\n" - " const float minDist, float maxDist,\n" - " __global const float4* vertices,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " float4* contactsOut,\n" - " int contactCapacity)\n" - "{\n" - " 
int numContactsOut = 0;\n" - " float4* pVtxIn = worldVertsB1;\n" - " float4* pVtxOut = worldVertsB2;\n" - " \n" - " int numVertsIn = numWorldVertsB1;\n" - " int numVertsOut = 0;\n" - " int closestFaceA=-1;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;face<hullA->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " faces[hullA->m_faceOffset+face].m_plane.x, \n" - " faces[hullA->m_faceOffset+face].m_plane.y, \n" - " faces[hullA->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " }\n" - " }\n" - " }\n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA];\n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " int numVerticesA = polyA.m_numIndices;\n" - " for(int e0=0;e0<numVerticesA;e0++)\n" - " {\n" - " const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]];\n" - " const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" - " const float4 edge0 = a - b;\n" - " const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" - " float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = transform(&a,&posA,&ornA);\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " \n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " \n" - " //clip face\n" - " //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" - " numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" - " //btSwap(pVtxIn,pVtxOut);\n" - " float4* tmp = 
pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsIn = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " // only keep points that are behind the witness face\n" - " {\n" - " float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float localPlaneEq = polyA.m_plane.w;\n" - " float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" - " float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" - " for (int i=0;i<numVertsIn;i++)\n" - " {\n" - " float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" - " if (depth <=minDist)\n" - " {\n" - " depth = minDist;\n" - " }\n" - " if (depth <=maxDist)\n" - " {\n" - " float4 pointInWorld = pVtxIn[i];\n" - " //resultOut.addContactPoint(separatingNormal,point,depth);\n" - " contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" - " }\n" - " }\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, \n" - " const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1,\n" - " float4* worldVertsB2, int capacityWorldVertsB2,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const b3GpuFace_t* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const b3GpuFace_t* facesB,\n" - " __global const int* indicesB,\n" - " float4* contactsOut,\n" - " int contactCapacity)\n" - "{\n" - " int numContactsOut = 0;\n" - " float4* pVtxIn = worldVertsB1;\n" - " float4* pVtxOut = worldVertsB2;\n" - " \n" - " int numVertsIn = numWorldVertsB1;\n" - " int numVertsOut = 0;\n" - " int closestFaceA=-1;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;face<hullA->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x, \n" - " facesA[hullA->m_faceOffset+face].m_plane.y, \n" 
- " facesA[hullA->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " }\n" - " }\n" - " }\n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA];\n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " int numVerticesA = polyA.m_numIndices;\n" - " for(int e0=0;e0<numVerticesA;e0++)\n" - " {\n" - " const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]];\n" - " const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]];\n" - " const float4 edge0 = a - b;\n" - " const float4 WorldEdge0 = qtRotate(ornA,edge0);\n" - " float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA);\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = transform(&a,&posA,&ornA);\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " \n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " \n" - " //clip face\n" - " //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS);\n" - " numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut);\n" - " //btSwap(pVtxIn,pVtxOut);\n" - " float4* tmp = pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsIn = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " // only keep points that are behind the witness face\n" - " {\n" - " float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f);\n" - " float localPlaneEq = polyA.m_plane.w;\n" - " float4 planeNormalWS = qtRotate(ornA,localPlaneNormal);\n" - " float 
planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA);\n" - " for (int i=0;i<numVertsIn;i++)\n" - " {\n" - " float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" - " if (depth <=minDist)\n" - " {\n" - " depth = minDist;\n" - " }\n" - " if (depth <=maxDist)\n" - " {\n" - " float4 pointInWorld = pVtxIn[i];\n" - " //resultOut.addContactPoint(separatingNormal,point,depth);\n" - " contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" - " }\n" - " }\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "int clipHullAgainstHull(const float4 separatingNormal,\n" - " __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" - " float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " __global const float4* vertices,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " float4* localContactsOut,\n" - " int localContactCapacity)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " int closestFaceB=-1;\n" - " float dmax = -FLT_MAX;\n" - " {\n" - " for(int face=0;face<hullB->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, \n" - " faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " {\n" - " const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0<numVertices;e0++)\n" - " {\n" - " const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" - " 
worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " if (closestFaceB>=0)\n" - " {\n" - " numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, \n" - " posA,ornA,\n" - " worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices,\n" - " faces,\n" - " indices,localContactsOut,localContactCapacity);\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "int clipHullAgainstHullLocalA(const float4 separatingNormal,\n" - " const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, \n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, \n" - " float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const b3GpuFace_t* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const b3GpuFace_t* facesB,\n" - " __global const int* indicesB,\n" - " float4* localContactsOut,\n" - " int localContactCapacity)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " int closestFaceB=-1;\n" - " float dmax = -FLT_MAX;\n" - " {\n" - " for(int face=0;face<hullB->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, \n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " {\n" - " const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0<numVertices;e0++)\n" - " {\n" - " const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[numWorldVertsB1++] = 
transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " if (closestFaceB>=0)\n" - " {\n" - " numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, \n" - " posA,ornA,\n" - " worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,\n" - " verticesA,facesA,indicesA,\n" - " verticesB,facesB,indicesB,\n" - " localContactsOut,localContactCapacity);\n" - " }\n" - " return numContactsOut;\n" - "}\n" - "#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j];\n" - "#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;}\n" - "#define REDUCE_MAX(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; }\n" - "#define REDUCE_MIN(v, n) {int i=0; for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; }\n" - "int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx)\n" - "{\n" - " if( nPoints == 0 )\n" - " return 0;\n" - " \n" - " if (nPoints <=4)\n" - " return nPoints;\n" - " \n" - " \n" - " if (nPoints >64)\n" - " nPoints = 64;\n" - " \n" - " float4 center = make_float4(0.f);\n" - " {\n" - " \n" - " for (int i=0;i<nPoints;i++)\n" - " center += p[i];\n" - " center /= (float)nPoints;\n" - " }\n" - " \n" - " \n" - " \n" - " // sample 4 directions\n" - " \n" - " float4 aVector = p[0] - center;\n" - " float4 u = cross3( nearNormal, aVector );\n" - " float4 v = cross3( nearNormal, u );\n" - " u = normalize3( u );\n" - " v = normalize3( v );\n" - " \n" - " \n" - " //keep point with deepest penetration\n" - " float minW= FLT_MAX;\n" - " \n" - " int minIndex=-1;\n" - " \n" - " float4 maxDots;\n" - " maxDots.x = FLT_MIN;\n" - " maxDots.y = FLT_MIN;\n" - " maxDots.z = FLT_MIN;\n" - " maxDots.w = FLT_MIN;\n" - " \n" - " // idx, distance\n" - " for(int ie = 0; ie<nPoints; ie++ )\n" - " {\n" - " if (p[ie].w<minW)\n" - " {\n" - " minW = p[ie].w;\n" - " minIndex=ie;\n" - " }\n" - " float f;\n" - " 
float4 r = p[ie]-center;\n" - " f = dot3F4( u, r );\n" - " if (f<maxDots.x)\n" - " {\n" - " maxDots.x = f;\n" - " contactIdx[0].x = ie;\n" - " }\n" - " \n" - " f = dot3F4( -u, r );\n" - " if (f<maxDots.y)\n" - " {\n" - " maxDots.y = f;\n" - " contactIdx[0].y = ie;\n" - " }\n" - " \n" - " \n" - " f = dot3F4( v, r );\n" - " if (f<maxDots.z)\n" - " {\n" - " maxDots.z = f;\n" - " contactIdx[0].z = ie;\n" - " }\n" - " \n" - " f = dot3F4( -v, r );\n" - " if (f<maxDots.w)\n" - " {\n" - " maxDots.w = f;\n" - " contactIdx[0].w = ie;\n" - " }\n" - " \n" - " }\n" - " \n" - " if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex)\n" - " {\n" - " //replace the first contact with minimum (todo: replace contact with least penetration)\n" - " contactIdx[0].x = minIndex;\n" - " }\n" - " \n" - " return 4;\n" - " \n" - "}\n" - "int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" - "{\n" - " contactIdx[0] = 0;\n" - " contactIdx[1] = 1;\n" - " contactIdx[2] = 2;\n" - " contactIdx[3] = 3;\n" - " \n" - " if( nPoints == 0 ) return 0;\n" - " \n" - " nPoints = min2( nPoints, 4 );\n" - " return nPoints;\n" - " \n" - "}\n" - "int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx)\n" - "{\n" - " if( nPoints == 0 ) return 0;\n" - " nPoints = min2( nPoints, 64 );\n" - " float4 center = make_float4(0.f);\n" - " {\n" - " float4 v[64];\n" - " for (int i=0;i<nPoints;i++)\n" - " v[i] = p[i];\n" - " //memcpy( v, p, nPoints*sizeof(float4) );\n" - " PARALLEL_SUM( v, nPoints );\n" - " center = v[0]/(float)nPoints;\n" - " }\n" - " \n" - " { // sample 4 directions\n" - " if( nPoints < 4 )\n" - " {\n" - " for(int i=0; i<nPoints; i++) \n" - " contactIdx[i] = i;\n" - " return nPoints;\n" - " }\n" - " float4 aVector = p[0] - center;\n" - " float4 u = cross3( nearNormal, aVector );\n" - " float4 v = cross3( nearNormal, u );\n" - " u = 
normalize3( u );\n" - " v = normalize3( v );\n" - " int idx[4];\n" - " float2 max00 = make_float2(0,FLT_MAX);\n" - " {\n" - " // idx, distance\n" - " {\n" - " {\n" - " int4 a[64];\n" - " for(int ie = 0; ie<nPoints; ie++ )\n" - " {\n" - " \n" - " \n" - " float f;\n" - " float4 r = p[ie]-center;\n" - " f = dot3F4( u, r );\n" - " a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" - " f = dot3F4( -u, r );\n" - " a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" - " f = dot3F4( v, r );\n" - " a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" - " f = dot3F4( -v, r );\n" - " a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie);\n" - " }\n" - " for(int ie=0; ie<nPoints; ie++)\n" - " {\n" - " a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x;\n" - " a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y;\n" - " a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z;\n" - " a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w;\n" - " }\n" - " idx[0] = (int)a[0].x & 0xff;\n" - " idx[1] = (int)a[0].y & 0xff;\n" - " idx[2] = (int)a[0].z & 0xff;\n" - " idx[3] = (int)a[0].w & 0xff;\n" - " }\n" - " }\n" - " {\n" - " float2 h[64];\n" - " PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints );\n" - " REDUCE_MIN( h, nPoints );\n" - " max00 = h[0];\n" - " }\n" - " }\n" - " contactIdx[0] = idx[0];\n" - " contactIdx[1] = idx[1];\n" - " contactIdx[2] = idx[2];\n" - " contactIdx[3] = idx[3];\n" - " return 4;\n" - " }\n" - "}\n" - "__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const float4* closestPointsWorld,\n" - " __global const float4* separatingNormalsWorld,\n" - " __global const int* contactCounts,\n" - " __global const int* contactOffsets,\n" - " __global struct b3Contact4Data* restrict contactsOut,\n" - " counter32_t nContactsOut,\n" - " int contactCapacity,\n" - " int numPairs,\n" - " int pairIndex\n" - " )\n" - "{\n" - " int idx = get_global_id(0);\n" - " \n" - " if (idx<numPairs)\n" - 
" {\n" - " float4 normal = separatingNormalsWorld[idx];\n" - " int nPoints = contactCounts[idx];\n" - " __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]];\n" - " float4 localPoints[64];\n" - " for (int i=0;i<nPoints;i++)\n" - " {\n" - " localPoints[i] = pointsIn[i];\n" - " }\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx);\n" - " int dstIdx;\n" - " AppendInc( nContactsOut, dstIdx );\n" - " if (dstIdx<contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = contactsOut + dstIdx;\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = idx;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? 
-bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " for (int i=0;i<nContacts;i++)\n" - " {\n" - " c->m_worldPosB[i] = localPoints[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nContacts;\n" - " }\n" - " }\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "void trMul(float4 translationA, Quaternion orientationA,\n" - " float4 translationB, Quaternion orientationB,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtMul(orientationA,orientationB);\n" - " *translationOut = transform(&translationB,&translationA,&orientationA);\n" - "}\n" - "__kernel void clipHullHullKernel( __global int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numPairs,\n" - " int contactCapacity)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i<numPairs)\n" - " {\n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int 
collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i],\n" - " &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" - " rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" - " rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" - " worldVertsB1,worldVertsB2,capacityWorldVerts,\n" - " minDist, maxDist,\n" - " vertices,faces,indices,\n" - " localContactsOut,localContactCapacity);\n" - " \n" - " if (numLocalContactsOut>0)\n" - " {\n" - " float4 normal = -separatingNormals[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " \n" - " int mprContactIndex = pairs[pairIndex].z;\n" - " int dstIdx = mprContactIndex;\n" - " if (dstIdx<0)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " }\n" - " if (dstIdx<contactCapacity)\n" - " {\n" - " pairs[pairIndex].z = dstIdx;\n" - " __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " for (int i=0;i<nReducedContacts;i++)\n" - " {\n" - " //this 
condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact\n" - " if (i>0||(mprContactIndex<0))\n" - " {\n" - " c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (i<numPairs)\n" - "}\n" - "__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " __global const b3GpuChildShape_t* gpuChildShapes,\n" - " __global const float4* gpuCompoundSepNormalsOut,\n" - " __global const int* gpuHasCompoundSepNormalsOut,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int numCompoundPairs, int maxContactCapacity)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i<numCompoundPairs)\n" - " {\n" - " if (gpuHasCompoundSepNormalsOut[i])\n" - " {\n" - " int bodyIndexA = gpuCompoundPairs[i].x;\n" - " int bodyIndexB = gpuCompoundPairs[i].y;\n" - " \n" - " int childShapeIndexA = gpuCompoundPairs[i].z;\n" - " int childShapeIndexB = gpuCompoundPairs[i].w;\n" - " \n" - " int collidableIndexA = -1;\n" - " int collidableIndexB = -1;\n" - " \n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " \n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " 
float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " \n" - " if (childShapeIndexA >= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" - " {\n" - " collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i],\n" - " &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " worldVertsB1,worldVertsB2,capacityWorldVerts,\n" - " minDist, maxDist,\n" - " vertices,faces,indices,\n" - " localContactsOut,localContactCapacity);\n" - " \n" - " if (numLocalContactsOut>0)\n" - " {\n" - " float4 normal = -gpuCompoundSepNormalsOut[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = 
extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if ((dstIdx+nReducedContacts) < maxContactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = gpuCompoundPairs[pairIndex].x;\n" - " int bodyB = gpuCompoundPairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = childShapeIndexA;\n" - " c->m_childIndexB = childShapeIndexB;\n" - " for (int i=0;i<nReducedContacts;i++)\n" - " {\n" - " c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (gpuHasCompoundSepNormalsOut[i])\n" - " }// if (i<numCompoundPairs)\n" - "}\n" - "__kernel void sphereSphereCollisionKernel( __global const int4* pairs, \n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int contactCapacity,\n" - " int numPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE &&\n" - " collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " 
//sphere-sphere\n" - " float radiusA = collidables[collidableIndexA].m_radius;\n" - " float radiusB = collidables[collidableIndexB].m_radius;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " float4 diff = posA-posB;\n" - " float len = length(diff);\n" - " \n" - " ///iff distance positive, don't generate a new contact\n" - " if ( len <= (radiusA+radiusB))\n" - " {\n" - " ///distance (negative means penetration)\n" - " float dist = len - (radiusA+radiusB);\n" - " float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f);\n" - " if (len > 0.00001)\n" - " {\n" - " normalOnSurfaceB = diff / len;\n" - " }\n" - " float4 contactPosB = posB + normalOnSurfaceB*radiusB;\n" - " contactPosB.w = dist;\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx < contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normalOnSurfaceB;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_worldPosB[0] = contactPosB;\n" - " c->m_childIndexA = -1;\n" - " c->m_childIndexB = -1;\n" - " GET_NPOINTS(*c) = 1;\n" - " }//if (dstIdx < numPairs)\n" - " }//if ( len <= (radiusA+radiusB))\n" - " }//SHAPE_SPHERE SHAPE_SPHERE\n" - " }//if (i<numPairs)\n" - "} \n" - "__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn,\n" - " __global const b3RigidBodyData_t* rigidBodies, \n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const b3GpuFace_t* 
faces,\n" - " __global const int* indices,\n" - " __global const b3GpuChildShape_t* gpuChildShapes,\n" - " __global const float4* separatingNormals,\n" - " __global struct b3Contact4Data* restrict globalContactsOut,\n" - " counter32_t nGlobalContactsOut,\n" - " int contactCapacity,\n" - " int numConcavePairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " float4 worldVertsB1[64];\n" - " float4 worldVertsB2[64];\n" - " int capacityWorldVerts = 64; \n" - " float4 localContactsOut[64];\n" - " int localContactCapacity=64;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " if (i<numConcavePairs)\n" - " {\n" - " //negative value means that the pair is invalid\n" - " if (concavePairsIn[i].w<0)\n" - " return;\n" - " int bodyIndexA = concavePairsIn[i].x;\n" - " int bodyIndexB = concavePairsIn[i].y;\n" - " int f = concavePairsIn[i].z;\n" - " int childShapeIndexA = f;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " ///////////////////////////////////////////////////////////////\n" - " \n" - " \n" - " bool overlap = false;\n" - " \n" - " b3ConvexPolyhedronData_t convexPolyhedronA;\n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " }\n" - " float dmin = FLT_MAX;\n" - " int localCC=0;\n" - " 
//a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot3F4(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = numVertices-1;\n" - " for (int i=0;i<numVertices;i++)\n" - " {\n" - " float4 v0 = verticesA[i];\n" - " float4 v1 = verticesA[prevVertex];\n" - " \n" - " float4 edgeNormal = normalize(cross(normal,v1-v0));\n" - " float c = -dot3F4(edgeNormal,v0);\n" - " facesA[fidx].m_numIndices = 2;\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " 
indicesA[curUsedIndices++]=i;\n" - " indicesA[curUsedIndices++]=prevVertex;\n" - " \n" - " facesA[fidx].m_plane.x = edgeNormal.x;\n" - " facesA[fidx].m_plane.y = edgeNormal.y;\n" - " facesA[fidx].m_plane.z = edgeNormal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " fidx++;\n" - " prevVertex = i;\n" - " }\n" - " }\n" - " convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" - " convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " float4 sepAxis = separatingNormals[i];\n" - " \n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " int childShapeIndexB =-1;\n" - " if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " ///////////////////\n" - " ///compound shape support\n" - " \n" - " childShapeIndexB = concavePairsIn[pairIndex].w;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " \n" - " }\n" - " \n" - " ////////////////////////////////////////\n" - " \n" - " \n" - " \n" - " int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis,\n" - " &convexPolyhedronA, &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " worldVertsB1,worldVertsB2,capacityWorldVerts,\n" - " minDist, maxDist,\n" - " &verticesA,&facesA,&indicesA,\n" - " vertices,faces,indices,\n" - " localContactsOut,localContactCapacity);\n" - " \n" - " if (numLocalContactsOut>0)\n" - " {\n" - " float4 normal 
= -separatingNormals[i];\n" - " int nPoints = numLocalContactsOut;\n" - " float4* pointsIn = localContactsOut;\n" - " int contactIdx[4];// = {-1,-1,-1,-1};\n" - " contactIdx[0] = -1;\n" - " contactIdx[1] = -1;\n" - " contactIdx[2] = -1;\n" - " contactIdx[3] = -1;\n" - " \n" - " int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx);\n" - " \n" - " int dstIdx;\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " if (dstIdx<contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = globalContactsOut+ dstIdx;\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = concavePairsIn[pairIndex].x;\n" - " int bodyB = concavePairsIn[pairIndex].y;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA = childShapeIndexA;\n" - " c->m_childIndexB = childShapeIndexB;\n" - " for (int i=0;i<nReducedContacts;i++)\n" - " {\n" - " c->m_worldPosB[i] = pointsIn[contactIdx[i]];\n" - " }\n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " }\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (i<numPairs)\n" - "}\n" - "int findClippingFaces(const float4 separatingNormal,\n" - " __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB,\n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " __global const float4* vertices,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " __global int4* clippingFaces, int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " \n" - " 
\n" - " int closestFaceB=-1;\n" - " float dmax = -FLT_MAX;\n" - " \n" - " {\n" - " for(int face=0;face<hullB->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x,\n" - " faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB];\n" - " const int numVertices = polyB.m_numIndices;\n" - " for(int e0=0;e0<numVertices;e0++)\n" - " {\n" - " const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=-1;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;face<hullA->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " faces[hullA->m_faceOffset+face].m_plane.x,\n" - " faces[hullA->m_faceOffset+face].m_plane.y,\n" - " faces[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " for(int e0=0;e0<numVerticesA;e0++)\n" - " {\n" - " const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = 
closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "int clipFaces(__global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " __global float4* worldVertsB2, \n" - " int capacityWorldVertsB2,\n" - " const float minDist, float maxDist,\n" - " __global int4* clippingFaces,\n" - " int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " \n" - " int closestFaceA = clippingFaces[pairIndex].x;\n" - " int closestFaceB = clippingFaces[pairIndex].y;\n" - " int numVertsInA = clippingFaces[pairIndex].z;\n" - " int numVertsInB = clippingFaces[pairIndex].w;\n" - " \n" - " int numVertsOut = 0;\n" - " \n" - " if (closestFaceA<0)\n" - " return numContactsOut;\n" - " \n" - " __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" - " __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" - " \n" - " \n" - " \n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " \n" - " for(int e0=0;e0<numVertsInA;e0++)\n" - " {\n" - " const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" - " const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" - " const float4 WorldEdge0 = aw - bw;\n" - " float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = aw;\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" - " __global float4* tmp = pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsInB = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " //float4 planeNormalWS = 
worldNormalsA1[pairIndex];\n" - " //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" - " \n" - " /*for (int i=0;i<numVertsInB;i++)\n" - " {\n" - " pVtxOut[i] = pVtxIn[i];\n" - " }*/\n" - " \n" - " \n" - " \n" - " \n" - " //numVertsInB=0;\n" - " \n" - " float4 planeNormalWS = worldNormalsA1[pairIndex];\n" - " float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" - " for (int i=0;i<numVertsInB;i++)\n" - " {\n" - " float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" - " if (depth <=minDist)\n" - " {\n" - " depth = minDist;\n" - " }\n" - " \n" - " if (depth <=maxDist)\n" - " {\n" - " float4 pointInWorld = pVtxIn[i];\n" - " pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" - " }\n" - " }\n" - " \n" - " clippingFaces[pairIndex].w =numContactsOut;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "__kernel void findClippingFacesKernel( __global const int4* pairs,\n" - " __global const b3RigidBodyData_t* rigidBodies,\n" - " __global const b3Collidable_t* collidables,\n" - " __global const b3ConvexPolyhedronData_t* convexShapes,\n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const b3GpuFace_t* faces,\n" - " __global const int* indices,\n" - " __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " int capacityWorldVerts,\n" - " int numPairs\n" - " )\n" - "{\n" - " \n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " \n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int 
collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " \n" - " int numLocalContactsOut = findClippingFaces(separatingNormals[i],\n" - " &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],\n" - " rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat,\n" - " rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat,\n" - " worldVertsA1,\n" - " worldNormalsA1,\n" - " worldVertsB1,capacityWorldVerts,\n" - " minDist, maxDist,\n" - " vertices,faces,indices,\n" - " clippingFacesOut,i);\n" - " \n" - " \n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (i<numPairs)\n" - " \n" - "}\n" - "__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " __global float4* worldVertsB2,\n" - " int vertexFaceCapacity,\n" - " int numPairs,\n" - " int debugMode\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " \n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - "// int bodyIndexA = pairs[i].x;\n" - " // int bodyIndexB = pairs[i].y;\n" - " \n" - " int numLocalContactsOut = 0;\n" - " int capacityWorldVertsB2 = vertexFaceCapacity;\n" - " \n" - " __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2];\n" - " __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2];\n" - " \n" - " {\n" - " __global int4* clippingFaces = clippingFacesOut;\n" - " \n" - " \n" - " int closestFaceA = clippingFaces[pairIndex].x;\n" - " int 
closestFaceB = clippingFaces[pairIndex].y;\n" - " int numVertsInA = clippingFaces[pairIndex].z;\n" - " int numVertsInB = clippingFaces[pairIndex].w;\n" - " \n" - " int numVertsOut = 0;\n" - " \n" - " if (closestFaceA>=0)\n" - " {\n" - " \n" - " \n" - " \n" - " // clip polygon to back of planes of all faces of hull A that are adjacent to witness face\n" - " \n" - " for(int e0=0;e0<numVertsInA;e0++)\n" - " {\n" - " const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0];\n" - " const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)];\n" - " const float4 WorldEdge0 = aw - bw;\n" - " float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex];\n" - " float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1);\n" - " float4 worldA1 = aw;\n" - " float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1);\n" - " float4 planeNormalWS = planeNormalWS1;\n" - " float planeEqWS=planeEqWS1;\n" - " numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut);\n" - " __global float4* tmp = pVtxOut;\n" - " pVtxOut = pVtxIn;\n" - " pVtxIn = tmp;\n" - " numVertsInB = numVertsOut;\n" - " numVertsOut = 0;\n" - " }\n" - " \n" - " float4 planeNormalWS = worldNormalsA1[pairIndex];\n" - " float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]);\n" - " \n" - " for (int i=0;i<numVertsInB;i++)\n" - " {\n" - " float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS;\n" - " if (depth <=minDist)\n" - " {\n" - " depth = minDist;\n" - " }\n" - " \n" - " if (depth <=maxDist)\n" - " {\n" - " float4 pointInWorld = pVtxIn[i];\n" - " pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth);\n" - " }\n" - " }\n" - " \n" - " }\n" - " clippingFaces[pairIndex].w =numLocalContactsOut;\n" - " \n" - " }\n" - " \n" - " for (int i=0;i<numLocalContactsOut;i++)\n" - " pVtxIn[i] = pVtxOut[i];\n" - " \n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (i<numPairs)\n" - " \n" - "}\n" - "__kernel 
void newContactReductionKernel( __global int4* pairs,\n" - " __global const b3RigidBodyData_t* rigidBodies,\n" - " __global const float4* separatingNormals,\n" - " __global const int* hasSeparatingAxis,\n" - " __global struct b3Contact4Data* globalContactsOut,\n" - " __global int4* clippingFaces,\n" - " __global float4* worldVertsB2,\n" - " volatile __global int* nGlobalContactsOut,\n" - " int vertexFaceCapacity,\n" - " int contactCapacity,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " int pairIndex = i;\n" - " \n" - " int4 contactIdx;\n" - " contactIdx=make_int4(0,1,2,3);\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " \n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - " \n" - " \n" - " \n" - " int nPoints = clippingFaces[pairIndex].w;\n" - " \n" - " if (nPoints>0)\n" - " {\n" - " __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity];\n" - " float4 normal = -separatingNormals[i];\n" - " \n" - " int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx);\n" - " \n" - " int mprContactIndex = pairs[pairIndex].z;\n" - " int dstIdx = mprContactIndex;\n" - " if (dstIdx<0)\n" - " {\n" - " AppendInc( nGlobalContactsOut, dstIdx );\n" - " }\n" - "//#if 0\n" - " \n" - " if (dstIdx < contactCapacity)\n" - " {\n" - " __global struct b3Contact4Data* c = &globalContactsOut[dstIdx];\n" - " c->m_worldNormalOnB = -normal;\n" - " c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff);\n" - " c->m_batchIdx = pairIndex;\n" - " int bodyA = pairs[pairIndex].x;\n" - " int bodyB = pairs[pairIndex].y;\n" - " pairs[pairIndex].w = dstIdx;\n" - " c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA;\n" - " c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB;\n" - " c->m_childIndexA =-1;\n" - " c->m_childIndexB =-1;\n" - " switch (nReducedContacts)\n" - " {\n" - " case 4:\n" - " c->m_worldPosB[3] = pointsIn[contactIdx.w];\n" - " case 3:\n" - " 
c->m_worldPosB[2] = pointsIn[contactIdx.z];\n" - " case 2:\n" - " c->m_worldPosB[1] = pointsIn[contactIdx.y];\n" - " case 1:\n" - " if (mprContactIndex<0)//test\n" - " c->m_worldPosB[0] = pointsIn[contactIdx.x];\n" - " default:\n" - " {\n" - " }\n" - " };\n" - " \n" - " GET_NPOINTS(*c) = nReducedContacts;\n" - " \n" - " }\n" - " \n" - " \n" - "//#endif\n" - " \n" - " }// if (numContactsOut>0)\n" - " }// if (hasSeparatingAxis[i])\n" - " }// if (i<numPairs)\n" - " \n" - " \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl deleted file mode 100644 index 31ca43b8cd..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcave.cl +++ /dev/null @@ -1,1220 +0,0 @@ - -//keep this enum in sync with the CPU version (in btCollidable.h) -//written by Erwin Coumans - - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_CONCAVE_TRIMESH 5 -#define TRIANGLE_NUM_CONVEX_FACES 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 - -#define B3_MAX_STACK_DEPTH 256 - - -typedef unsigned int u32; - -///keep this in sync with btCollidable.h -typedef struct -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - union - { - float m_radius; - int m_compoundBvhIndex; - }; - - int m_shapeType; - int m_shapeIndex; - -} btCollidableGpu; - -#define MAX_NUM_PARTS_IN_BITS 10 - -///b3QuantizedBvhNode is a compressed aabb node, 16 bytes. -///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range). 
-typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes - int m_escapeIndexOrTriangleIndex; -} b3QuantizedBvhNode; - -typedef struct -{ - float4 m_aabbMin; - float4 m_aabbMax; - float4 m_quantization; - int m_numNodes; - int m_numSubTrees; - int m_nodeOffset; - int m_subTreeOffset; - -} b3BvhInfo; - - -int getTriangleIndex(const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - unsigned int x=0; - unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS); - // Get only the lower bits where the triangle index is stored - return (rootNode->m_escapeIndexOrTriangleIndex&~(y)); -} - -int isLeafNode(const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0; -} - -int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - //skipindex is negative (internal node), triangleindex >=0 (leafnode) - return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 
1 : 0; -} - -int getEscapeIndex(const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - -int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode) -{ - return -rootNode->m_escapeIndexOrTriangleIndex; -} - - -typedef struct -{ - //12 bytes - unsigned short int m_quantizedAabbMin[3]; - unsigned short int m_quantizedAabbMax[3]; - //4 bytes, points to the root of the subtree - int m_rootNodeIndex; - //4 bytes - int m_subtreeSize; - int m_padding[3]; -} b3BvhSubtreeInfo; - - - - - - - -typedef struct -{ - float4 m_childPosition; - float4 m_childOrientation; - int m_shapeIndex; - int m_unused0; - int m_unused1; - int m_unused2; -} btGpuChildShape; - - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} BodyData; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; -} ConvexPolyhedronCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - -#include "Bullet3Collision/BroadPhaseCollision/shared/b3Aabb.h" -#include "Bullet3Common/shared/b3Int2.h" - - - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} btGpuFace; - -#define make_float4 (float4) - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); - - -// float4 a1 = make_float4(a.xyz,0.f); -// float4 b1 = make_float4(b.xyz,0.f); - -// return cross(a1,b1); - -//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f); - - // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f); - - //return c; -} - 
-__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = make_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - -__inline -float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) -{ - return qtRotate( *orientation, *p ) + (*translation); -} - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -} - -inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int 
i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, -const float4* dir, __global const float4* vertices, float* min, float* max) -{ - min[0] = FLT_MAX; - max[0] = -FLT_MAX; - int numVerts = hull->m_numVertices; - - const float4 localDir = qtInvRotate(orn,*dir); - float offset = dot(pos,*dir); - for(int i=0;i<numVerts;i++) - { - float dp = dot(vertices[hull->m_vertexOffset+i],localDir); - if(dp < min[0]) - min[0] = dp; - if(dp > max[0]) - max[0] = dp; - } - if(min[0]>max[0]) - { - float tmp = min[0]; - min[0] = max[0]; - max[0] = tmp; - } - min[0] += offset; - max[0] += offset; -} - -inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA,const float4 ornA, - const float4 posB,const float4 ornB, - float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth) -{ - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0); - project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - return false; - - float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - *depth = d0<d1 ? 
d0:d1; - return true; -} - - - - -inline bool IsAlmostZero(const float4 v) -{ - if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f) - return false; - return true; -} - - - -bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS*=-1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - -bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - __global const float4* verticesA, - __global const float4* uniqueEdgesA, - __global const btGpuFace* facesA, - __global const int* indicesA, - const float4* verticesB, - const float4* uniqueEdgesB, - const btGpuFace* facesB, - const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - int curPlaneTests=0; - { - int numFacesA = 
hullA->m_numFaces; - // Test normals from hullA - for(int i=0;i<numFacesA;i++) - { - const float4 normal = facesA[hullA->m_faceOffset+i].m_plane; - float4 faceANormalWS = qtRotate(ornA,normal); - if (dot3F4(DeltaC2,faceANormalWS)<0) - faceANormalWS *= -1.f; - curPlaneTests++; - float d; - if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d)) - return false; - if(d<*dmin) - { - *dmin = d; - *sep = faceANormalWS; - } - } - } - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, - const float4 posA1, - const float4 ornA, - const float4 posB1, - const float4 ornB, - const float4 DeltaC2, - const float4* verticesA, - const float4* uniqueEdgesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const float4* uniqueEdgesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - float4* sep, - float* dmin) -{ - - - float4 posA = posA1; - posA.w = 0.f; - float4 posB = posB1; - posB.w = 0.f; - - int curPlaneTests=0; - - int curEdgeEdge = 0; - // Test edges - for(int e0=0;e0<hullA->m_numUniqueEdges;e0++) - { - const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0]; - float4 edge0World = qtRotate(ornA,edge0); - - for(int e1=0;e1<hullB->m_numUniqueEdges;e1++) - { - const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1]; - float4 edge1World = qtRotate(ornB,edge1); - - - float4 crossje = cross3(edge0World,edge1World); - - curEdgeEdge++; - if(!IsAlmostZero(crossje)) - { - crossje = normalize3(crossje); - if (dot3F4(DeltaC2,crossje)<0) - crossje *= -1.f; - - float dist; - bool result = true; - { - float Min0,Max0; - float Min1,Max1; - projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0); - project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1); - - if(Max0<Min1 || Max1<Min0) - result = false; - - 
float d0 = Max0 - Min1; - float d1 = Max1 - Min0; - dist = d0<d1 ? d0:d1; - result = true; - - } - - - if(dist<*dmin) - { - *dmin = dist; - *sep = crossje; - } - } - } - - } - - - if((dot3F4(-DeltaC2,*sep))>0.0f) - { - *sep = -(*sep); - } - return true; -} - - - -inline int findClippingFaces(const float4 separatingNormal, - const ConvexPolyhedronCL* hullA, - __global const ConvexPolyhedronCL* hullB, - const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, - __global float4* worldVertsA1, - __global float4* worldNormalsA1, - __global float4* worldVertsB1, - int capacityWorldVerts, - const float minDist, float maxDist, - const float4* verticesA, - const btGpuFace* facesA, - const int* indicesA, - __global const float4* verticesB, - __global const btGpuFace* facesB, - __global const int* indicesB, - __global int4* clippingFaces, int pairIndex) -{ - int numContactsOut = 0; - int numWorldVertsB1= 0; - - - int closestFaceB=0; - float dmax = -FLT_MAX; - - { - for(int face=0;face<hullB->m_numFaces;face++) - { - const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, - facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); - const float4 WorldNormal = qtRotate(ornB, Normal); - float d = dot3F4(WorldNormal,separatingNormal); - if (d > dmax) - { - dmax = d; - closestFaceB = face; - } - } - } - - { - const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB]; - int numVertices = polyB.m_numIndices; - if (numVertices>capacityWorldVerts) - numVertices = capacityWorldVerts; - if (numVertices<0) - numVertices = 0; - - for(int e0=0;e0<numVertices;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; - worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); - } - } - } - - int closestFaceA=0; - { - float dmin = FLT_MAX; - for(int face=0;face<hullA->m_numFaces;face++) - { - const 
float4 Normal = make_float4( - facesA[hullA->m_faceOffset+face].m_plane.x, - facesA[hullA->m_faceOffset+face].m_plane.y, - facesA[hullA->m_faceOffset+face].m_plane.z, - 0.f); - const float4 faceANormalWS = qtRotate(ornA,Normal); - - float d = dot3F4(faceANormalWS,separatingNormal); - if (d < dmin) - { - dmin = d; - closestFaceA = face; - worldNormalsA1[pairIndex] = faceANormalWS; - } - } - } - - int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices; - if (numVerticesA>capacityWorldVerts) - numVerticesA = capacityWorldVerts; - if (numVerticesA<0) - numVerticesA=0; - - for(int e0=0;e0<numVerticesA;e0++) - { - if (e0<capacityWorldVerts) - { - const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; - worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); - } - } - - clippingFaces[pairIndex].x = closestFaceA; - clippingFaces[pairIndex].y = closestFaceB; - clippingFaces[pairIndex].z = numVerticesA; - clippingFaces[pairIndex].w = numWorldVertsB1; - - - return numContactsOut; -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - __global float* dmins, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - concaveHasSeparatingNormals[i] = 0; - - int pairIdx 
= i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&& - collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - concavePairs[pairIdx].w = -1; - return; - } - - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap; - - if (overlap) - { - float dmin = FLT_MAX; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - 
facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - hasSeparatingAxis = 4; - if (!sepA) - { - hasSeparatingAxis = 0; - } else - { - bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA, - posB,ornB, - posA,ornA, - DeltaC2, - vertices,uniqueEdges,faces,indices, - verticesA,uniqueEdgesA,facesA,indicesA, - &sepAxis,&dmin); - - if 
(!sepB) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - - if (hasSeparatingAxis) - { - dmins[i] = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } -} - - - - -// work-in-progress -__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs, - __global const BodyData* rigidBodies, - __global const btCollidableGpu* collidables, - __global const ConvexPolyhedronCL* convexShapes, - __global const float4* vertices, - __global const float4* uniqueEdges, - __global const btGpuFace* faces, - __global const int* indices, - __global const btGpuChildShape* gpuChildShapes, - __global btAabbCL* aabbs, - __global float4* concaveSeparatingNormalsOut, - __global int* concaveHasSeparatingNormals, - __global int4* clippingFacesOut, - __global float4* worldVertsA1GPU, - __global float4* worldNormalsAGPU, - __global float4* worldVertsB1GPU, - __global float* dmins, - int vertexFaceCapacity, - int numConcavePairs - ) -{ - - int i = get_global_id(0); - if (i>=numConcavePairs) - return; - - if (!concaveHasSeparatingNormals[i]) - return; - - int pairIdx = i; - - int bodyIndexA = concavePairs[i].x; - int bodyIndexB = concavePairs[i].y; - - int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; - int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; - - int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; - int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; - - - int numFacesA = convexShapes[shapeIndexA].m_numFaces; - int numActualConcaveConvexTests = 0; - - int f = concavePairs[i].z; - - bool overlap = false; - - ConvexPolyhedronCL convexPolyhedronA; - - //add 3 vertices of the triangle - convexPolyhedronA.m_numVertices = 3; - convexPolyhedronA.m_vertexOffset = 0; - float4 localCenter = 
make_float4(0.f,0.f,0.f,0.f); - - btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; - float4 triMinAabb, triMaxAabb; - btAabbCL triAabb; - triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f); - triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f); - - float4 verticesA[3]; - for (int i=0;i<3;i++) - { - int index = indices[face.m_indexOffset+i]; - float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; - verticesA[i] = vert; - localCenter += vert; - - triAabb.m_min = min(triAabb.m_min,vert); - triAabb.m_max = max(triAabb.m_max,vert); - - } - - overlap = true; - overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap; - overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap; - overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap; - - if (overlap) - { - float dmin = dmins[i]; - int hasSeparatingAxis=5; - float4 sepAxis=make_float4(1,2,3,4); - sepAxis = concaveSeparatingNormalsOut[pairIdx]; - - int localCC=0; - numActualConcaveConvexTests++; - - //a triangle has 3 unique edges - convexPolyhedronA.m_numUniqueEdges = 3; - convexPolyhedronA.m_uniqueEdgesOffset = 0; - float4 uniqueEdgesA[3]; - - uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); - uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); - uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); - - - convexPolyhedronA.m_faceOffset = 0; - - float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); - - btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES]; - int indicesA[3+3+2+2+2]; - int curUsedIndices=0; - int fidx=0; - - //front size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[0] = 0; - indicesA[1] = 1; - indicesA[2] = 2; - curUsedIndices+=3; - float c = face.m_plane.w; - facesA[fidx].m_plane.x = normal.x; - facesA[fidx].m_plane.y = normal.y; - 
facesA[fidx].m_plane.z = normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - //back size of triangle - { - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[3]=2; - indicesA[4]=1; - indicesA[5]=0; - curUsedIndices+=3; - float c = dot(normal,verticesA[0]); - float c1 = -face.m_plane.w; - facesA[fidx].m_plane.x = -normal.x; - facesA[fidx].m_plane.y = -normal.y; - facesA[fidx].m_plane.z = -normal.z; - facesA[fidx].m_plane.w = c; - facesA[fidx].m_numIndices=3; - } - fidx++; - - bool addEdgePlanes = true; - if (addEdgePlanes) - { - int numVertices=3; - int prevVertex = numVertices-1; - for (int i=0;i<numVertices;i++) - { - float4 v0 = verticesA[i]; - float4 v1 = verticesA[prevVertex]; - - float4 edgeNormal = normalize(cross(normal,v1-v0)); - float c = -dot(edgeNormal,v0); - - facesA[fidx].m_numIndices = 2; - facesA[fidx].m_indexOffset=curUsedIndices; - indicesA[curUsedIndices++]=i; - indicesA[curUsedIndices++]=prevVertex; - - facesA[fidx].m_plane.x = edgeNormal.x; - facesA[fidx].m_plane.y = edgeNormal.y; - facesA[fidx].m_plane.z = edgeNormal.z; - facesA[fidx].m_plane.w = c; - fidx++; - prevVertex = i; - } - } - convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; - convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); - - - float4 posA = rigidBodies[bodyIndexA].m_pos; - posA.w = 0.f; - float4 posB = rigidBodies[bodyIndexB].m_pos; - posB.w = 0.f; - - float4 ornA = rigidBodies[bodyIndexA].m_quat; - float4 ornB =rigidBodies[bodyIndexB].m_quat; - - - - - /////////////////// - ///compound shape support - - if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) - { - int compoundChild = concavePairs[pairIdx].w; - int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild; - int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; - float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; - float4 childOrnB = 
gpuChildShapes[childShapeIndexB].m_childOrientation; - float4 newPosB = transform(&childPosB,&posB,&ornB); - float4 newOrnB = qtMul(ornB,childOrnB); - posB = newPosB; - ornB = newOrnB; - shapeIndexB = collidables[childColIndexB].m_shapeIndex; - } - ////////////////// - - float4 c0local = convexPolyhedronA.m_localCenter; - float4 c0 = transform(&c0local, &posA, &ornA); - float4 c1local = convexShapes[shapeIndexB].m_localCenter; - float4 c1 = transform(&c1local,&posB,&ornB); - const float4 DeltaC2 = c0 - c1; - - - { - bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - DeltaC2, - verticesA,uniqueEdgesA,facesA,indicesA, - vertices,uniqueEdges,faces,indices, - &sepAxis,&dmin); - - if (!sepEE) - { - hasSeparatingAxis = 0; - } else - { - hasSeparatingAxis = 1; - } - } - - - if (hasSeparatingAxis) - { - sepAxis.w = dmin; - dmins[i] = dmin; - concaveSeparatingNormalsOut[pairIdx]=sepAxis; - concaveHasSeparatingNormals[i]=1; - - float minDist = -1e30f; - float maxDist = 0.02f; - - - findClippingFaces(sepAxis, - &convexPolyhedronA, - &convexShapes[shapeIndexB], - posA,ornA, - posB,ornB, - worldVertsA1GPU, - worldNormalsAGPU, - worldVertsB1GPU, - vertexFaceCapacity, - minDist, maxDist, - verticesA, - facesA, - indicesA, - vertices, - faces, - indices, - clippingFacesOut, pairIdx); - - - } else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - } - else - { - //mark this pair as in-active - concavePairs[pairIdx].w = -1; - } - - concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h deleted file mode 100644 index a60702ca62..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satConcaveKernels.h +++ /dev/null @@ -1,1456 +0,0 @@ -//this file is 
autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* satConcaveKernelsCL = - "//keep this enum in sync with the CPU version (in btCollidable.h)\n" - "//written by Erwin Coumans\n" - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define TRIANGLE_NUM_CONVEX_FACES 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define B3_MAX_STACK_DEPTH 256\n" - "typedef unsigned int u32;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " \n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "#define MAX_NUM_PARTS_IN_BITS 10\n" - "///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" - "///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes\n" - " int m_escapeIndexOrTriangleIndex;\n" - "} b3QuantizedBvhNode;\n" - "typedef struct\n" - "{\n" - " float4 m_aabbMin;\n" - " float4 m_aabbMax;\n" - " float4 m_quantization;\n" - " int m_numNodes;\n" - " int m_numSubTrees;\n" - " int m_nodeOffset;\n" - " int m_subTreeOffset;\n" - "} b3BvhInfo;\n" - "int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the 
triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - "int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} b3BvhSubtreeInfo;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct 
\n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int 
ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " 
vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - 
"__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, 
b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? 
false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "#ifndef B3_INT2_H\n" - "#define B3_INT2_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define b3UnsignedInt2 uint2\n" - "#define b3Int2 int2\n" - "#define b3MakeInt2 (int2)\n" - "#endif //__cplusplus\n" - "#endif\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - " \n" - "// float4 a1 = make_float4(a.xyz,0.f);\n" - "// float4 b1 = make_float4(b.xyz,0.f);\n" - "// return cross(a1,b1);\n" - "//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " //return c;\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = make_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in 
);\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;i<numVerts;i++)\n" - " {\n" - " float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int 
i=0;i<numVerts;i++)\n" - " {\n" - " float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " *depth = d0<d1 ? d0:d1;\n" - " return true;\n" - "}\n" - "inline bool IsAlmostZero(const float4 v)\n" - "{\n" - " if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" - " return false;\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " \n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test 
normals from hullA\n" - " for(int i=0;i<numFacesA;i++)\n" - " {\n" - " const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* verticesA, \n" - " __global const float4* uniqueEdgesA, \n" - " __global const btGpuFace* facesA,\n" - " __global const int* indicesA,\n" - " const float4* verticesB,\n" - " const float4* uniqueEdgesB, \n" - " const btGpuFace* facesB,\n" - " const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;i<numFacesA;i++)\n" - " {\n" - " const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS *= -1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - 
" *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje *= -1.f;\n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" - " \n" - " if(Max0<Min1 || Max1<Min0)\n" - " result = false;\n" - " \n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " dist = d0<d1 ? 
d0:d1;\n" - " result = true;\n" - " }\n" - " \n" - " if(dist<*dmin)\n" - " {\n" - " *dmin = dist;\n" - " *sep = crossje;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "inline int findClippingFaces(const float4 separatingNormal,\n" - " const ConvexPolyhedronCL* hullA, \n" - " __global const ConvexPolyhedronCL* hullB,\n" - " const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " __global int4* clippingFaces, int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " \n" - " \n" - " int closestFaceB=0;\n" - " float dmax = -FLT_MAX;\n" - " \n" - " {\n" - " for(int face=0;face<hullB->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " int numVertices = polyB.m_numIndices;\n" - " if (numVertices>capacityWorldVerts)\n" - " numVertices = capacityWorldVerts;\n" - " if (numVertices<0)\n" - " numVertices = 0;\n" - " \n" - " for(int e0=0;e0<numVertices;e0++)\n" - " {\n" - " if (e0<capacityWorldVerts)\n" - " {\n" - " const float4 b = 
verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=0;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;face<hullA->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x,\n" - " facesA[hullA->m_faceOffset+face].m_plane.y,\n" - " facesA[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " if (numVerticesA>capacityWorldVerts)\n" - " numVerticesA = capacityWorldVerts;\n" - " if (numVerticesA<0)\n" - " numVerticesA=0;\n" - " \n" - " for(int e0=0;e0<numVerticesA;e0++)\n" - " {\n" - " if (e0<capacityWorldVerts)\n" - " {\n" - " const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSeparatingAxisVertexFaceKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global 
const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* concaveSeparatingNormalsOut,\n" - " __global int* concaveHasSeparatingNormals,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1GPU,\n" - " __global float4* worldNormalsAGPU,\n" - " __global float4* worldVertsB1GPU,\n" - " __global float* dmins,\n" - " int vertexFaceCapacity,\n" - " int numConcavePairs\n" - " )\n" - "{\n" - " \n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " \n" - " concaveHasSeparatingNormals[i] = 0;\n" - " \n" - " int pairIdx = i;\n" - " \n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" - " collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " concavePairs[pairIdx].w = -1;\n" - " return;\n" - " }\n" - " \n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " \n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " \n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = 
make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert);\n" - " triAabb.m_max = max(triAabb.m_max,vert);\n" - " \n" - " }\n" - " \n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = FLT_MAX;\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " \n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " \n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " \n" - " \n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " \n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = 
normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " \n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = numVertices-1;\n" - " for (int i=0;i<numVertices;i++)\n" - " {\n" - " float4 v0 = verticesA[i];\n" - " float4 v1 = verticesA[prevVertex];\n" - " \n" - " float4 edgeNormal = normalize(cross(normal,v1-v0));\n" - " float c = -dot(edgeNormal,v0);\n" - " \n" - " facesA[fidx].m_numIndices = 2;\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[curUsedIndices++]=i;\n" - " indicesA[curUsedIndices++]=prevVertex;\n" - " \n" - " facesA[fidx].m_plane.x = edgeNormal.x;\n" - " facesA[fidx].m_plane.y = edgeNormal.y;\n" - " facesA[fidx].m_plane.z = edgeNormal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " fidx++;\n" - " prevVertex = i;\n" - " }\n" - " }\n" - " convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" - " convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" - " \n" - " \n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " \n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " \n" - " \n" - " \n" - " \n" - " ///////////////////\n" - " ///compound shape support\n" - " \n" - " if 
(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " int compoundChild = concavePairs[pairIdx].w;\n" - " int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " }\n" - " //////////////////\n" - " \n" - " float4 c0local = convexPolyhedronA.m_localCenter;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " \n" - " \n" - " bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " vertices,uniqueEdges,faces,indices,\n" - " &sepAxis,&dmin);\n" - " hasSeparatingAxis = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" - " posB,ornB,\n" - " posA,ornA,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,indices,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " &sepAxis,&dmin);\n" - " \n" - " if (!sepB)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis = 1;\n" - " }\n" - " } \n" - " \n" - " if (hasSeparatingAxis)\n" - " {\n" - " dmins[i] = dmin;\n" - " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" - " concaveHasSeparatingNormals[i]=1;\n" - " \n" - " } else\n" - " { \n" - " //mark this pair as 
in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - " }\n" - " else\n" - " { \n" - " //mark this pair as in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSeparatingAxisEdgeEdgeKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* concaveSeparatingNormalsOut,\n" - " __global int* concaveHasSeparatingNormals,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1GPU,\n" - " __global float4* worldNormalsAGPU,\n" - " __global float4* worldVertsB1GPU,\n" - " __global float* dmins,\n" - " int vertexFaceCapacity,\n" - " int numConcavePairs\n" - " )\n" - "{\n" - " \n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " \n" - " if (!concaveHasSeparatingNormals[i])\n" - " return;\n" - " \n" - " int pairIdx = i;\n" - " \n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " \n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " 
convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " \n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert);\n" - " triAabb.m_max = max(triAabb.m_max,vert);\n" - " \n" - " }\n" - " \n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? 
false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = dmins[i];\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " sepAxis = concaveSeparatingNormalsOut[pairIdx];\n" - " \n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " \n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " \n" - " \n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " \n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " \n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int prevVertex = 
numVertices-1;\n" - " for (int i=0;i<numVertices;i++)\n" - " {\n" - " float4 v0 = verticesA[i];\n" - " float4 v1 = verticesA[prevVertex];\n" - " \n" - " float4 edgeNormal = normalize(cross(normal,v1-v0));\n" - " float c = -dot(edgeNormal,v0);\n" - " \n" - " facesA[fidx].m_numIndices = 2;\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[curUsedIndices++]=i;\n" - " indicesA[curUsedIndices++]=prevVertex;\n" - " \n" - " facesA[fidx].m_plane.x = edgeNormal.x;\n" - " facesA[fidx].m_plane.y = edgeNormal.y;\n" - " facesA[fidx].m_plane.z = edgeNormal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " fidx++;\n" - " prevVertex = i;\n" - " }\n" - " }\n" - " convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" - " convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" - " \n" - " \n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " \n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " \n" - " \n" - " \n" - " \n" - " ///////////////////\n" - " ///compound shape support\n" - " \n" - " if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " int compoundChild = concavePairs[pairIdx].w;\n" - " int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " }\n" - " //////////////////\n" - " \n" - " float4 c0local = convexPolyhedronA.m_localCenter;\n" - " float4 c0 = transform(&c0local, 
&posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " \n" - " \n" - " {\n" - " bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " vertices,uniqueEdges,faces,indices,\n" - " &sepAxis,&dmin);\n" - " \n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis = 1;\n" - " }\n" - " }\n" - " \n" - " \n" - " if (hasSeparatingAxis)\n" - " {\n" - " sepAxis.w = dmin;\n" - " dmins[i] = dmin;\n" - " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" - " concaveHasSeparatingNormals[i]=1;\n" - " \n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " \n" - " findClippingFaces(sepAxis,\n" - " &convexPolyhedronA,\n" - " &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " worldVertsA1GPU,\n" - " worldNormalsAGPU,\n" - " worldVertsB1GPU,\n" - " vertexFaceCapacity,\n" - " minDist, maxDist,\n" - " verticesA,\n" - " facesA,\n" - " indicesA,\n" - " vertices,\n" - " faces,\n" - " indices,\n" - " clippingFacesOut, pairIdx);\n" - " \n" - " \n" - " } else\n" - " { \n" - " //mark this pair as in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - " }\n" - " else\n" - " { \n" - " //mark this pair as in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - " \n" - " concavePairs[i].z = -1;//for the next stage, z is used to determine existing contact points\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h deleted file mode 100644 index e627af2799..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satKernels.h +++ /dev/null @@ -1,2103 +0,0 @@ -//this file is autogenerated using 
stringify.bat (premake --stringify) in the build folder of this project -static const char* satKernelsCL = - "//keep this enum in sync with the CPU version (in btCollidable.h)\n" - "//written by Erwin Coumans\n" - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define TRIANGLE_NUM_CONVEX_FACES 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define B3_MAX_STACK_DEPTH 256\n" - "typedef unsigned int u32;\n" - "///keep this in sync with btCollidable.h\n" - "typedef struct\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " \n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - " \n" - "} btCollidableGpu;\n" - "#define MAX_NUM_PARTS_IN_BITS 10\n" - "///b3QuantizedBvhNode is a compressed aabb node, 16 bytes.\n" - "///Node can be used for leafnode or internal node. Leafnodes can point to 32-bit triangle index (non-negative range).\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes\n" - " int m_escapeIndexOrTriangleIndex;\n" - "} b3QuantizedBvhNode;\n" - "typedef struct\n" - "{\n" - " float4 m_aabbMin;\n" - " float4 m_aabbMax;\n" - " float4 m_quantization;\n" - " int m_numNodes;\n" - " int m_numSubTrees;\n" - " int m_nodeOffset;\n" - " int m_subTreeOffset;\n" - "} b3BvhInfo;\n" - "int getTriangleIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - " return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int getTriangleIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " unsigned int x=0;\n" - " unsigned int y = (~(x&0))<<(31-MAX_NUM_PARTS_IN_BITS);\n" - " // Get only the lower bits where the triangle index is stored\n" - 
" return (rootNode->m_escapeIndexOrTriangleIndex&~(y));\n" - "}\n" - "int isLeafNode(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - "int isLeafNodeGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " //skipindex is negative (internal node), triangleindex >=0 (leafnode)\n" - " return (rootNode->m_escapeIndexOrTriangleIndex >= 0)? 1 : 0;\n" - "}\n" - " \n" - "int getEscapeIndex(const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "int getEscapeIndexGlobal(__global const b3QuantizedBvhNode* rootNode)\n" - "{\n" - " return -rootNode->m_escapeIndexOrTriangleIndex;\n" - "}\n" - "typedef struct\n" - "{\n" - " //12 bytes\n" - " unsigned short int m_quantizedAabbMin[3];\n" - " unsigned short int m_quantizedAabbMax[3];\n" - " //4 bytes, points to the root of the subtree\n" - " int m_rootNodeIndex;\n" - " //4 bytes\n" - " int m_subtreeSize;\n" - " int m_padding[3];\n" - "} b3BvhSubtreeInfo;\n" - "typedef struct\n" - "{\n" - " float4 m_childPosition;\n" - " float4 m_childOrientation;\n" - " int m_shapeIndex;\n" - " int m_unused0;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "} btGpuChildShape;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} BodyData;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " \n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " 
{\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i 
= 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 
out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 
mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 
0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? 
false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2003-2013 Erwin Coumans http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "#ifndef B3_INT2_H\n" - "#define B3_INT2_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#define b3UnsignedInt2 uint2\n" - "#define b3Int2 int2\n" - "#define b3MakeInt2 (int2)\n" - "#endif //__cplusplus\n" - "#endif\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} btGpuFace;\n" - "#define make_float4 (float4)\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - " \n" - "// float4 a1 = make_float4(a.xyz,0.f);\n" - "// float4 b1 = make_float4(b.xyz,0.f);\n" - "// return cross(a1,b1);\n" - "//float4 c = make_float4(a.y*b.z - a.z*b.y,a.z*b.x - a.x*b.z,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " // float4 c = make_float4(a.y*b.z - a.z*b.y,1.f,a.x*b.y - a.y*b.x,0.f);\n" - " \n" - " //return c;\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = make_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in 
);\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "__inline\n" - "float4 transform(const float4* p, const float4* translation, const Quaternion* orientation)\n" - "{\n" - " return qtRotate( *orientation, *p ) + (*translation);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "}\n" - "inline void projectLocal(const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int i=0;i<numVerts;i++)\n" - " {\n" - " float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline void project(__global const ConvexPolyhedronCL* hull, const float4 pos, const float4 orn, \n" - "const float4* dir, __global const float4* vertices, float* min, float* max)\n" - "{\n" - " min[0] = FLT_MAX;\n" - " max[0] = -FLT_MAX;\n" - " int numVerts = hull->m_numVertices;\n" - " const float4 localDir = qtInvRotate(orn,*dir);\n" - " float offset = dot(pos,*dir);\n" - " for(int 
i=0;i<numVerts;i++)\n" - " {\n" - " float dp = dot(vertices[hull->m_vertexOffset+i],localDir);\n" - " if(dp < min[0]) \n" - " min[0] = dp;\n" - " if(dp > max[0]) \n" - " max[0] = dp;\n" - " }\n" - " if(min[0]>max[0])\n" - " {\n" - " float tmp = min[0];\n" - " min[0] = max[0];\n" - " max[0] = tmp;\n" - " }\n" - " min[0] += offset;\n" - " max[0] += offset;\n" - "}\n" - "inline bool TestSepAxisLocalA(const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, const float4* verticesA, __global const float4* verticesB,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,sep_axis,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,verticesB, &Min1, &Max1);\n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " *depth = d0<d1 ? d0:d1;\n" - " return true;\n" - "}\n" - "inline bool IsAlmostZero(const float4 v)\n" - "{\n" - " if(fabs(v.x)>1e-6f || fabs(v.y)>1e-6f || fabs(v.z)>1e-6f)\n" - " return false;\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " \n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test 
normals from hullA\n" - " for(int i=0;i<numFacesA;i++)\n" - " {\n" - " const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, verticesA, verticesB,&d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisLocalB( __global const ConvexPolyhedronCL* hullA, const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* verticesA, \n" - " __global const float4* uniqueEdgesA, \n" - " __global const btGpuFace* facesA,\n" - " __global const int* indicesA,\n" - " const float4* verticesB,\n" - " const float4* uniqueEdgesB, \n" - " const btGpuFace* facesB,\n" - " const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;i<numFacesA;i++)\n" - " {\n" - " const float4 normal = facesA[hullA->m_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS *= -1.f;\n" - " curPlaneTests++;\n" - " float d;\n" - " if(!TestSepAxisLocalA( hullB, hullA, posB,ornB,posA,ornA, &faceANormalWS, verticesB,verticesA, &d))\n" - " return false;\n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - 
" *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdgeLocalA( const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " const float4* verticesA, \n" - " const float4* uniqueEdgesA, \n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB, \n" - " __global const float4* uniqueEdgesB, \n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdgesA[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdgesB[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje *= -1.f;\n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " projectLocal(hullA,posA,ornA,&crossje,verticesA, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,verticesB, &Min1, &Max1);\n" - " \n" - " if(Max0<Min1 || Max1<Min0)\n" - " result = false;\n" - " \n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " dist = d0<d1 ? 
d0:d1;\n" - " result = true;\n" - " }\n" - " \n" - " if(dist<*dmin)\n" - " {\n" - " *dmin = dist;\n" - " *sep = crossje;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "inline bool TestSepAxis(__global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA,const float4 ornA,\n" - " const float4 posB,const float4 ornB,\n" - " float4* sep_axis, __global const float4* vertices,float* depth)\n" - "{\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,sep_axis,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB, sep_axis,vertices, &Min1, &Max1);\n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " *depth = d0<d1 ? d0:d1;\n" - " return true;\n" - "}\n" - "bool findSeparatingAxis( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices, \n" - " __global const float4* uniqueEdges, \n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " \n" - " int curPlaneTests=0;\n" - " {\n" - " int numFacesA = hullA->m_numFaces;\n" - " // Test normals from hullA\n" - " for(int i=0;i<numFacesA;i++)\n" - " {\n" - " const float4 normal = faces[hullA->m_faceOffset+i].m_plane;\n" - " float4 faceANormalWS = qtRotate(ornA,normal);\n" - " \n" - " if (dot3F4(DeltaC2,faceANormalWS)<0)\n" - " faceANormalWS*=-1.f;\n" - " \n" - " curPlaneTests++;\n" - " \n" - " float d;\n" - " if(!TestSepAxis( hullA, hullB, posA,ornA,posB,ornB,&faceANormalWS, vertices,&d))\n" - " 
return false;\n" - " \n" - " if(d<*dmin)\n" - " {\n" - " *dmin = d;\n" - " *sep = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " \n" - " return true;\n" - "}\n" - "bool findSeparatingAxisUnitSphere( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices,\n" - " __global const float4* unitSphereDirections,\n" - " int numUnitSphereDirections,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test unit sphere directions\n" - " for (int i=0;i<numUnitSphereDirections;i++)\n" - " {\n" - " float4 crossje;\n" - " crossje = unitSphereDirections[i]; \n" - " if (dot3F4(DeltaC2,crossje)>0)\n" - " crossje *= -1.f;\n" - " {\n" - " float dist;\n" - " bool result = true;\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " \n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " dist = d0<d1 ? 
d0:d1;\n" - " result = true;\n" - " \n" - " if(dist<*dmin)\n" - " {\n" - " *dmin = dist;\n" - " *sep = crossje;\n" - " }\n" - " }\n" - " }\n" - " \n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "bool findSeparatingAxisEdgeEdge( __global const ConvexPolyhedronCL* hullA, __global const ConvexPolyhedronCL* hullB, \n" - " const float4 posA1,\n" - " const float4 ornA,\n" - " const float4 posB1,\n" - " const float4 ornB,\n" - " const float4 DeltaC2,\n" - " __global const float4* vertices, \n" - " __global const float4* uniqueEdges, \n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " float4* sep,\n" - " float* dmin)\n" - "{\n" - " \n" - " float4 posA = posA1;\n" - " posA.w = 0.f;\n" - " float4 posB = posB1;\n" - " posB.w = 0.f;\n" - " int curPlaneTests=0;\n" - " int curEdgeEdge = 0;\n" - " // Test edges\n" - " for(int e0=0;e0<hullA->m_numUniqueEdges;e0++)\n" - " {\n" - " const float4 edge0 = uniqueEdges[hullA->m_uniqueEdgesOffset+e0];\n" - " float4 edge0World = qtRotate(ornA,edge0);\n" - " for(int e1=0;e1<hullB->m_numUniqueEdges;e1++)\n" - " {\n" - " const float4 edge1 = uniqueEdges[hullB->m_uniqueEdgesOffset+e1];\n" - " float4 edge1World = qtRotate(ornB,edge1);\n" - " float4 crossje = cross3(edge0World,edge1World);\n" - " curEdgeEdge++;\n" - " if(!IsAlmostZero(crossje))\n" - " {\n" - " crossje = normalize3(crossje);\n" - " if (dot3F4(DeltaC2,crossje)<0)\n" - " crossje*=-1.f;\n" - " \n" - " float dist;\n" - " bool result = true;\n" - " {\n" - " float Min0,Max0;\n" - " float Min1,Max1;\n" - " project(hullA,posA,ornA,&crossje,vertices, &Min0, &Max0);\n" - " project(hullB,posB,ornB,&crossje,vertices, &Min1, &Max1);\n" - " \n" - " if(Max0<Min1 || Max1<Min0)\n" - " return false;\n" - " \n" - " float d0 = Max0 - Min1;\n" - " float d1 = Max1 - Min0;\n" - " dist = d0<d1 ? 
d0:d1;\n" - " result = true;\n" - " }\n" - " \n" - " if(dist<*dmin)\n" - " {\n" - " *dmin = dist;\n" - " *sep = crossje;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - " if((dot3F4(-DeltaC2,*sep))>0.0f)\n" - " {\n" - " *sep = -(*sep);\n" - " }\n" - " return true;\n" - "}\n" - "// work-in-progress\n" - "__kernel void processCompoundPairsKernel( __global const int4* gpuCompoundPairs,\n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global volatile float4* gpuCompoundSepNormalsOut,\n" - " __global volatile int* gpuHasCompoundSepNormalsOut,\n" - " int numCompoundPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i<numCompoundPairs)\n" - " {\n" - " int bodyIndexA = gpuCompoundPairs[i].x;\n" - " int bodyIndexB = gpuCompoundPairs[i].y;\n" - " int childShapeIndexA = gpuCompoundPairs[i].z;\n" - " int childShapeIndexB = gpuCompoundPairs[i].w;\n" - " \n" - " int collidableIndexA = -1;\n" - " int collidableIndexB = -1;\n" - " \n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " \n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " \n" - " if (childShapeIndexA >= 0)\n" - " {\n" - " collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " posA = newPosA;\n" - " ornA = newOrnA;\n" - " } else\n" - " {\n" - " 
collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " }\n" - " \n" - " if (childShapeIndexB>=0)\n" - " {\n" - " collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " } else\n" - " {\n" - " collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; \n" - " }\n" - " \n" - " gpuHasCompoundSepNormalsOut[i] = 0;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " int shapeTypeA = collidables[collidableIndexA].m_shapeType;\n" - " int shapeTypeB = collidables[collidableIndexB].m_shapeType;\n" - " \n" - " if ((shapeTypeA != SHAPE_CONVEX_HULL) || (shapeTypeB != SHAPE_CONVEX_HULL))\n" - " {\n" - " return;\n" - " }\n" - " int hasSeparatingAxis = 5;\n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " posA.w = 0.f;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal = make_float4(1,0,0,0);\n" - " bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " hasSeparatingAxis = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxis( 
&convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,posA,ornA,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " if (!sepB)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else//(!sepB)\n" - " {\n" - " bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,posB,ornB,DeltaC2,vertices,uniqueEdges,faces,indices,&sepNormal,&dmin);\n" - " if (sepEE)\n" - " {\n" - " gpuCompoundSepNormalsOut[i] = sepNormal;//fastNormalize4(sepNormal);\n" - " gpuHasCompoundSepNormalsOut[i] = 1;\n" - " }//sepEE\n" - " }//(!sepB)\n" - " }//(!sepA)\n" - " \n" - " \n" - " }\n" - " \n" - "}\n" - "inline b3Float4 MyUnQuantize(const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" - "{\n" - " b3Float4 vecOut;\n" - " vecOut = b3MakeFloat4(\n" - " (float)(vecIn[0]) / (quantization.x),\n" - " (float)(vecIn[1]) / (quantization.y),\n" - " (float)(vecIn[2]) / (quantization.z),\n" - " 0.f);\n" - " vecOut += bvhAabbMin;\n" - " return vecOut;\n" - "}\n" - "inline b3Float4 MyUnQuantizeGlobal(__global const unsigned short* vecIn, b3Float4 quantization, b3Float4 bvhAabbMin)\n" - "{\n" - " b3Float4 vecOut;\n" - " vecOut = b3MakeFloat4(\n" - " (float)(vecIn[0]) / (quantization.x),\n" - " (float)(vecIn[1]) / (quantization.y),\n" - " (float)(vecIn[2]) / (quantization.z),\n" - " 0.f);\n" - " vecOut += bvhAabbMin;\n" - " return vecOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findCompoundPairsKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global b3Aabb_t* aabbLocalSpace,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global volatile int4* gpuCompoundPairsOut,\n" - " __global 
volatile int* numCompoundPairsOut,\n" - " __global const b3BvhSubtreeInfo* subtrees,\n" - " __global const b3QuantizedBvhNode* quantizedNodes,\n" - " __global const b3BvhInfo* bvhInfos,\n" - " int numPairs,\n" - " int maxNumCompoundPairsCapacity\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i<numPairs)\n" - " {\n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " return;\n" - " }\n" - " if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) &&(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" - " {\n" - " int bvhA = collidables[collidableIndexA].m_compoundBvhIndex;\n" - " int bvhB = collidables[collidableIndexB].m_compoundBvhIndex;\n" - " int numSubTreesA = bvhInfos[bvhA].m_numSubTrees;\n" - " int subTreesOffsetA = bvhInfos[bvhA].m_subTreeOffset;\n" - " int subTreesOffsetB = bvhInfos[bvhB].m_subTreeOffset;\n" - " int numSubTreesB = bvhInfos[bvhB].m_numSubTrees;\n" - " \n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " b3Quat ornA = rigidBodies[bodyIndexA].m_quat;\n" - " b3Quat ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " \n" - " for (int p=0;p<numSubTreesA;p++)\n" - " {\n" - " b3BvhSubtreeInfo subtreeA = subtrees[subTreesOffsetA+p];\n" - " //bvhInfos[bvhA].m_quantization\n" - " b3Float4 treeAminLocal = MyUnQuantize(subtreeA.m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" - " b3Float4 treeAmaxLocal = 
MyUnQuantize(subtreeA.m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" - " b3Float4 aabbAMinOut,aabbAMaxOut;\n" - " float margin=0.f;\n" - " b3TransformAabb2(treeAminLocal,treeAmaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" - " \n" - " for (int q=0;q<numSubTreesB;q++)\n" - " {\n" - " b3BvhSubtreeInfo subtreeB = subtrees[subTreesOffsetB+q];\n" - " b3Float4 treeBminLocal = MyUnQuantize(subtreeB.m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" - " b3Float4 treeBmaxLocal = MyUnQuantize(subtreeB.m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" - " b3Float4 aabbBMinOut,aabbBMaxOut;\n" - " float margin=0.f;\n" - " b3TransformAabb2(treeBminLocal,treeBmaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" - " \n" - " \n" - " bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" - " if (aabbOverlap)\n" - " {\n" - " \n" - " int startNodeIndexA = subtreeA.m_rootNodeIndex+bvhInfos[bvhA].m_nodeOffset;\n" - " int endNodeIndexA = startNodeIndexA+subtreeA.m_subtreeSize;\n" - " int startNodeIndexB = subtreeB.m_rootNodeIndex+bvhInfos[bvhB].m_nodeOffset;\n" - " int endNodeIndexB = startNodeIndexB+subtreeB.m_subtreeSize;\n" - " b3Int2 nodeStack[B3_MAX_STACK_DEPTH];\n" - " b3Int2 node0;\n" - " node0.x = startNodeIndexA;\n" - " node0.y = startNodeIndexB;\n" - " int maxStackDepth = B3_MAX_STACK_DEPTH;\n" - " int depth=0;\n" - " nodeStack[depth++]=node0;\n" - " do\n" - " {\n" - " b3Int2 node = nodeStack[--depth];\n" - " b3Float4 aMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMin,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" - " b3Float4 aMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.x].m_quantizedAabbMax,bvhInfos[bvhA].m_quantization,bvhInfos[bvhA].m_aabbMin);\n" - " b3Float4 bMinLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMin,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" - 
" b3Float4 bMaxLocal = MyUnQuantizeGlobal(quantizedNodes[node.y].m_quantizedAabbMax,bvhInfos[bvhB].m_quantization,bvhInfos[bvhB].m_aabbMin);\n" - " float margin=0.f;\n" - " b3Float4 aabbAMinOut,aabbAMaxOut;\n" - " b3TransformAabb2(aMinLocal,aMaxLocal, margin,posA,ornA,&aabbAMinOut,&aabbAMaxOut);\n" - " b3Float4 aabbBMinOut,aabbBMaxOut;\n" - " b3TransformAabb2(bMinLocal,bMaxLocal, margin,posB,ornB,&aabbBMinOut,&aabbBMaxOut);\n" - " \n" - " bool nodeOverlap = b3TestAabbAgainstAabb(aabbAMinOut,aabbAMaxOut,aabbBMinOut,aabbBMaxOut);\n" - " if (nodeOverlap)\n" - " {\n" - " bool isLeafA = isLeafNodeGlobal(&quantizedNodes[node.x]);\n" - " bool isLeafB = isLeafNodeGlobal(&quantizedNodes[node.y]);\n" - " bool isInternalA = !isLeafA;\n" - " bool isInternalB = !isLeafB;\n" - " //fail, even though it might hit two leaf nodes\n" - " if (depth+4>maxStackDepth && !(isLeafA && isLeafB))\n" - " {\n" - " //printf(\"Error: traversal exceeded maxStackDepth\");\n" - " continue;\n" - " }\n" - " if(isInternalA)\n" - " {\n" - " int nodeAleftChild = node.x+1;\n" - " bool isNodeALeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.x+1]);\n" - " int nodeArightChild = isNodeALeftChildLeaf? node.x+2 : node.x+1 + getEscapeIndexGlobal(&quantizedNodes[node.x+1]);\n" - " if(isInternalB)\n" - " { \n" - " int nodeBleftChild = node.y+1;\n" - " bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" - " int nodeBrightChild = isNodeBLeftChildLeaf? 
node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild, nodeBrightChild);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild, nodeBrightChild);\n" - " }\n" - " else\n" - " {\n" - " nodeStack[depth++] = b3MakeInt2(nodeAleftChild,node.y);\n" - " nodeStack[depth++] = b3MakeInt2(nodeArightChild,node.y);\n" - " }\n" - " }\n" - " else\n" - " {\n" - " if(isInternalB)\n" - " {\n" - " int nodeBleftChild = node.y+1;\n" - " bool isNodeBLeftChildLeaf = isLeafNodeGlobal(&quantizedNodes[node.y+1]);\n" - " int nodeBrightChild = isNodeBLeftChildLeaf? node.y+2 : node.y+1 + getEscapeIndexGlobal(&quantizedNodes[node.y+1]);\n" - " nodeStack[depth++] = b3MakeInt2(node.x,nodeBleftChild);\n" - " nodeStack[depth++] = b3MakeInt2(node.x,nodeBrightChild);\n" - " }\n" - " else\n" - " {\n" - " int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" - " if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " {\n" - " int childShapeIndexA = getTriangleIndexGlobal(&quantizedNodes[node.x]);\n" - " int childShapeIndexB = getTriangleIndexGlobal(&quantizedNodes[node.y]);\n" - " gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" - " }\n" - " }\n" - " }\n" - " }\n" - " } while (depth);\n" - " }\n" - " }\n" - " }\n" - " \n" - " return;\n" - " }\n" - " if ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" - " {\n" - " if (collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) \n" - " {\n" - " int numChildrenA = collidables[collidableIndexA].m_numChildShapes;\n" - " for (int c=0;c<numChildrenA;c++)\n" - " {\n" - " int childShapeIndexA = collidables[collidableIndexA].m_shapeIndex+c;\n" - " int childColIndexA 
= gpuChildShapes[childShapeIndexA].m_shapeIndex;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition;\n" - " float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation;\n" - " float4 newPosA = qtRotate(ornA,childPosA)+posA;\n" - " float4 newOrnA = qtMul(ornA,childOrnA);\n" - " int shapeIndexA = collidables[childColIndexA].m_shapeIndex;\n" - " b3Aabb_t aabbAlocal = aabbLocalSpace[shapeIndexA];\n" - " float margin = 0.f;\n" - " \n" - " b3Float4 aabbAMinWS;\n" - " b3Float4 aabbAMaxWS;\n" - " \n" - " b3TransformAabb2(aabbAlocal.m_minVec,aabbAlocal.m_maxVec,margin,\n" - " newPosA,\n" - " newOrnA,\n" - " &aabbAMinWS,&aabbAMaxWS);\n" - " \n" - " \n" - " if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" - " for (int b=0;b<numChildrenB;b++)\n" - " {\n" - " int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " b3Aabb_t aabbBlocal = aabbLocalSpace[shapeIndexB];\n" - " \n" - " b3Float4 aabbBMinWS;\n" - " b3Float4 aabbBMaxWS;\n" - " \n" - " b3TransformAabb2(aabbBlocal.m_minVec,aabbBlocal.m_maxVec,margin,\n" - " newPosB,\n" - " newOrnB,\n" - " &aabbBMinWS,&aabbBMaxWS);\n" - " \n" - " \n" - " \n" - " bool aabbOverlap = b3TestAabbAgainstAabb(aabbAMinWS,aabbAMaxWS,aabbBMinWS,aabbBMaxWS);\n" - " if 
(aabbOverlap)\n" - " {\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " float4 posA = newPosA;\n" - " posA.w = 0.f;\n" - " float4 posB = newPosB;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = newOrnA;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =newOrnB;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " {//\n" - " int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" - " if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " {\n" - " gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,childShapeIndexB);\n" - " }\n" - " }//\n" - " }//fi(1)\n" - " } //for (int b=0\n" - " }//if (collidables[collidableIndexB].\n" - " else//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " if (1)\n" - " {\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " float4 posA = newPosA;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = newOrnA;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " {\n" - " int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" - " if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " {\n" - " gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,childShapeIndexA,-1);\n" - " }//if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " }//\n" - " }//fi (1)\n" - " }//if 
(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " }//for (int b=0;b<numChildrenB;b++) \n" - " return;\n" - " }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONCAVE_TRIMESH) \n" - " && (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" - " {\n" - " int numChildrenB = collidables[collidableIndexB].m_numChildShapes;\n" - " for (int b=0;b<numChildrenB;b++)\n" - " {\n" - " int childShapeIndexB = collidables[collidableIndexB].m_shapeIndex+b;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 ornB = rigidBodies[bodyIndexB].m_quat;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = qtRotate(ornB,childPosB)+posB;\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " int shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " //////////////////////////////////////\n" - " if (1)\n" - " {\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = newPosB;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =newOrnB;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " {//\n" - " int compoundPairIdx = atomic_inc(numCompoundPairsOut);\n" - " if (compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " {\n" - " gpuCompoundPairsOut[compoundPairIdx] = (int4)(bodyIndexA,bodyIndexB,-1,childShapeIndexB);\n" - " }//fi 
(compoundPairIdx<maxNumCompoundPairsCapacity)\n" - " }//\n" - " }//fi (1) \n" - " }//for (int b=0;b<numChildrenB;b++)\n" - " return;\n" - " }//if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " return;\n" - " }//fi ((collidables[collidableIndexA].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS) ||(collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS))\n" - " }//i<numPairs\n" - "}\n" - "// work-in-progress\n" - "__kernel void findSeparatingAxisKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global volatile float4* separatingNormals,\n" - " __global volatile int* hasSeparatingAxis,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " \n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " return;\n" - " }\n" - " \n" - " if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " return;\n" - " }\n" - " \n" - " if 
((collidables[collidableIndexA].m_shapeType==SHAPE_CONCAVE_TRIMESH))\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " return;\n" - " }\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal;\n" - " \n" - " bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " hasSeparatingAxis[i] = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" - " posA,ornA,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " if (!sepB)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " bool sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " }\n" - " \n" - " }\n" - "}\n" - "__kernel void findSeparatingAxisVertexFaceKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* 
collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global volatile float4* separatingNormals,\n" - " __global volatile int* hasSeparatingAxis,\n" - " __global float* dmins,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " \n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " hasSeparatingAxis[i] = 0; \n" - " \n" - " //once the broadphase avoids static-static pairs, we can remove this test\n" - " if ((rigidBodies[bodyIndexA].m_invMass==0) &&(rigidBodies[bodyIndexB].m_invMass==0))\n" - " {\n" - " return;\n" - " }\n" - " \n" - " if ((collidables[collidableIndexA].m_shapeType!=SHAPE_CONVEX_HULL) ||(collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL))\n" - " {\n" - " return;\n" - " }\n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " float dmin = FLT_MAX;\n" - " dmins[i] = dmin;\n" - " \n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal;\n" - " \n" 
- " bool sepA = findSeparatingAxis( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " hasSeparatingAxis[i] = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxis( &convexShapes[shapeIndexB],&convexShapes[shapeIndexA],posB,ornB,\n" - " posA,ornA,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " if (sepB)\n" - " {\n" - " dmins[i] = dmin;\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " \n" - " }\n" - "}\n" - "__kernel void findSeparatingAxisEdgeEdgeKernel( __global const int4* pairs, \n" - " __global const BodyData* rigidBodies, \n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* separatingNormals,\n" - " __global int* hasSeparatingAxis,\n" - " __global float* dmins,\n" - " __global const float4* unitSphereDirections,\n" - " int numUnitSphereDirections,\n" - " int numPairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " \n" - " if (i<numPairs)\n" - " {\n" - " if (hasSeparatingAxis[i])\n" - " {\n" - " \n" - " int bodyIndexA = pairs[i].x;\n" - " int bodyIndexB = pairs[i].y;\n" - " \n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " \n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " \n" - " \n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " \n" - " float dmin = dmins[i];\n" - " \n" - " float4 posA = 
rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 c0local = convexShapes[shapeIndexA].m_localCenter;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = convexShapes[shapeIndexB].m_localCenter;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " float4 sepNormal = separatingNormals[i];\n" - " \n" - " \n" - " \n" - " bool sepEE = false;\n" - " int numEdgeEdgeDirections = convexShapes[shapeIndexA].m_numUniqueEdges*convexShapes[shapeIndexB].m_numUniqueEdges;\n" - " if (numEdgeEdgeDirections<=numUnitSphereDirections)\n" - " {\n" - " sepEE = findSeparatingAxisEdgeEdge( &convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,\n" - " indices,&sepNormal,&dmin);\n" - " \n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " /*\n" - " ///else case is a separate kernel, to make Mac OSX OpenCL compiler happy\n" - " else\n" - " {\n" - " sepEE = findSeparatingAxisUnitSphere(&convexShapes[shapeIndexA], &convexShapes[shapeIndexB],posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " vertices,unitSphereDirections,numUnitSphereDirections,\n" - " &sepNormal,&dmin);\n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis[i] = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis[i] = 1;\n" - " separatingNormals[i] = sepNormal;\n" - " }\n" - " }\n" - " */\n" - " } //if (hasSeparatingAxis[i])\n" - " }//(i<numPairs)\n" - "}\n" - "inline int findClippingFaces(const float4 separatingNormal,\n" - " const ConvexPolyhedronCL* hullA, \n" - " __global const ConvexPolyhedronCL* hullB,\n" - " const float4 posA, const Quaternion ornA,const 
float4 posB, const Quaternion ornB,\n" - " __global float4* worldVertsA1,\n" - " __global float4* worldNormalsA1,\n" - " __global float4* worldVertsB1,\n" - " int capacityWorldVerts,\n" - " const float minDist, float maxDist,\n" - " const float4* verticesA,\n" - " const btGpuFace* facesA,\n" - " const int* indicesA,\n" - " __global const float4* verticesB,\n" - " __global const btGpuFace* facesB,\n" - " __global const int* indicesB,\n" - " __global int4* clippingFaces, int pairIndex)\n" - "{\n" - " int numContactsOut = 0;\n" - " int numWorldVertsB1= 0;\n" - " \n" - " \n" - " int closestFaceB=0;\n" - " float dmax = -FLT_MAX;\n" - " \n" - " {\n" - " for(int face=0;face<hullB->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x,\n" - " facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f);\n" - " const float4 WorldNormal = qtRotate(ornB, Normal);\n" - " float d = dot3F4(WorldNormal,separatingNormal);\n" - " if (d > dmax)\n" - " {\n" - " dmax = d;\n" - " closestFaceB = face;\n" - " }\n" - " }\n" - " }\n" - " \n" - " {\n" - " const btGpuFace polyB = facesB[hullB->m_faceOffset+closestFaceB];\n" - " int numVertices = polyB.m_numIndices;\n" - " if (numVertices>capacityWorldVerts)\n" - " numVertices = capacityWorldVerts;\n" - " \n" - " for(int e0=0;e0<numVertices;e0++)\n" - " {\n" - " if (e0<capacityWorldVerts)\n" - " {\n" - " const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]];\n" - " worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB);\n" - " }\n" - " }\n" - " }\n" - " \n" - " int closestFaceA=0;\n" - " {\n" - " float dmin = FLT_MAX;\n" - " for(int face=0;face<hullA->m_numFaces;face++)\n" - " {\n" - " const float4 Normal = make_float4(\n" - " facesA[hullA->m_faceOffset+face].m_plane.x,\n" - " facesA[hullA->m_faceOffset+face].m_plane.y,\n" - " facesA[hullA->m_faceOffset+face].m_plane.z,\n" - " 0.f);\n" - " 
const float4 faceANormalWS = qtRotate(ornA,Normal);\n" - " \n" - " float d = dot3F4(faceANormalWS,separatingNormal);\n" - " if (d < dmin)\n" - " {\n" - " dmin = d;\n" - " closestFaceA = face;\n" - " worldNormalsA1[pairIndex] = faceANormalWS;\n" - " }\n" - " }\n" - " }\n" - " \n" - " int numVerticesA = facesA[hullA->m_faceOffset+closestFaceA].m_numIndices;\n" - " if (numVerticesA>capacityWorldVerts)\n" - " numVerticesA = capacityWorldVerts;\n" - " \n" - " for(int e0=0;e0<numVerticesA;e0++)\n" - " {\n" - " if (e0<capacityWorldVerts)\n" - " {\n" - " const float4 a = verticesA[hullA->m_vertexOffset+indicesA[facesA[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]];\n" - " worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA);\n" - " }\n" - " }\n" - " \n" - " clippingFaces[pairIndex].x = closestFaceA;\n" - " clippingFaces[pairIndex].y = closestFaceB;\n" - " clippingFaces[pairIndex].z = numVerticesA;\n" - " clippingFaces[pairIndex].w = numWorldVertsB1;\n" - " \n" - " \n" - " return numContactsOut;\n" - "}\n" - "// work-in-progress\n" - "__kernel void findConcaveSeparatingAxisKernel( __global int4* concavePairs,\n" - " __global const BodyData* rigidBodies,\n" - " __global const btCollidableGpu* collidables,\n" - " __global const ConvexPolyhedronCL* convexShapes, \n" - " __global const float4* vertices,\n" - " __global const float4* uniqueEdges,\n" - " __global const btGpuFace* faces,\n" - " __global const int* indices,\n" - " __global const btGpuChildShape* gpuChildShapes,\n" - " __global btAabbCL* aabbs,\n" - " __global float4* concaveSeparatingNormalsOut,\n" - " __global int* concaveHasSeparatingNormals,\n" - " __global int4* clippingFacesOut,\n" - " __global float4* worldVertsA1GPU,\n" - " __global float4* worldNormalsAGPU,\n" - " __global float4* worldVertsB1GPU,\n" - " int vertexFaceCapacity,\n" - " int numConcavePairs\n" - " )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConcavePairs)\n" - " return;\n" - " 
concaveHasSeparatingNormals[i] = 0;\n" - " int pairIdx = i;\n" - " int bodyIndexA = concavePairs[i].x;\n" - " int bodyIndexB = concavePairs[i].y;\n" - " int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx;\n" - " int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx;\n" - " int shapeIndexA = collidables[collidableIndexA].m_shapeIndex;\n" - " int shapeIndexB = collidables[collidableIndexB].m_shapeIndex;\n" - " if (collidables[collidableIndexB].m_shapeType!=SHAPE_CONVEX_HULL&&\n" - " collidables[collidableIndexB].m_shapeType!=SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " concavePairs[pairIdx].w = -1;\n" - " return;\n" - " }\n" - " int numFacesA = convexShapes[shapeIndexA].m_numFaces;\n" - " int numActualConcaveConvexTests = 0;\n" - " \n" - " int f = concavePairs[i].z;\n" - " \n" - " bool overlap = false;\n" - " \n" - " ConvexPolyhedronCL convexPolyhedronA;\n" - " //add 3 vertices of the triangle\n" - " convexPolyhedronA.m_numVertices = 3;\n" - " convexPolyhedronA.m_vertexOffset = 0;\n" - " float4 localCenter = make_float4(0.f,0.f,0.f,0.f);\n" - " btGpuFace face = faces[convexShapes[shapeIndexA].m_faceOffset+f];\n" - " float4 triMinAabb, triMaxAabb;\n" - " btAabbCL triAabb;\n" - " triAabb.m_min = make_float4(1e30f,1e30f,1e30f,0.f);\n" - " triAabb.m_max = make_float4(-1e30f,-1e30f,-1e30f,0.f);\n" - " \n" - " float4 verticesA[3];\n" - " for (int i=0;i<3;i++)\n" - " {\n" - " int index = indices[face.m_indexOffset+i];\n" - " float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index];\n" - " verticesA[i] = vert;\n" - " localCenter += vert;\n" - " \n" - " triAabb.m_min = min(triAabb.m_min,vert); \n" - " triAabb.m_max = max(triAabb.m_max,vert); \n" - " }\n" - " overlap = true;\n" - " overlap = (triAabb.m_min.x > aabbs[bodyIndexB].m_max.x || triAabb.m_max.x < aabbs[bodyIndexB].m_min.x) ? false : overlap;\n" - " overlap = (triAabb.m_min.z > aabbs[bodyIndexB].m_max.z || triAabb.m_max.z < aabbs[bodyIndexB].m_min.z) ? 
false : overlap;\n" - " overlap = (triAabb.m_min.y > aabbs[bodyIndexB].m_max.y || triAabb.m_max.y < aabbs[bodyIndexB].m_min.y) ? false : overlap;\n" - " \n" - " if (overlap)\n" - " {\n" - " float dmin = FLT_MAX;\n" - " int hasSeparatingAxis=5;\n" - " float4 sepAxis=make_float4(1,2,3,4);\n" - " int localCC=0;\n" - " numActualConcaveConvexTests++;\n" - " //a triangle has 3 unique edges\n" - " convexPolyhedronA.m_numUniqueEdges = 3;\n" - " convexPolyhedronA.m_uniqueEdgesOffset = 0;\n" - " float4 uniqueEdgesA[3];\n" - " \n" - " uniqueEdgesA[0] = (verticesA[1]-verticesA[0]);\n" - " uniqueEdgesA[1] = (verticesA[2]-verticesA[1]);\n" - " uniqueEdgesA[2] = (verticesA[0]-verticesA[2]);\n" - " convexPolyhedronA.m_faceOffset = 0;\n" - " \n" - " float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f);\n" - " \n" - " btGpuFace facesA[TRIANGLE_NUM_CONVEX_FACES];\n" - " int indicesA[3+3+2+2+2];\n" - " int curUsedIndices=0;\n" - " int fidx=0;\n" - " //front size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[0] = 0;\n" - " indicesA[1] = 1;\n" - " indicesA[2] = 2;\n" - " curUsedIndices+=3;\n" - " float c = face.m_plane.w;\n" - " facesA[fidx].m_plane.x = normal.x;\n" - " facesA[fidx].m_plane.y = normal.y;\n" - " facesA[fidx].m_plane.z = normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " //back size of triangle\n" - " {\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[3]=2;\n" - " indicesA[4]=1;\n" - " indicesA[5]=0;\n" - " curUsedIndices+=3;\n" - " float c = dot(normal,verticesA[0]);\n" - " float c1 = -face.m_plane.w;\n" - " facesA[fidx].m_plane.x = -normal.x;\n" - " facesA[fidx].m_plane.y = -normal.y;\n" - " facesA[fidx].m_plane.z = -normal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " facesA[fidx].m_numIndices=3;\n" - " }\n" - " fidx++;\n" - " bool addEdgePlanes = true;\n" - " if (addEdgePlanes)\n" - " {\n" - " int numVertices=3;\n" - " int 
prevVertex = numVertices-1;\n" - " for (int i=0;i<numVertices;i++)\n" - " {\n" - " float4 v0 = verticesA[i];\n" - " float4 v1 = verticesA[prevVertex];\n" - " \n" - " float4 edgeNormal = normalize(cross(normal,v1-v0));\n" - " float c = -dot(edgeNormal,v0);\n" - " facesA[fidx].m_numIndices = 2;\n" - " facesA[fidx].m_indexOffset=curUsedIndices;\n" - " indicesA[curUsedIndices++]=i;\n" - " indicesA[curUsedIndices++]=prevVertex;\n" - " \n" - " facesA[fidx].m_plane.x = edgeNormal.x;\n" - " facesA[fidx].m_plane.y = edgeNormal.y;\n" - " facesA[fidx].m_plane.z = edgeNormal.z;\n" - " facesA[fidx].m_plane.w = c;\n" - " fidx++;\n" - " prevVertex = i;\n" - " }\n" - " }\n" - " convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES;\n" - " convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f);\n" - " float4 posA = rigidBodies[bodyIndexA].m_pos;\n" - " posA.w = 0.f;\n" - " float4 posB = rigidBodies[bodyIndexB].m_pos;\n" - " posB.w = 0.f;\n" - " float4 ornA = rigidBodies[bodyIndexA].m_quat;\n" - " float4 ornB =rigidBodies[bodyIndexB].m_quat;\n" - " \n" - " ///////////////////\n" - " ///compound shape support\n" - " if (collidables[collidableIndexB].m_shapeType==SHAPE_COMPOUND_OF_CONVEX_HULLS)\n" - " {\n" - " int compoundChild = concavePairs[pairIdx].w;\n" - " int childShapeIndexB = compoundChild;//collidables[collidableIndexB].m_shapeIndex+compoundChild;\n" - " int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex;\n" - " float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition;\n" - " float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation;\n" - " float4 newPosB = transform(&childPosB,&posB,&ornB);\n" - " float4 newOrnB = qtMul(ornB,childOrnB);\n" - " posB = newPosB;\n" - " ornB = newOrnB;\n" - " shapeIndexB = collidables[childColIndexB].m_shapeIndex;\n" - " }\n" - " //////////////////\n" - " float4 c0local = convexPolyhedronA.m_localCenter;\n" - " float4 c0 = transform(&c0local, &posA, &ornA);\n" - " float4 c1local = 
convexShapes[shapeIndexB].m_localCenter;\n" - " float4 c1 = transform(&c1local,&posB,&ornB);\n" - " const float4 DeltaC2 = c0 - c1;\n" - " bool sepA = findSeparatingAxisLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " vertices,uniqueEdges,faces,indices,\n" - " &sepAxis,&dmin);\n" - " hasSeparatingAxis = 4;\n" - " if (!sepA)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " bool sepB = findSeparatingAxisLocalB( &convexShapes[shapeIndexB],&convexPolyhedronA,\n" - " posB,ornB,\n" - " posA,ornA,\n" - " DeltaC2,\n" - " vertices,uniqueEdges,faces,indices,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " &sepAxis,&dmin);\n" - " if (!sepB)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " bool sepEE = findSeparatingAxisEdgeEdgeLocalA( &convexPolyhedronA, &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " DeltaC2,\n" - " verticesA,uniqueEdgesA,facesA,indicesA,\n" - " vertices,uniqueEdges,faces,indices,\n" - " &sepAxis,&dmin);\n" - " \n" - " if (!sepEE)\n" - " {\n" - " hasSeparatingAxis = 0;\n" - " } else\n" - " {\n" - " hasSeparatingAxis = 1;\n" - " }\n" - " }\n" - " } \n" - " \n" - " if (hasSeparatingAxis)\n" - " {\n" - " sepAxis.w = dmin;\n" - " concaveSeparatingNormalsOut[pairIdx]=sepAxis;\n" - " concaveHasSeparatingNormals[i]=1;\n" - " float minDist = -1e30f;\n" - " float maxDist = 0.02f;\n" - " \n" - " findClippingFaces(sepAxis,\n" - " &convexPolyhedronA,\n" - " &convexShapes[shapeIndexB],\n" - " posA,ornA,\n" - " posB,ornB,\n" - " worldVertsA1GPU,\n" - " worldNormalsAGPU,\n" - " worldVertsB1GPU,\n" - " vertexFaceCapacity,\n" - " minDist, maxDist,\n" - " verticesA,\n" - " facesA,\n" - " indicesA,\n" - " vertices,\n" - " faces,\n" - " indices,\n" - " clippingFacesOut, pairIdx);\n" - " } else\n" - " { \n" - " //mark this pair as in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - " }\n" - 
" else\n" - " { \n" - " //mark this pair as in-active\n" - " concavePairs[pairIdx].w = -1;\n" - " }\n" - " \n" - " concavePairs[pairIdx].z = -1;//now z is used for existing/persistent contacts\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp deleted file mode 100644 index c0e11bfb26..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada -//Host-code rewritten by Erwin Coumans - -#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl" -#define KERNEL0 "SearchSortDataLowerKernel" -#define KERNEL1 "SearchSortDataUpperKernel" -#define KERNEL2 "SubtractKernel" - -#include "b3BoundSearchCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3LauncherCL.h" -#include "kernels/BoundSearchKernelsCL.h" - -b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize) - : m_context(ctx), - m_device(device), - m_queue(queue) -{ - const char* additionalMacros = ""; - //const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* kernelSource = boundSearchKernelsCL; - - cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, BOUNDSEARCH_PATH); - b3Assert(boundSearchProg); - - m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg, additionalMacros); - b3Assert(m_lowerSortDataKernel); - - m_upperSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg, additionalMacros); - b3Assert(m_upperSortDataKernel); - - m_subtractKernel = 0; - - if (maxSize) - { - m_subtractKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg, additionalMacros); - b3Assert(m_subtractKernel); - } - - //m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST ); - - m_lower = (maxSize == 0) ? 0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize); - m_upper = (maxSize == 0) ? 
0 : new b3OpenCLArray<unsigned int>(ctx, queue, maxSize); - - m_filler = new b3FillCL(ctx, device, queue); -} - -b3BoundSearchCL::~b3BoundSearchCL() -{ - delete m_lower; - delete m_upper; - delete m_filler; - - clReleaseKernel(m_lowerSortDataKernel); - clReleaseKernel(m_upperSortDataKernel); - clReleaseKernel(m_subtractKernel); -} - -void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option) -{ - b3Int4 constBuffer; - constBuffer.x = nSrc; - constBuffer.y = nDst; - - if (option == BOUND_LOWER) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_lowerSortDataKernel, "m_lowerSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nSrc, 64); - } - else if (option == BOUND_UPPER) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_upperSortDataKernel, "m_upperSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nSrc, 64); - } - else if (option == COUNT) - { - b3Assert(m_lower); - b3Assert(m_upper); - b3Assert(m_lower->capacity() <= (int)nDst); - b3Assert(m_upper->capacity() <= (int)nDst); - - int zero = 0; - m_filler->execute(*m_lower, zero, nDst); - m_filler->execute(*m_upper, zero, nDst); - - execute(src, nSrc, *m_lower, nDst, BOUND_LOWER); - execute(src, nSrc, *m_upper, nDst, BOUND_UPPER); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_upper->getBufferCL(), true), b3BufferInfoCL(m_lower->getBufferCL(), true), b3BufferInfoCL(dst.getBufferCL())}; - - b3LauncherCL launcher(m_queue, m_subtractKernel, "m_subtractKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - 
launcher.setConst(nSrc); - launcher.setConst(nDst); - - launcher.launch1D(nDst, 64); - } - } - else - { - b3Assert(0); - } -} - -void b3BoundSearchCL::executeHost(b3AlignedObjectArray<b3SortData>& src, int nSrc, - b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option) -{ - for (int i = 0; i < nSrc - 1; i++) - b3Assert(src[i].m_key <= src[i + 1].m_key); - - b3SortData minData, zeroData, maxData; - minData.m_key = -1; - minData.m_value = -1; - zeroData.m_key = 0; - zeroData.m_value = 0; - maxData.m_key = nDst; - maxData.m_value = nDst; - - if (option == BOUND_LOWER) - { - for (int i = 0; i < nSrc; i++) - { - b3SortData& iData = (i == 0) ? minData : src[i - 1]; - b3SortData& jData = (i == nSrc) ? maxData : src[i]; - - if (iData.m_key != jData.m_key) - { - int k = jData.m_key; - { - dst[k] = i; - } - } - } - } - else if (option == BOUND_UPPER) - { - for (int i = 1; i < nSrc + 1; i++) - { - b3SortData& iData = src[i - 1]; - b3SortData& jData = (i == nSrc) ? maxData : src[i]; - - if (iData.m_key != jData.m_key) - { - int k = iData.m_key; - { - dst[k] = i; - } - } - } - } - else if (option == COUNT) - { - b3AlignedObjectArray<unsigned int> lower; - lower.resize(nDst); - b3AlignedObjectArray<unsigned int> upper; - upper.resize(nDst); - - for (int i = 0; i < nDst; i++) - { - lower[i] = upper[i] = 0; - } - - executeHost(src, nSrc, lower, nDst, BOUND_LOWER); - executeHost(src, nSrc, upper, nDst, BOUND_UPPER); - - for (int i = 0; i < nDst; i++) - { - dst[i] = upper[i] - lower[i]; - } - } - else - { - b3Assert(0); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h deleted file mode 100644 index 0d633e3d23..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h +++ /dev/null @@ -1,64 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#ifndef B3_BOUNDSEARCH_H -#define B3_BOUNDSEARCH_H - -#pragma once - -/*#include <Adl/Adl.h> -#include <AdlPrimitives/Math/Math.h> -#include <AdlPrimitives/Sort/SortData.h> -#include <AdlPrimitives/Fill/Fill.h> -*/ - -#include "b3OpenCLArray.h" -#include "b3FillCL.h" -#include "b3RadixSort32CL.h" //for b3SortData (perhaps move it?) 
-class b3BoundSearchCL -{ -public: - enum Option - { - BOUND_LOWER, - BOUND_UPPER, - COUNT, - }; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - cl_kernel m_lowerSortDataKernel; - cl_kernel m_upperSortDataKernel; - cl_kernel m_subtractKernel; - - b3OpenCLArray<b3Int4>* m_constbtOpenCLArray; - b3OpenCLArray<unsigned int>* m_lower; - b3OpenCLArray<unsigned int>* m_upper; - - b3FillCL* m_filler; - - b3BoundSearchCL(cl_context context, cl_device_id device, cl_command_queue queue, int size); - - virtual ~b3BoundSearchCL(); - - // src has to be src[i].m_key <= src[i+1].m_key - void execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER); - - void executeHost(b3AlignedObjectArray<b3SortData>& src, int nSrc, b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option = BOUND_LOWER); -}; - -#endif //B3_BOUNDSEARCH_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h deleted file mode 100644 index 35fc467b20..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BufferInfoCL.h +++ /dev/null @@ -1,18 +0,0 @@ - -#ifndef B3_BUFFER_INFO_CL_H -#define B3_BUFFER_INFO_CL_H - -#include "b3OpenCLArray.h" - -struct b3BufferInfoCL -{ - //b3BufferInfoCL(){} - - // template<typename T> - b3BufferInfoCL(cl_mem buff, bool isReadOnly = false) : m_clBuffer(buff), m_isReadOnly(isReadOnly) {} - - cl_mem m_clBuffer; - bool m_isReadOnly; -}; - -#endif //B3_BUFFER_INFO_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp deleted file mode 100644 index bd25bb2101..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.cpp +++ /dev/null @@ -1,119 +0,0 @@ -#include "b3FillCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3BufferInfoCL.h" 
-#include "b3LauncherCL.h" - -#define FILL_CL_PROGRAM_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl" - -#include "kernels/FillKernelsCL.h" - -b3FillCL::b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue) - : m_commandQueue(queue) -{ - const char* kernelSource = fillKernelsCL; - cl_int pErrNum; - const char* additionalMacros = ""; - - cl_program fillProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, FILL_CL_PROGRAM_PATH); - b3Assert(fillProg); - - m_fillIntKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillIntKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillIntKernel); - - m_fillUnsignedIntKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillUnsignedIntKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillIntKernel); - - m_fillFloatKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillFloatKernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillFloatKernel); - - m_fillKernelInt2 = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "FillInt2Kernel", &pErrNum, fillProg, additionalMacros); - b3Assert(m_fillKernelInt2); -} - -b3FillCL::~b3FillCL() -{ - clReleaseKernel(m_fillKernelInt2); - clReleaseKernel(m_fillIntKernel); - clReleaseKernel(m_fillUnsignedIntKernel); - clReleaseKernel(m_fillFloatKernel); -} - -void b3FillCL::execute(b3OpenCLArray<float>& src, const float value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3LauncherCL launcher(m_commandQueue, m_fillFloatKernel, "m_fillFloatKernel"); - launcher.setBuffer(src.getBufferCL()); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - launcher.launch1D(n); - } -} - -void b3FillCL::execute(b3OpenCLArray<int>& src, const int value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3LauncherCL launcher(m_commandQueue, m_fillIntKernel, "m_fillIntKernel"); 
- launcher.setBuffer(src.getBufferCL()); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - launcher.launch1D(n); - } -} - -void b3FillCL::execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_fillUnsignedIntKernel, "m_fillUnsignedIntKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - launcher.launch1D(n); - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray<b3Int2>& src, const b3Int2& value, int n, int offset) -{ - for (int i = 0; i < n; i++) - { - src[i + offset] = value; - } -} - -void b3FillCL::executeHost(b3AlignedObjectArray<int>& src, const int value, int n, int offset) -{ - for (int i = 0; i < n; i++) - { - src[i + offset] = value; - } -} - -void b3FillCL::execute(b3OpenCLArray<b3Int2>& src, const b3Int2& value, int n, int offset) -{ - b3Assert(n > 0); - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src.getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_fillKernelInt2, "m_fillKernelInt2"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(n); - launcher.setConst(value); - launcher.setConst(offset); - - //( constBuffer ); - launcher.launch1D(n); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h deleted file mode 100644 index c92c3e5119..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3FillCL.h +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef B3_FILL_CL_H -#define B3_FILL_CL_H - -#include "b3OpenCLArray.h" -#include "Bullet3Common/b3Scalar.h" - -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" - -class b3FillCL -{ - cl_command_queue m_commandQueue; - - 
cl_kernel m_fillKernelInt2; - cl_kernel m_fillIntKernel; - cl_kernel m_fillUnsignedIntKernel; - cl_kernel m_fillFloatKernel; - -public: - struct b3ConstData - { - union { - b3Int4 m_data; - b3UnsignedInt4 m_UnsignedData; - }; - int m_offset; - int m_n; - int m_padding[2]; - }; - -protected: -public: - b3FillCL(cl_context ctx, cl_device_id device, cl_command_queue queue); - - virtual ~b3FillCL(); - - void execute(b3OpenCLArray<unsigned int>& src, const unsigned int value, int n, int offset = 0); - - void execute(b3OpenCLArray<int>& src, const int value, int n, int offset = 0); - - void execute(b3OpenCLArray<float>& src, const float value, int n, int offset = 0); - - void execute(b3OpenCLArray<b3Int2>& src, const b3Int2& value, int n, int offset = 0); - - void executeHost(b3AlignedObjectArray<b3Int2>& src, const b3Int2& value, int n, int offset); - - void executeHost(b3AlignedObjectArray<int>& src, const int value, int n, int offset); - - // void execute(b3OpenCLArray<b3Int4>& src, const b3Int4& value, int n, int offset = 0); -}; - -#endif //B3_FILL_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp deleted file mode 100644 index c97d02eb45..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.cpp +++ /dev/null @@ -1,296 +0,0 @@ -#include "b3LauncherCL.h" - -bool gDebugLauncherCL = false; - -b3LauncherCL::b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name) - : m_commandQueue(queue), - m_kernel(kernel), - m_idx(0), - m_enableSerialization(false), - m_name(name) -{ - if (gDebugLauncherCL) - { - static int counter = 0; - printf("[%d] Prepare to launch OpenCL kernel %s\n", counter++, name); - } - - m_serializationSizeInBytes = sizeof(int); -} - -b3LauncherCL::~b3LauncherCL() -{ - for (int i = 0; i < m_arrays.size(); i++) - { - delete (m_arrays[i]); - } - - m_arrays.clear(); - if (gDebugLauncherCL) - { - static int 
counter = 0; - printf("[%d] Finished launching OpenCL kernel %s\n", counter++, m_name); - } -} - -void b3LauncherCL::setBuffer(cl_mem clBuffer) -{ - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo(kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert(err == CL_SUCCESS); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - m_serializationSizeInBytes += param_value; - } - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &clBuffer); - b3Assert(status == CL_SUCCESS); -} - -void b3LauncherCL::setBuffers(b3BufferInfoCL* buffInfo, int n) -{ - for (int i = 0; i < n; i++) - { - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 1; - kernelArg.m_clBuffer = buffInfo[i].m_clBuffer; - - cl_mem_info param_name = CL_MEM_SIZE; - size_t param_value; - size_t sizeInBytes = sizeof(size_t); - size_t actualSizeInBytes; - cl_int err; - err = clGetMemObjectInfo(kernelArg.m_clBuffer, - param_name, - sizeInBytes, - ¶m_value, - &actualSizeInBytes); - - b3Assert(err == CL_SUCCESS); - kernelArg.m_argSizeInBytes = param_value; - - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - m_serializationSizeInBytes += param_value; - } - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &buffInfo[i].m_clBuffer); - b3Assert(status == CL_SUCCESS); - } -} - -struct b3KernelArgDataUnaligned -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int m_unusedPadding; - union { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; 
- }; -}; -#include <string.h> - -int b3LauncherCL::deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx) -{ - int index = 0; - - int numArguments = *(int*)&buf[index]; - index += sizeof(int); - - for (int i = 0; i < numArguments; i++) - { - b3KernelArgDataUnaligned* arg = (b3KernelArgDataUnaligned*)&buf[index]; - - index += sizeof(b3KernelArgData); - if (arg->m_isBuffer) - { - b3OpenCLArray<unsigned char>* clData = new b3OpenCLArray<unsigned char>(ctx, m_commandQueue, arg->m_argSizeInBytes); - clData->resize(arg->m_argSizeInBytes); - - clData->copyFromHostPointer(&buf[index], arg->m_argSizeInBytes); - - arg->m_clBuffer = clData->getBufferCL(); - - m_arrays.push_back(clData); - - cl_int status = clSetKernelArg(m_kernel, m_idx++, sizeof(cl_mem), &arg->m_clBuffer); - b3Assert(status == CL_SUCCESS); - index += arg->m_argSizeInBytes; - } - else - { - cl_int status = clSetKernelArg(m_kernel, m_idx++, arg->m_argSizeInBytes, &arg->m_argData); - b3Assert(status == CL_SUCCESS); - } - b3KernelArgData b; - memcpy(&b, arg, sizeof(b3KernelArgDataUnaligned)); - m_kernelArguments.push_back(b); - } - m_serializationSizeInBytes = index; - return index; -} - -int b3LauncherCL::validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx) -{ - int index = 0; - - int numArguments = *(int*)&goldBuffer[index]; - index += sizeof(int); - - if (numArguments != m_kernelArguments.size()) - { - printf("failed validation: expected %d arguments, found %d\n", numArguments, m_kernelArguments.size()); - return -1; - } - - for (int ii = 0; ii < numArguments; ii++) - { - b3KernelArgData* argGold = (b3KernelArgData*)&goldBuffer[index]; - - if (m_kernelArguments[ii].m_argSizeInBytes != argGold->m_argSizeInBytes) - { - printf("failed validation: argument %d sizeInBytes expected: %d, found %d\n", ii, argGold->m_argSizeInBytes, m_kernelArguments[ii].m_argSizeInBytes); - return -2; - } - - { - int expected = argGold->m_isBuffer; - int found = 
m_kernelArguments[ii].m_isBuffer; - - if (expected != found) - { - printf("failed validation: argument %d isBuffer expected: %d, found %d\n", ii, expected, found); - return -3; - } - } - index += sizeof(b3KernelArgData); - - if (argGold->m_isBuffer) - { - unsigned char* memBuf = (unsigned char*)malloc(m_kernelArguments[ii].m_argSizeInBytes); - unsigned char* goldBuf = &goldBuffer[index]; - for (int j = 0; j < m_kernelArguments[j].m_argSizeInBytes; j++) - { - memBuf[j] = 0xaa; - } - - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, m_kernelArguments[ii].m_clBuffer, CL_TRUE, 0, m_kernelArguments[ii].m_argSizeInBytes, - memBuf, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - clFinish(m_commandQueue); - - for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++) - { - int expected = goldBuf[b]; - int found = memBuf[b]; - if (expected != found) - { - printf("failed validation: argument %d OpenCL data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -4; - } - } - - index += argGold->m_argSizeInBytes; - } - else - { - //compare content - for (int b = 0; b < m_kernelArguments[ii].m_argSizeInBytes; b++) - { - int expected = argGold->m_argData[b]; - int found = m_kernelArguments[ii].m_argData[b]; - if (expected != found) - { - printf("failed validation: argument %d const data at byte position %d expected: %d, found %d\n", - ii, b, expected, found); - return -5; - } - } - } - } - return index; -} - -int b3LauncherCL::serializeArguments(unsigned char* destBuffer, int destBufferCapacity) -{ - //initialize to known values - for (int i = 0; i < destBufferCapacity; i++) - destBuffer[i] = 0xec; - - assert(destBufferCapacity >= m_serializationSizeInBytes); - - //todo: use the b3Serializer for this to allow for 32/64bit, endianness etc - int numArguments = m_kernelArguments.size(); - int curBufferSize = 0; - int* dest = (int*)&destBuffer[curBufferSize]; - *dest = numArguments; - curBufferSize += sizeof(int); - - for (int i = 0; 
i < this->m_kernelArguments.size(); i++) - { - b3KernelArgData* arg = (b3KernelArgData*)&destBuffer[curBufferSize]; - *arg = m_kernelArguments[i]; - curBufferSize += sizeof(b3KernelArgData); - if (arg->m_isBuffer == 1) - { - //copy the OpenCL buffer content - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, arg->m_clBuffer, 0, 0, arg->m_argSizeInBytes, - &destBuffer[curBufferSize], 0, 0, 0); - b3Assert(status == CL_SUCCESS); - clFinish(m_commandQueue); - curBufferSize += arg->m_argSizeInBytes; - } - } - return curBufferSize; -} - -void b3LauncherCL::serializeToFile(const char* fileName, int numWorkItems) -{ - int num = numWorkItems; - int buffSize = getSerializationBufferSize(); - unsigned char* buf = new unsigned char[buffSize + sizeof(int)]; - for (int i = 0; i < buffSize + 1; i++) - { - unsigned char* ptr = (unsigned char*)&buf[i]; - *ptr = 0xff; - } - // int actualWrite = serializeArguments(buf,buffSize); - - // unsigned char* cptr = (unsigned char*)&buf[buffSize]; - // printf("buf[buffSize] = %d\n",*cptr); - - assert(buf[buffSize] == 0xff); //check for buffer overrun - int* ptr = (int*)&buf[buffSize]; - - *ptr = num; - - FILE* f = fopen(fileName, "wb"); - fwrite(buf, buffSize + sizeof(int), 1, f); - fclose(f); - - delete[] buf; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h deleted file mode 100644 index 18e9c1db2b..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h +++ /dev/null @@ -1,128 +0,0 @@ - -#ifndef B3_LAUNCHER_CL_H -#define B3_LAUNCHER_CL_H - -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3MinMax.h" -#include "b3OpenCLArray.h" -#include <stdio.h> - -#define B3_DEBUG_SERIALIZE_CL - -#ifdef _WIN32 -#pragma warning(disable : 4996) -#endif -#define B3_CL_MAX_ARG_SIZE 16 -B3_ATTRIBUTE_ALIGNED16(struct) -b3KernelArgData -{ - int m_isBuffer; - int m_argIndex; - int m_argSizeInBytes; - int 
m_unusedPadding; - union { - cl_mem m_clBuffer; - unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; - }; -}; - -class b3LauncherCL -{ - cl_command_queue m_commandQueue; - cl_kernel m_kernel; - int m_idx; - - b3AlignedObjectArray<b3KernelArgData> m_kernelArguments; - int m_serializationSizeInBytes; - bool m_enableSerialization; - - const char* m_name; - -public: - b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays; - - b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name); - - virtual ~b3LauncherCL(); - - void setBuffer(cl_mem clBuffer); - - void setBuffers(b3BufferInfoCL* buffInfo, int n); - - int getSerializationBufferSize() const - { - return m_serializationSizeInBytes; - } - - int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx); - - inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx); - - int serializeArguments(unsigned char* destBuffer, int destBufferCapacity); - - int getNumArguments() const - { - return m_kernelArguments.size(); - } - - b3KernelArgData getArgument(int index) - { - return m_kernelArguments[index]; - } - - void serializeToFile(const char* fileName, int numWorkItems); - - template <typename T> - inline void setConst(const T& consts) - { - int sz = sizeof(T); - b3Assert(sz <= B3_CL_MAX_ARG_SIZE); - - if (m_enableSerialization) - { - b3KernelArgData kernelArg; - kernelArg.m_argIndex = m_idx; - kernelArg.m_isBuffer = 0; - T* destArg = (T*)kernelArg.m_argData; - *destArg = consts; - kernelArg.m_argSizeInBytes = sizeof(T); - m_kernelArguments.push_back(kernelArg); - m_serializationSizeInBytes += sizeof(b3KernelArgData); - } - - cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts); - b3Assert(status == CL_SUCCESS); - } - - inline void launch1D(int numThreads, int localSize = 64) - { - launch2D(numThreads, 1, localSize, 1); - } - - inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY) - { - size_t gRange[3] = {1, 1, 1}; - size_t 
lRange[3] = {1, 1, 1}; - lRange[0] = localSizeX; - lRange[1] = localSizeY; - gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1)); - gRange[0] *= lRange[0]; - gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1)); - gRange[1] *= lRange[1]; - - cl_int status = clEnqueueNDRangeKernel(m_commandQueue, - m_kernel, 2, NULL, gRange, lRange, 0, 0, 0); - if (status != CL_SUCCESS) - { - printf("Error: OpenCL status = %d\n", status); - } - b3Assert(status == CL_SUCCESS); - } - - void enableSerialization(bool serialize) - { - m_enableSerialization = serialize; - } -}; - -#endif //B3_LAUNCHER_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h deleted file mode 100644 index e837cceb66..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h +++ /dev/null @@ -1,300 +0,0 @@ -#ifndef B3_OPENCL_ARRAY_H -#define B3_OPENCL_ARRAY_H - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -template <typename T> -class b3OpenCLArray -{ - size_t m_size; - size_t m_capacity; - cl_mem m_clBuffer; - - cl_context m_clContext; - cl_command_queue m_commandQueue; - - bool m_ownsMemory; - - bool m_allowGrowingCapacity; - - void deallocate() - { - if (m_clBuffer && m_ownsMemory) - { - clReleaseMemObject(m_clBuffer); - } - m_clBuffer = 0; - m_capacity = 0; - } - - b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src); - - B3_FORCE_INLINE size_t allocSize(size_t size) - { - return (size ? 
size * 2 : 1); - } - -public: - b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity = 0, bool allowGrowingCapacity = true) - : m_size(0), m_capacity(0), m_clBuffer(0), m_clContext(ctx), m_commandQueue(queue), m_ownsMemory(true), m_allowGrowingCapacity(true) - { - if (initialCapacity) - { - reserve(initialCapacity); - } - m_allowGrowingCapacity = allowGrowingCapacity; - } - - ///this is an error-prone method with no error checking, be careful! - void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) - { - deallocate(); - m_ownsMemory = false; - m_allowGrowingCapacity = false; - m_clBuffer = buffer; - m_size = sizeInElements; - m_capacity = sizeInElements; - } - - // we could enable this assignment, but need to make sure to avoid accidental deep copies - // b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src) - // { - // copyFromArray(src); - // return *this; - // } - - cl_mem getBufferCL() const - { - return m_clBuffer; - } - - virtual ~b3OpenCLArray() - { - deallocate(); - m_size = 0; - m_capacity = 0; - } - - B3_FORCE_INLINE bool push_back(const T& _Val, bool waitForCompletion = true) - { - bool result = true; - size_t sz = size(); - if (sz == capacity()) - { - result = reserve(allocSize(size())); - } - copyFromHostPointer(&_Val, 1, sz, waitForCompletion); - m_size++; - return result; - } - - B3_FORCE_INLINE T forcedAt(size_t n) const - { - b3Assert(n >= 0); - b3Assert(n < capacity()); - T elem; - copyToHostPointer(&elem, 1, n, true); - return elem; - } - - B3_FORCE_INLINE T at(size_t n) const - { - b3Assert(n >= 0); - b3Assert(n < size()); - T elem; - copyToHostPointer(&elem, 1, n, true); - return elem; - } - - B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents = true) - { - bool result = true; - size_t curSize = size(); - - if (newsize < curSize) - { - //leave the OpenCL memory for now - } - else - { - if (newsize > size()) - { - result = reserve(newsize, copyOldContents); - } - - //leave new data 
uninitialized (init in debug mode?) - //for (size_t i=curSize;i<newsize;i++) ... - } - - if (result) - { - m_size = newsize; - } - else - { - m_size = 0; - } - return result; - } - - B3_FORCE_INLINE size_t size() const - { - return m_size; - } - - B3_FORCE_INLINE size_t capacity() const - { - return m_capacity; - } - - B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents = true) - { - bool result = true; - // determine new minimum length of allocated storage - if (capacity() < _Count) - { // not enough room, reallocate - - if (m_allowGrowingCapacity) - { - cl_int ciErrNum; - //create a new OpenCL buffer - size_t memSizeInBytes = sizeof(T) * _Count; - cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum); - if (ciErrNum != CL_SUCCESS) - { - b3Error("OpenCL out-of-memory\n"); - _Count = 0; - result = false; - } -//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS -#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS - unsigned char* src = (unsigned char*)malloc(memSizeInBytes); - for (size_t i = 0; i < memSizeInBytes; i++) - src[i] = 0xbb; - ciErrNum = clEnqueueWriteBuffer(m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0, 0, 0); - b3Assert(ciErrNum == CL_SUCCESS); - clFinish(m_commandQueue); - free(src); -#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS - - if (result) - { - if (copyOldContents) - copyToCL(buf, size()); - } - - //deallocate the old buffer - deallocate(); - - m_clBuffer = buf; - - m_capacity = _Count; - } - else - { - //fail: assert and - b3Assert(0); - deallocate(); - result = false; - } - } - return result; - } - - void copyToCL(cl_mem destination, size_t numElements, size_t firstElem = 0, size_t dstOffsetInElems = 0) const - { - if (numElements <= 0) - return; - - b3Assert(m_clBuffer); - b3Assert(destination); - - //likely some error, destination is same as source - b3Assert(m_clBuffer != destination); - - b3Assert((firstElem + numElements) <= m_size); - - cl_int status = 0; - - b3Assert(numElements > 0); - 
b3Assert(numElements <= m_size); - - size_t srcOffsetBytes = sizeof(T) * firstElem; - size_t dstOffsetInBytes = sizeof(T) * dstOffsetInElems; - - status = clEnqueueCopyBuffer(m_commandQueue, m_clBuffer, destination, - srcOffsetBytes, dstOffsetInBytes, sizeof(T) * numElements, 0, 0, 0); - - b3Assert(status == CL_SUCCESS); - } - - void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion = true) - { - size_t newSize = srcArray.size(); - - bool copyOldContents = false; - resize(newSize, copyOldContents); - if (newSize) - copyFromHostPointer(&srcArray[0], newSize, 0, waitForCompletion); - } - - void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem = 0, bool waitForCompletion = true) - { - b3Assert(numElems + destFirstElem <= capacity()); - - if (numElems + destFirstElem) - { - cl_int status = 0; - size_t sizeInBytes = sizeof(T) * numElems; - status = clEnqueueWriteBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * destFirstElem, sizeInBytes, - src, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - if (waitForCompletion) - clFinish(m_commandQueue); - } - else - { - b3Error("copyFromHostPointer invalid range\n"); - } - } - - void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion = true) const - { - destArray.resize(this->size()); - if (size()) - copyToHostPointer(&destArray[0], size(), 0, waitForCompletion); - } - - void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem = 0, bool waitForCompletion = true) const - { - b3Assert(numElem + srcFirstElem <= capacity()); - - if (numElem + srcFirstElem <= capacity()) - { - cl_int status = 0; - status = clEnqueueReadBuffer(m_commandQueue, m_clBuffer, 0, sizeof(T) * srcFirstElem, sizeof(T) * numElem, - destPtr, 0, 0, 0); - b3Assert(status == CL_SUCCESS); - - if (waitForCompletion) - clFinish(m_commandQueue); - } - else - { - b3Error("copyToHostPointer invalid range\n"); - } - } - - void copyFromOpenCLArray(const b3OpenCLArray& src) - { - size_t newSize = 
src.size(); - resize(newSize); - if (size()) - { - src.copyToCL(m_clBuffer, size()); - } - } -}; - -#endif //B3_OPENCL_ARRAY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp deleted file mode 100644 index 822b511633..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsCL.h" - -b3PrefixScanCL::b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) - : m_commandQueue(queue) -{ - const char* scanKernelSource = prefixScanKernelsCL; - cl_int pErrNum; - char* additionalMacros = 0; - - m_workBuffer = new b3OpenCLArray<unsigned int>(ctx, queue, size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, scanKernelSource, &pErrNum, additionalMacros, B3_PREFIXSCAN_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_localScanKernel); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_blockSumKernel); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_propagationKernel); -} - -b3PrefixScanCL::~b3PrefixScanCL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template <class T> -T b3NextPowerOf2(T n) -{ - n -= 1; - 
for (int i = 0; i < sizeof(T) * 8; i++) - n = n | (n >> i); - return n + 1; -} - -void b3PrefixScanCL::execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum) -{ - // b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)((n + BLOCK_SIZE * 2 - 1) / (BLOCK_SIZE * 2)); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2(numBlocks); - - b3OpenCLArray<unsigned int>* srcNative = &src; - b3OpenCLArray<unsigned int>* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(srcNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_localScanKernel, "m_localScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(numBlocks * BLOCK_SIZE, BLOCK_SIZE); - } - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_blockSumKernel, "m_blockSumKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(BLOCK_SIZE, BLOCK_SIZE); - } - - if (numBlocks > 1) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_propagationKernel, "m_propagationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D((numBlocks - 1) * BLOCK_SIZE, BLOCK_SIZE); - } - - if (sum) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum, 1, n - 1, true); - } -} - -void b3PrefixScanCL::executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, 
unsigned int* sum) -{ - unsigned int s = 0; - //if( data->m_option == EXCLUSIVE ) - { - for (int i = 0; i < n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i<n; i++) - { - s += hSrc[i]; - hDst[i] = s; - } - } - */ - - if (sum) - { - *sum = dst[n - 1]; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h deleted file mode 100644 index 346efa0c73..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h +++ /dev/null @@ -1,35 +0,0 @@ - -#ifndef B3_PREFIX_SCAN_CL_H -#define B3_PREFIX_SCAN_CL_H - -#include "b3OpenCLArray.h" -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -class b3PrefixScanCL -{ - enum - { - BLOCK_SIZE = 128 - }; - - // Option m_option; - - cl_command_queue m_commandQueue; - - cl_kernel m_localScanKernel; - cl_kernel m_blockSumKernel; - cl_kernel m_propagationKernel; - - b3OpenCLArray<unsigned int>* m_workBuffer; - -public: - b3PrefixScanCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size = 0); - - virtual ~b3PrefixScanCL(); - - void execute(b3OpenCLArray<unsigned int>& src, b3OpenCLArray<unsigned int>& dst, int n, unsigned int* sum = 0); - void executeHost(b3AlignedObjectArray<unsigned int>& src, b3AlignedObjectArray<unsigned int>& dst, int n, unsigned int* sum = 0); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp deleted file mode 100644 index 1cac97c988..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "b3PrefixScanFloat4CL.h" -#include "b3FillCL.h" -#define B3_PREFIXSCAN_FLOAT4_PROG_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl" - -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "kernels/PrefixScanKernelsFloat4CL.h" - -b3PrefixScanFloat4CL::b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size) - : m_commandQueue(queue) -{ - const char* 
scanKernelSource = prefixScanKernelsFloat4CL; - cl_int pErrNum; - char* additionalMacros = 0; - - m_workBuffer = new b3OpenCLArray<b3Vector3>(ctx, queue, size); - cl_program scanProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, scanKernelSource, &pErrNum, additionalMacros, B3_PREFIXSCAN_FLOAT4_PROG_PATH); - b3Assert(scanProg); - - m_localScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "LocalScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_localScanKernel); - m_blockSumKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "TopLevelScanKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_blockSumKernel); - m_propagationKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, scanKernelSource, "AddOffsetKernel", &pErrNum, scanProg, additionalMacros); - b3Assert(m_propagationKernel); -} - -b3PrefixScanFloat4CL::~b3PrefixScanFloat4CL() -{ - delete m_workBuffer; - clReleaseKernel(m_localScanKernel); - clReleaseKernel(m_blockSumKernel); - clReleaseKernel(m_propagationKernel); -} - -template <class T> -T b3NextPowerOf2(T n) -{ - n -= 1; - for (int i = 0; i < sizeof(T) * 8; i++) - n = n | (n >> i); - return n + 1; -} - -void b3PrefixScanFloat4CL::execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<b3Vector3>& dst, int n, b3Vector3* sum) -{ - // b3Assert( data->m_option == EXCLUSIVE ); - const unsigned int numBlocks = (const unsigned int)((n + BLOCK_SIZE * 2 - 1) / (BLOCK_SIZE * 2)); - - dst.resize(src.size()); - m_workBuffer->resize(src.size()); - - b3Int4 constBuffer; - constBuffer.x = n; - constBuffer.y = numBlocks; - constBuffer.z = (int)b3NextPowerOf2(numBlocks); - - b3OpenCLArray<b3Vector3>* srcNative = &src; - b3OpenCLArray<b3Vector3>* dstNative = &dst; - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(srcNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, 
m_localScanKernel, "m_localScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(numBlocks * BLOCK_SIZE, BLOCK_SIZE); - } - - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_workBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_commandQueue, m_blockSumKernel, "m_blockSumKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D(BLOCK_SIZE, BLOCK_SIZE); - } - - if (numBlocks > 1) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(dstNative->getBufferCL()), b3BufferInfoCL(m_workBuffer->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_propagationKernel, "m_propagationKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(constBuffer); - launcher.launch1D((numBlocks - 1) * BLOCK_SIZE, BLOCK_SIZE); - } - - if (sum) - { - clFinish(m_commandQueue); - dstNative->copyToHostPointer(sum, 1, n - 1, true); - } -} - -void b3PrefixScanFloat4CL::executeHost(b3AlignedObjectArray<b3Vector3>& src, b3AlignedObjectArray<b3Vector3>& dst, int n, b3Vector3* sum) -{ - b3Vector3 s = b3MakeVector3(0, 0, 0); - //if( data->m_option == EXCLUSIVE ) - { - for (int i = 0; i < n; i++) - { - dst[i] = s; - s += src[i]; - } - } - /*else - { - for(int i=0; i<n; i++) - { - s += hSrc[i]; - hDst[i] = s; - } - } - */ - - if (sum) - { - *sum = dst[n - 1]; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h deleted file mode 100644 index 122b0bfd68..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h +++ /dev/null @@ -1,36 +0,0 @@ - -#ifndef B3_PREFIX_SCAN_CL_H -#define B3_PREFIX_SCAN_CL_H - -#include "b3OpenCLArray.h" -#include "b3BufferInfoCL.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -class b3PrefixScanFloat4CL -{ - enum - { - BLOCK_SIZE = 128 - }; - - // Option m_option; - - cl_command_queue m_commandQueue; - - cl_kernel m_localScanKernel; - cl_kernel m_blockSumKernel; - cl_kernel m_propagationKernel; - - b3OpenCLArray<b3Vector3>* m_workBuffer; - -public: - b3PrefixScanFloat4CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int size = 0); - - virtual ~b3PrefixScanFloat4CL(); - - void execute(b3OpenCLArray<b3Vector3>& src, b3OpenCLArray<b3Vector3>& dst, int n, b3Vector3* sum = 0); - void executeHost(b3AlignedObjectArray<b3Vector3>& src, b3AlignedObjectArray<b3Vector3>& dst, int n, b3Vector3* sum); -}; - -#endif //B3_PREFIX_SCAN_CL_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp deleted file mode 100644 index e86af6583f..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.cpp +++ /dev/null @@ -1,646 +0,0 @@ - -#include "b3RadixSort32CL.h" -#include "b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3PrefixScanCL.h" -#include "b3FillCL.h" - -#define RADIXSORT32_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl" - -#include "kernels/RadixSort32KernelsCL.h" - -b3RadixSort32CL::b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity) - : 
m_commandQueue(queue) -{ - b3OpenCLDeviceInfo info; - b3OpenCLUtils::getDeviceInfo(device, &info); - m_deviceCPU = (info.m_deviceType & CL_DEVICE_TYPE_CPU) != 0; - - m_workBuffer1 = new b3OpenCLArray<unsigned int>(ctx, queue); - m_workBuffer2 = new b3OpenCLArray<unsigned int>(ctx, queue); - m_workBuffer3 = new b3OpenCLArray<b3SortData>(ctx, queue); - m_workBuffer3a = new b3OpenCLArray<unsigned int>(ctx, queue); - m_workBuffer4 = new b3OpenCLArray<b3SortData>(ctx, queue); - m_workBuffer4a = new b3OpenCLArray<unsigned int>(ctx, queue); - - if (initialCapacity > 0) - { - m_workBuffer1->resize(initialCapacity); - m_workBuffer3->resize(initialCapacity); - m_workBuffer3a->resize(initialCapacity); - m_workBuffer4->resize(initialCapacity); - m_workBuffer4a->resize(initialCapacity); - } - - m_scan = new b3PrefixScanCL(ctx, device, queue); - m_fill = new b3FillCL(ctx, device, queue); - - const char* additionalMacros = ""; - - cl_int pErrNum; - const char* kernelSource = radixSort32KernelsCL; - - cl_program sortProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, kernelSource, &pErrNum, additionalMacros, RADIXSORT32_PATH); - b3Assert(sortProg); - - m_streamCountSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "StreamCountSortDataKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_streamCountSortDataKernel); - - m_streamCountKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "StreamCountKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_streamCountKernel); - - if (m_deviceCPU) - { - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterSortDataKernelSerial", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterKernelSerial", &pErrNum, sortProg, additionalMacros); - 
b3Assert(m_sortAndScatterKernel); - } - else - { - m_sortAndScatterSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterSortDataKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterSortDataKernel); - m_sortAndScatterKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "SortAndScatterKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_sortAndScatterKernel); - } - - m_prefixScanKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, kernelSource, "PrefixScanKernel", &pErrNum, sortProg, additionalMacros); - b3Assert(m_prefixScanKernel); -} - -b3RadixSort32CL::~b3RadixSort32CL() -{ - delete m_scan; - delete m_fill; - delete m_workBuffer1; - delete m_workBuffer2; - delete m_workBuffer3; - delete m_workBuffer3a; - delete m_workBuffer4; - delete m_workBuffer4a; - - clReleaseKernel(m_streamCountSortDataKernel); - clReleaseKernel(m_streamCountKernel); - clReleaseKernel(m_sortAndScatterSortDataKernel); - clReleaseKernel(m_sortAndScatterKernel); - clReleaseKernel(m_prefixScanKernel); -} - -void b3RadixSort32CL::executeHost(b3AlignedObjectArray<b3SortData>& inout, int sortBits /* = 32 */) -{ - int n = inout.size(); - const int BITS_PER_PASS = 8; - const int NUM_TABLES = (1 << BITS_PER_PASS); - - int tables[NUM_TABLES]; - int counter[NUM_TABLES]; - - b3SortData* src = &inout[0]; - b3AlignedObjectArray<b3SortData> workbuffer; - workbuffer.resize(inout.size()); - b3SortData* dst = &workbuffer[0]; - - int count = 0; - for (int startBit = 0; startBit < sortBits; startBit += BITS_PER_PASS) - { - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = 0; - } - - for (int i = 0; i < n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES - 1); - tables[tableIdx]++; - } -//#define TEST -#ifdef TEST - printf("histogram size=%d\n", NUM_TABLES); - for (int i = 0; i < NUM_TABLES; i++) - { - if (tables[i] != 0) - { - printf("tables[%d]=%d]\n", i, tables[i]); - } - } 
-#endif //TEST \ - // prefix scan - int sum = 0; - for (int i = 0; i < NUM_TABLES; i++) - { - int iData = tables[i]; - tables[i] = sum; - sum += iData; - counter[i] = 0; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (src[i].m_key >> startBit) & (NUM_TABLES - 1); - - dst[tables[tableIdx] + counter[tableIdx]] = src[i]; - counter[tableIdx]++; - } - - b3Swap(src, dst); - count++; - } - - if (count & 1) - { - b3Assert(0); //need to copy - } -} - -void b3RadixSort32CL::executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */) -{ - b3AlignedObjectArray<b3SortData> inout; - keyValuesInOut.copyToHost(inout); - - executeHost(inout, sortBits); - - keyValuesInOut.copyFromHost(inout); -} - -void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, - b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits) -{ -} - -//#define DEBUG_RADIXSORT -//#define DEBUG_RADIXSORT2 - -void b3RadixSort32CL::execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits /* = 32 */) -{ - int originalSize = keyValuesInOut.size(); - int workingSize = originalSize; - - int dataAlignment = DATA_ALIGNMENT; - -#ifdef DEBUG_RADIXSORT2 - b3AlignedObjectArray<b3SortData> test2; - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } -#endif //DEBUG_RADIXSORT2 - - b3OpenCLArray<b3SortData>* src = 0; - - if (workingSize % dataAlignment) - { - workingSize += dataAlignment - (workingSize % dataAlignment); - m_workBuffer4->copyFromOpenCLArray(keyValuesInOut); - m_workBuffer4->resize(workingSize); - b3SortData fillValue; - fillValue.m_key = 0xffffffff; - fillValue.m_value = 0xffffffff; - -#define USE_BTFILL -#ifdef USE_BTFILL - m_fill->execute((b3OpenCLArray<b3Int2>&)*m_workBuffer4, 
(b3Int2&)fillValue, workingSize - originalSize, originalSize); -#else - //fill the remaining bits (very slow way, todo: fill on GPU/OpenCL side) - - for (int i = originalSize; i < workingSize; i++) - { - m_workBuffer4->copyFromHostPointer(&fillValue, 1, i); - } -#endif //USE_BTFILL - - src = m_workBuffer4; - } - else - { - src = &keyValuesInOut; - m_workBuffer4->resize(0); - } - - b3Assert(workingSize % DATA_ALIGNMENT == 0); - int minCap = NUM_BUCKET * NUM_WGS; - - int n = workingSize; - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - - // ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert(BITS_PER_PASS == 4); - b3Assert(WG_SIZE == 64); - b3Assert((sortBits & 0x3) == 0); - - b3OpenCLArray<b3SortData>* dst = m_workBuffer3; - - b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1; - b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2; - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM * WG_SIZE; //set at 256 - int nBlocks = (n + blockSize - 1) / (blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1) / cdata.m_nWGs; - if (nBlocks < NUM_WGS) - { - cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count = 0; - for (int ib = 0; ib < sortBits; ib += 4) - { -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_streamCountSortDataKernel, "m_streamCountSortDataKernel"); - - launcher.setBuffers(bInfo, sizeof(bInfo) / 
sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - - int num = NUM_WGS * WG_SIZE; - launcher.launch1D(num, WG_SIZE); - } - -#ifdef DEBUG_RADIXSORT - b3AlignedObjectArray<unsigned int> testHist; - srcHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } -#endif //DEBUG_RADIXSORT - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan = false; -#else - bool fastScan = !m_deviceCPU; //only use fast scan on GPU -#endif - - if (fastScan) - { // prefix scan group histogram - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_prefixScanKernel, "m_prefixScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(128, 128); - destHisto = srcHisto; - } - else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto, *destHisto, 1920, 0); //,&sum); - } - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } - - for (int i = 0; i < testHist.size(); i += NUM_WGS) - { - printf("testHist[%d]=%d\n", i / NUM_WGS, testHist[i]); - } - -#endif //DEBUG_RADIXSORT - -#define USE_GPU -#ifdef USE_GPU - - if (src->size()) - { // local sort and distribute - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(destHisto->getBufferCL(), true), b3BufferInfoCL(dst->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_sortAndScatterSortDataKernel, "m_sortAndScatterSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - 
launcher.launch1D(nWGs * WG_SIZE, WG_SIZE); - } -#else - { -#define NUM_TABLES 16 -//#define SEQUENTIAL -#ifdef SEQUENTIAL - int counter2[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - int tables[NUM_TABLES]; - int startBit = ib; - - destHisto->copyToHost(testHist); - b3AlignedObjectArray<b3SortData> srcHost; - b3AlignedObjectArray<b3SortData> dstHost; - dstHost.resize(src->size()); - - src->copyToHost(srcHost); - - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = testHist[i * NUM_WGS]; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - dstHost[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx]++; - } - -#else - - int counter2[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - int tables[NUM_TABLES]; - b3AlignedObjectArray<b3SortData> dstHostOK; - dstHostOK.resize(src->size()); - - destHisto->copyToHost(testHist); - b3AlignedObjectArray<b3SortData> srcHost; - src->copyToHost(srcHost); - - int blockSize = 256; - int nBlocksPerWG = cdata.m_nBlocksPerWG; - int startBit = ib; - - { - for (int i = 0; i < NUM_TABLES; i++) - { - tables[i] = testHist[i * NUM_WGS]; - } - - // distribute - for (int i = 0; i < n; i++) - { - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - dstHostOK[tables[tableIdx] + counter2[tableIdx]] = srcHost[i]; - counter2[tableIdx]++; - } - } - - b3AlignedObjectArray<b3SortData> dstHost; - dstHost.resize(src->size()); - - int counter[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - for (int wgIdx = 0; wgIdx < NUM_WGS; wgIdx++) - { - int counter[NUM_TABLES] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - - int nBlocks = (n) / blockSize - nBlocksPerWG * wgIdx; - - for (int iblock = 0; iblock < b3Min(cdata.m_nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx = 0; lIdx < 64; lIdx++) - { - int addr = iblock * blockSize + blockSize * cdata.m_nBlocksPerWG * wgIdx + 
ELEMENTS_PER_WORK_ITEM * lIdx; - - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for (int j = 0; j < ELEMENTS_PER_WORK_ITEM; j++) - { - if (addr + j < n) - { - // printf ("addr+j=%d\n", addr+j); - - int i = addr + j; - - int tableIdx = (srcHost[i].m_key >> startBit) & (NUM_TABLES - 1); - - int destIndex = testHist[tableIdx * NUM_WGS + wgIdx] + counter[tableIdx]; - - b3SortData ok = dstHostOK[destIndex]; - - if (ok.m_key != srcHost[i].m_key) - { - printf("ok.m_key = %d, srcHost[i].m_key = %d\n", ok.m_key, srcHost[i].m_key); - printf("(ok.m_value = %d, srcHost[i].m_value = %d)\n", ok.m_value, srcHost[i].m_value); - } - if (ok.m_value != srcHost[i].m_value) - { - printf("ok.m_value = %d, srcHost[i].m_value = %d\n", ok.m_value, srcHost[i].m_value); - printf("(ok.m_key = %d, srcHost[i].m_key = %d)\n", ok.m_key, srcHost[i].m_key); - } - - dstHost[destIndex] = srcHost[i]; - counter[tableIdx]++; - } - } - } - } - } - -#endif //SEQUENTIAL - - dst->copyFromHost(dstHost); - } -#endif //USE_GPU - -#ifdef DEBUG_RADIXSORT - destHisto->copyToHost(testHist); - printf("ib = %d, testHist size = %d, non zero elements:\n", ib, testHist.size()); - for (int i = 0; i < testHist.size(); i++) - { - if (testHist[i] != 0) - printf("testHist[%d]=%d\n", i, testHist[i]); - } -#endif //DEBUG_RADIXSORT - b3Swap(src, dst); - b3Swap(srcHisto, destHisto); - -#ifdef DEBUG_RADIXSORT2 - keyValuesInOut.copyToHost(test2); - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - if (test2[i].m_key != test2[i].m_value) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } - } -#endif //DEBUG_RADIXSORT2 - - count++; - } - - if (count & 1) - { - b3Assert(0); //need to 
copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4->size()) - { - m_workBuffer4->resize(originalSize); - keyValuesInOut.copyFromOpenCLArray(*m_workBuffer4); - } - -#ifdef DEBUG_RADIXSORT - keyValuesInOut.copyToHost(test2); - - printf("numElem = %d\n", test2.size()); - for (int i = 0; i < test2.size(); i++) - { - printf("test2[%d].m_key=%d\n", i, test2[i].m_key); - printf("test2[%d].m_value=%d\n", i, test2[i].m_value); - } -#endif -} - -void b3RadixSort32CL::execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits /* = 32 */) -{ - int originalSize = keysInOut.size(); - int workingSize = originalSize; - - int dataAlignment = DATA_ALIGNMENT; - - b3OpenCLArray<unsigned int>* src = 0; - - if (workingSize % dataAlignment) - { - workingSize += dataAlignment - (workingSize % dataAlignment); - m_workBuffer4a->copyFromOpenCLArray(keysInOut); - m_workBuffer4a->resize(workingSize); - unsigned int fillValue = 0xffffffff; - - m_fill->execute(*m_workBuffer4a, fillValue, workingSize - originalSize, originalSize); - - src = m_workBuffer4a; - } - else - { - src = &keysInOut; - m_workBuffer4a->resize(0); - } - - b3Assert(workingSize % DATA_ALIGNMENT == 0); - int minCap = NUM_BUCKET * NUM_WGS; - - int n = workingSize; - - m_workBuffer1->resize(minCap); - m_workBuffer3->resize(workingSize); - m_workBuffer3a->resize(workingSize); - - // ADLASSERT( ELEMENTS_PER_WORK_ITEM == 4 ); - b3Assert(BITS_PER_PASS == 4); - b3Assert(WG_SIZE == 64); - b3Assert((sortBits & 0x3) == 0); - - b3OpenCLArray<unsigned int>* dst = m_workBuffer3a; - - b3OpenCLArray<unsigned int>* srcHisto = m_workBuffer1; - b3OpenCLArray<unsigned int>* destHisto = m_workBuffer2; - - int nWGs = NUM_WGS; - b3ConstData cdata; - - { - int blockSize = ELEMENTS_PER_WORK_ITEM * WG_SIZE; //set at 256 - int nBlocks = (n + blockSize - 1) / (blockSize); - cdata.m_n = n; - cdata.m_nWGs = NUM_WGS; - cdata.m_startBit = 0; - cdata.m_nBlocksPerWG = (nBlocks + cdata.m_nWGs - 1) / cdata.m_nWGs; - if (nBlocks < NUM_WGS) - { - 
cdata.m_nBlocksPerWG = 1; - nWGs = nBlocks; - } - } - - int count = 0; - for (int ib = 0; ib < sortBits; ib += 4) - { - cdata.m_startBit = ib; - - if (src->size()) - { - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_streamCountKernel, "m_streamCountKernel"); - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - - int num = NUM_WGS * WG_SIZE; - launcher.launch1D(num, WG_SIZE); - } - -//fast prefix scan is not working properly on Mac OSX yet -#ifdef __APPLE__ - bool fastScan = false; -#else - bool fastScan = !m_deviceCPU; -#endif - - if (fastScan) - { // prefix scan group histogram - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(srcHisto->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_prefixScanKernel, "m_prefixScanKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(128, 128); - destHisto = srcHisto; - } - else - { - //unsigned int sum; //for debugging - m_scan->execute(*srcHisto, *destHisto, 1920, 0); //,&sum); - } - - if (src->size()) - { // local sort and distribute - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(src->getBufferCL(), true), b3BufferInfoCL(destHisto->getBufferCL(), true), b3BufferInfoCL(dst->getBufferCL())}; - b3LauncherCL launcher(m_commandQueue, m_sortAndScatterKernel, "m_sortAndScatterKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nWGs * WG_SIZE, WG_SIZE); - } - - b3Swap(src, dst); - b3Swap(srcHisto, destHisto); - - count++; - } - - if (count & 1) - { - b3Assert(0); //need to copy from workbuffer to keyValuesInOut - } - - if (m_workBuffer4a->size()) - { - m_workBuffer4a->resize(originalSize); - keysInOut.copyFromOpenCLArray(*m_workBuffer4a); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h 
b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h deleted file mode 100644 index 69caf182d7..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h +++ /dev/null @@ -1,84 +0,0 @@ - -#ifndef B3_RADIXSORT32_H -#define B3_RADIXSORT32_H - -#include "b3OpenCLArray.h" - -struct b3SortData -{ - union { - unsigned int m_key; - unsigned int x; - }; - - union { - unsigned int m_value; - unsigned int y; - }; -}; -#include "b3BufferInfoCL.h" - -class b3RadixSort32CL -{ - b3OpenCLArray<unsigned int>* m_workBuffer1; - b3OpenCLArray<unsigned int>* m_workBuffer2; - - b3OpenCLArray<b3SortData>* m_workBuffer3; - b3OpenCLArray<b3SortData>* m_workBuffer4; - - b3OpenCLArray<unsigned int>* m_workBuffer3a; - b3OpenCLArray<unsigned int>* m_workBuffer4a; - - cl_command_queue m_commandQueue; - - cl_kernel m_streamCountSortDataKernel; - cl_kernel m_streamCountKernel; - - cl_kernel m_prefixScanKernel; - cl_kernel m_sortAndScatterSortDataKernel; - cl_kernel m_sortAndScatterKernel; - - bool m_deviceCPU; - - class b3PrefixScanCL* m_scan; - class b3FillCL* m_fill; - -public: - struct b3ConstData - { - int m_n; - int m_nWGs; - int m_startBit; - int m_nBlocksPerWG; - }; - enum - { - DATA_ALIGNMENT = 256, - WG_SIZE = 64, - BLOCK_SIZE = 256, - ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE), - BITS_PER_PASS = 4, - NUM_BUCKET = (1 << BITS_PER_PASS), - // if you change this, change nPerWI in kernel as well - NUM_WGS = 20 * 6, // cypress - // NUM_WGS = 24*6, // cayman - // NUM_WGS = 32*4, // nv - }; - -private: -public: - b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0); - - virtual ~b3RadixSort32CL(); - - void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, - b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32); - - ///keys only - void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32); - - void 
execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); - void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); - void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32); -}; -#endif //B3_RADIXSORT32_H diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl deleted file mode 100644 index f3b4a1e8a7..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl +++ /dev/null @@ -1,106 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -typedef struct -{ - u32 m_key; - u32 m_value; -}SortData; - - - -typedef struct -{ - u32 m_nSrc; - u32 m_nDst; - u32 m_padding[2]; -} ConstBuffer; - - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nSrc ) - { - SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1); - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = (gIdx==0)? first: src[gIdx-1]; - SortData jData = (gIdx==nSrc)? end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { -// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++) - u32 k = jData.m_key; - { - dst[k] = gIdx; - } - } - } -} - - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX+1; - - if( gIdx < nSrc+1 ) - { - SortData first; first.m_key = 0; first.m_value = 0; - SortData end; end.m_key = nDst; end.m_value = nDst; - - SortData iData = src[gIdx-1]; - SortData jData = (gIdx==nSrc)? 
end: src[gIdx]; - - if( iData.m_key != jData.m_key ) - { - u32 k = iData.m_key; - { - dst[k] = gIdx; - } - } - } -} - -__attribute__((reqd_work_group_size(64,1,1))) -__kernel -void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, - unsigned int nSrc, unsigned int nDst) -{ - int gIdx = GET_GLOBAL_IDX; - - - if( gIdx < nDst ) - { - C[gIdx] = A[gIdx] - B[gIdx]; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h deleted file mode 100644 index 1758dd41e3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernelsCL.h +++ /dev/null @@ -1,86 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* boundSearchKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "typedef struct\n" - "{\n" - " u32 m_key; \n" - " u32 m_value;\n" - "}SortData;\n" - "typedef struct\n" - "{\n" - " u32 m_nSrc;\n" - " u32 m_nDst;\n" - " u32 m_padding[2];\n" - "} ConstBuffer;\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SearchSortDataLowerKernel(__global SortData* src, __global u32 *dst, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nSrc )\n" - " {\n" - " SortData first; first.m_key = (u32)(-1); first.m_value = (u32)(-1);\n" - " SortData end; end.m_key = nDst; end.m_value = nDst;\n" - " SortData iData = (gIdx==0)? first: src[gIdx-1];\n" - " SortData jData = (gIdx==nSrc)? end: src[gIdx];\n" - " if( iData.m_key != jData.m_key )\n" - " {\n" - "// for(u32 k=iData.m_key+1; k<=min(jData.m_key, nDst-1); k++)\n" - " u32 k = jData.m_key;\n" - " {\n" - " dst[k] = gIdx;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SearchSortDataUpperKernel(__global SortData* src, __global u32 *dst, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX+1;\n" - " if( gIdx < nSrc+1 )\n" - " {\n" - " SortData first; first.m_key = 0; first.m_value = 0;\n" - " SortData end; end.m_key = nDst; end.m_value = nDst;\n" - " SortData iData = src[gIdx-1];\n" - " SortData jData = (gIdx==nSrc)? 
end: src[gIdx];\n" - " if( iData.m_key != jData.m_key )\n" - " {\n" - " u32 k = iData.m_key;\n" - " {\n" - " dst[k] = gIdx;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "__kernel\n" - "void SubtractKernel(__global u32* A, __global u32 *B, __global u32 *C, \n" - " unsigned int nSrc, unsigned int nDst)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nDst )\n" - " {\n" - " C[gIdx] = A[gIdx] - B[gIdx];\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl deleted file mode 100644 index 2eee5752ec..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernels.cl +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - int m_n; - int m_padding[3]; -} ConstBuffer; - - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy1F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float4 a0 = src[gIdx]; - - dst[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy2F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 2*gIdx <= cb.m_n ) - { - float4 a0 = src[gIdx*2+0]; - float4 a1 = src[gIdx*2+1]; - - dst[ gIdx*2+0 ] = a0; - dst[ gIdx*2+1 ] = a1; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void Copy4F4Kernel(__global float4* dst, __global float4* src, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( 4*gIdx <= cb.m_n ) - { - int idx0 = gIdx*4+0; - int idx1 = gIdx*4+1; - int idx2 = gIdx*4+2; - int idx3 = gIdx*4+3; - - float4 a0 = src[idx0]; - float4 a1 = src[idx1]; - float4 a2 = src[idx2]; - float4 a3 = src[idx3]; - - dst[ idx0 ] = a0; - dst[ idx1 ] = a1; - dst[ idx2 ] = a2; - dst[ idx3 ] = a3; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF1Kernel(__global float* dstF1, __global float* srcF1, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float a0 = srcF1[gIdx]; - - 
dstF1[ gIdx ] = a0; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, - ConstBuffer cb) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < cb.m_n ) - { - float2 a0 = srcF2[gIdx]; - - dstF2[ gIdx ] = a0; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h deleted file mode 100644 index 33c9279462..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/CopyKernelsCL.h +++ /dev/null @@ -1,131 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* copyKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "\n" - "typedef struct\n" - "{\n" - " int m_n;\n" - " int m_padding[3];\n" - "} ConstBuffer;\n" - "\n" - "\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy1F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float4 a0 = src[gIdx];\n" - "\n" - " dst[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy2F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( 2*gIdx <= cb.m_n )\n" - " {\n" - " float4 a0 = src[gIdx*2+0];\n" - " float4 a1 = src[gIdx*2+1];\n" - "\n" - " dst[ gIdx*2+0 ] = a0;\n" - " dst[ gIdx*2+1 ] = a1;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void Copy4F4Kernel(__global float4* dst, __global float4* src, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( 4*gIdx <= cb.m_n )\n" - " {\n" - " int idx0 = gIdx*4+0;\n" - " int idx1 = gIdx*4+1;\n" - " int idx2 = gIdx*4+2;\n" - " int 
idx3 = gIdx*4+3;\n" - "\n" - " float4 a0 = src[idx0];\n" - " float4 a1 = src[idx1];\n" - " float4 a2 = src[idx2];\n" - " float4 a3 = src[idx3];\n" - "\n" - " dst[ idx0 ] = a0;\n" - " dst[ idx1 ] = a1;\n" - " dst[ idx2 ] = a2;\n" - " dst[ idx3 ] = a3;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void CopyF1Kernel(__global float* dstF1, __global float* srcF1, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float a0 = srcF1[gIdx];\n" - "\n" - " dstF1[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void CopyF2Kernel(__global float2* dstF2, __global float2* srcF2, \n" - " ConstBuffer cb)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - "\n" - " if( gIdx < cb.m_n )\n" - " {\n" - " float2 a0 = srcF2[gIdx];\n" - "\n" - " dstF2[ gIdx ] = a0;\n" - " }\n" - "}\n" - "\n" - "\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl deleted file mode 100644 index 71c31075dd..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernels.cl +++ /dev/null @@ -1,107 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -typedef struct -{ - union - { - int4 m_data; - uint4 m_unsignedData; - float m_floatData; - }; - int m_offset; - int m_n; - int m_padding[2]; -} ConstBuffer; - - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num_elements ) - { - dstFloat[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt[ offset+gIdx ] = value; - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - 
dstInt2[ gIdx + offset] = make_int2( value.x, value.y ); - } -} - -__kernel -__attribute__((reqd_work_group_size(64,1,1))) -void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < num ) - { - dstInt4[ offset+gIdx ] = value; - } -} - diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h deleted file mode 100644 index 983e652270..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/FillKernelsCL.h +++ /dev/null @@ -1,90 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* fillKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "typedef struct\n" - "{\n" - " union\n" - " {\n" - " int4 m_data;\n" - " uint4 m_unsignedData;\n" - " float m_floatData;\n" - " };\n" - " int m_offset;\n" - " int m_n;\n" - " int m_padding[2];\n" - "} ConstBuffer;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillIntKernel(__global int* dstInt, int num_elements, int value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num_elements )\n" - " {\n" - " dstInt[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillFloatKernel(__global float* dstFloat, int num_elements, float value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num_elements )\n" - " {\n" - " dstFloat[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillUnsignedIntKernel(__global unsigned int* dstInt, const int num, const unsigned int value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt[ offset+gIdx ] = value;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - 
"void FillInt2Kernel(__global int2* dstInt2, const int num, const int2 value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt2[ gIdx + offset] = make_int2( value.x, value.y );\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(64,1,1)))\n" - "void FillInt4Kernel(__global int4* dstInt4, const int num, const int4 value, const int offset)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < num )\n" - " {\n" - " dstInt4[ offset+gIdx ] = value;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl deleted file mode 100644 index c9da79854a..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanFloat4Kernels.cl +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -// takahiro end -#define WG_SIZE 128 -#define m_numElems x -#define m_numBlocks y -#define m_numScanBlocks z - -/*typedef struct -{ - uint m_numElems; - uint m_numBlocks; - uint m_numScanBlocks; - uint m_padding[1]; -} ConstBuffer; -*/ - -float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize) -{ - float4 blocksum; - int offset = 1; - for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - data[bi] += data[ai]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - blocksum = data[ n-1 ]; - data[ n-1 ] = 0; - } - - GROUP_LDS_BARRIER; - - offset >>= 1; - for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize ) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - float4 temp = data[ai]; - data[ai] = data[bi]; - data[bi] += temp; - } - } - GROUP_LDS_BARRIER; - - return blocksum; -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb) -{ - __local float4 ldsData[WG_SIZE*2]; - - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - - ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0; - ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? 
src[2*gIdx + 1]: 0; - - float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE); - - if( lIdx == 0 ) - sumBuffer[GET_GROUP_IDX] = sum; - - if( (2*gIdx) < cb.m_numElems ) - { - dst[2*gIdx] = ldsData[2*lIdx]; - } - if( (2*gIdx + 1) < cb.m_numElems ) - { - dst[2*gIdx + 1] = ldsData[2*lIdx + 1]; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb) -{ - const u32 blockSize = WG_SIZE*2; - - int myIdx = GET_GROUP_IDX+1; - int lIdx = GET_LOCAL_IDX; - - float4 iBlockSum = blockSum[myIdx]; - - int endValue = min((myIdx+1)*(blockSize), cb.m_numElems); - for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE) - { - dst[i] += iBlockSum; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void TopLevelScanKernel(__global float4* dst, uint4 cb) -{ - __local float4 ldsData[2048]; - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - int lSize = GET_GROUP_SIZE; - - for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize ) - { - ldsData[i] = (i<cb.m_numBlocks)? dst[i]:0; - } - - GROUP_LDS_BARRIER; - - float4 sum = ScanExclusiveFloat4(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE); - - for(int i=lIdx; i<cb.m_numBlocks; i+=lSize ) - { - dst[i] = ldsData[i]; - } - - if( gIdx == 0 ) - { - dst[cb.m_numBlocks] = sum; - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl deleted file mode 100644 index 963cc1e48e..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernels.cl +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) - -// takahiro end -#define WG_SIZE 128 -#define m_numElems x -#define m_numBlocks y -#define m_numScanBlocks z - -/*typedef struct -{ - uint m_numElems; - uint m_numBlocks; - uint m_numScanBlocks; - uint m_padding[1]; -} ConstBuffer; -*/ - -u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize) -{ - u32 blocksum; - int offset = 1; - for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1) - { - GROUP_LDS_BARRIER; - for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - data[bi] += data[ai]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - blocksum = data[ n-1 ]; - data[ n-1 ] = 0; - } - - GROUP_LDS_BARRIER; - - offset >>= 1; - for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 ) - { - GROUP_LDS_BARRIER; - for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize ) - { - int ai = offset*(2*iIdx+1)-1; - int bi = offset*(2*iIdx+2)-1; - u32 temp = data[ai]; - data[ai] = data[bi]; - data[bi] += temp; - } - } - GROUP_LDS_BARRIER; - - return blocksum; -} - 
-__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer, - uint4 cb) -{ - __local u32 ldsData[WG_SIZE*2]; - - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - - ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0; - ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0; - - u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE); - - if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum; - - if( (2*gIdx) < cb.m_numElems ) - { - dst[2*gIdx] = ldsData[2*lIdx]; - } - if( (2*gIdx + 1) < cb.m_numElems ) - { - dst[2*gIdx + 1] = ldsData[2*lIdx + 1]; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb) -{ - const u32 blockSize = WG_SIZE*2; - - int myIdx = GET_GROUP_IDX+1; - int lIdx = GET_LOCAL_IDX; - - u32 iBlockSum = blockSum[myIdx]; - - int endValue = min((myIdx+1)*(blockSize), cb.m_numElems); - for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE) - { - dst[i] += iBlockSum; - } -} - -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -__kernel -void TopLevelScanKernel(__global u32* dst, uint4 cb) -{ - __local u32 ldsData[2048]; - int gIdx = GET_GLOBAL_IDX; - int lIdx = GET_LOCAL_IDX; - int lSize = GET_GROUP_SIZE; - - for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize ) - { - ldsData[i] = (i<cb.m_numBlocks)? 
dst[i]:0; - } - - GROUP_LDS_BARRIER; - - u32 sum = ScanExclusive(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE); - - for(int i=lIdx; i<cb.m_numBlocks; i+=lSize ) - { - dst[i] = ldsData[i]; - } - - if( gIdx == 0 ) - { - dst[cb.m_numBlocks] = sum; - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h deleted file mode 100644 index fc5e7b865c..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsCL.h +++ /dev/null @@ -1,128 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* prefixScanKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "// takahiro end\n" - "#define WG_SIZE 128 \n" - "#define m_numElems x\n" - "#define m_numBlocks y\n" - "#define m_numScanBlocks z\n" - "/*typedef struct\n" - "{\n" - " uint m_numElems;\n" - " uint m_numBlocks;\n" - " uint m_numScanBlocks;\n" - " uint m_padding[1];\n" - "} ConstBuffer;\n" - "*/\n" - "u32 ScanExclusive(__local u32* data, u32 n, int lIdx, int lSize)\n" - "{\n" - " u32 blocksum;\n" - " int offset = 1;\n" - " for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize)\n" - " {\n" - " int ai = offset*(2*iIdx+1)-1;\n" - " int bi = offset*(2*iIdx+2)-1;\n" - " data[bi] += data[ai];\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx == 0 )\n" - " {\n" - " blocksum = data[ n-1 ];\n" - " data[ n-1 ] = 0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " offset >>= 1;\n" - " for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize )\n" - " {\n" - " int ai = offset*(2*iIdx+1)-1;\n" - " int bi = offset*(2*iIdx+2)-1;\n" - " u32 temp = data[ai];\n" - " data[ai] = data[bi];\n" - " data[bi] += temp;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " return blocksum;\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void LocalScanKernel(__global u32* dst, __global u32 *src, __global u32 *sumBuffer,\n" - " uint4 cb)\n" - "{\n" - " __local u32 ldsData[WG_SIZE*2];\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " ldsData[2*lIdx] = ( 
2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" - " ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" - " u32 sum = ScanExclusive(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" - " if( lIdx == 0 ) sumBuffer[GET_GROUP_IDX] = sum;\n" - " if( (2*gIdx) < cb.m_numElems )\n" - " {\n" - " dst[2*gIdx] = ldsData[2*lIdx];\n" - " }\n" - " if( (2*gIdx + 1) < cb.m_numElems )\n" - " {\n" - " dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void AddOffsetKernel(__global u32 *dst, __global u32 *blockSum, uint4 cb)\n" - "{\n" - " const u32 blockSize = WG_SIZE*2;\n" - " int myIdx = GET_GROUP_IDX+1;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " u32 iBlockSum = blockSum[myIdx];\n" - " int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" - " for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n" - " {\n" - " dst[i] += iBlockSum;\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void TopLevelScanKernel(__global u32* dst, uint4 cb)\n" - "{\n" - " __local u32 ldsData[2048];\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " int lSize = GET_GROUP_SIZE;\n" - " for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n" - " {\n" - " ldsData[i] = (i<cb.m_numBlocks)? 
dst[i]:0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " u32 sum = ScanExclusive(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" - " for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n" - " {\n" - " dst[i] = ldsData[i];\n" - " }\n" - " if( gIdx == 0 )\n" - " {\n" - " dst[cb.m_numBlocks] = sum;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h deleted file mode 100644 index 15d1bc5195..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/PrefixScanKernelsFloat4CL.h +++ /dev/null @@ -1,128 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* prefixScanKernelsFloat4CL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "// takahiro end\n" - "#define WG_SIZE 128 \n" - "#define m_numElems x\n" - "#define m_numBlocks y\n" - "#define m_numScanBlocks z\n" - "/*typedef struct\n" - "{\n" - " uint m_numElems;\n" - " uint m_numBlocks;\n" - " uint m_numScanBlocks;\n" - " uint m_padding[1];\n" - "} ConstBuffer;\n" - "*/\n" - "float4 ScanExclusiveFloat4(__local float4* data, u32 n, int lIdx, int lSize)\n" - "{\n" - " float4 blocksum;\n" - " int offset = 1;\n" - " for(int nActive=n>>1; nActive>0; nActive>>=1, offset<<=1)\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for(int iIdx=lIdx; iIdx<nActive; iIdx+=lSize)\n" - " {\n" - " int ai = offset*(2*iIdx+1)-1;\n" - " int bi = offset*(2*iIdx+2)-1;\n" - " data[bi] += data[ai];\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx == 0 )\n" - " {\n" - " blocksum = data[ n-1 ];\n" - " data[ n-1 ] = 0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " offset >>= 1;\n" - " for(int nActive=1; nActive<n; nActive<<=1, offset>>=1 )\n" - " {\n" - " GROUP_LDS_BARRIER;\n" - " for( int iIdx = lIdx; iIdx<nActive; iIdx += lSize )\n" - " {\n" - " int ai = offset*(2*iIdx+1)-1;\n" - " int bi = offset*(2*iIdx+2)-1;\n" - " float4 temp = data[ai];\n" - " data[ai] = data[bi];\n" - " data[bi] += temp;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " return blocksum;\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void LocalScanKernel(__global float4* dst, __global float4* src, __global float4* sumBuffer, uint4 cb)\n" - "{\n" - " __local float4 ldsData[WG_SIZE*2];\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " 
ldsData[2*lIdx] = ( 2*gIdx < cb.m_numElems )? src[2*gIdx]: 0;\n" - " ldsData[2*lIdx + 1] = ( 2*gIdx+1 < cb.m_numElems )? src[2*gIdx + 1]: 0;\n" - " float4 sum = ScanExclusiveFloat4(ldsData, WG_SIZE*2, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" - " if( lIdx == 0 ) \n" - " sumBuffer[GET_GROUP_IDX] = sum;\n" - " if( (2*gIdx) < cb.m_numElems )\n" - " {\n" - " dst[2*gIdx] = ldsData[2*lIdx];\n" - " }\n" - " if( (2*gIdx + 1) < cb.m_numElems )\n" - " {\n" - " dst[2*gIdx + 1] = ldsData[2*lIdx + 1];\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void AddOffsetKernel(__global float4* dst, __global float4* blockSum, uint4 cb)\n" - "{\n" - " const u32 blockSize = WG_SIZE*2;\n" - " int myIdx = GET_GROUP_IDX+1;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " float4 iBlockSum = blockSum[myIdx];\n" - " int endValue = min((myIdx+1)*(blockSize), cb.m_numElems);\n" - " for(int i=myIdx*blockSize+lIdx; i<endValue; i+=GET_GROUP_SIZE)\n" - " {\n" - " dst[i] += iBlockSum;\n" - " }\n" - "}\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "__kernel\n" - "void TopLevelScanKernel(__global float4* dst, uint4 cb)\n" - "{\n" - " __local float4 ldsData[2048];\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " int lSize = GET_GROUP_SIZE;\n" - " for(int i=lIdx; i<cb.m_numScanBlocks; i+=lSize )\n" - " {\n" - " ldsData[i] = (i<cb.m_numBlocks)? 
dst[i]:0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " float4 sum = ScanExclusiveFloat4(ldsData, cb.m_numScanBlocks, GET_LOCAL_IDX, GET_GROUP_SIZE);\n" - " for(int i=lIdx; i<cb.m_numBlocks; i+=lSize )\n" - " {\n" - " dst[i] = ldsData[i];\n" - " }\n" - " if( gIdx == 0 )\n" - " {\n" - " dst[cb.m_numBlocks] = sum;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl deleted file mode 100644 index 7402e2f3b3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32Kernels.cl +++ /dev/null @@ -1,1071 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Author Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable - -typedef unsigned int u32; -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AtomAdd(x, value) atom_add(&(x), value) - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - - -#define make_uint4 (uint4) -#define make_uint2 (uint2) -#define make_int2 (int2) - -#define WG_SIZE 64 -#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE) -#define BITS_PER_PASS 4 -#define NUM_BUCKET (1<<BITS_PER_PASS) -typedef uchar u8; - -// this isn't optimization for VLIW. But just reducing writes. 
-#define USE_2LEVEL_REDUCE 1 - -//#define CHECK_BOUNDARY 1 - -//#define NV_GPU 1 - - -// Cypress -#define nPerWI 16 -// Cayman -//#define nPerWI 20 - -#define m_n x -#define m_nWGs y -#define m_startBit z -#define m_nBlocksPerWG w - -/* -typedef struct -{ - int m_n; - int m_nWGs; - int m_startBit; - int m_nBlocksPerWG; -} ConstBuffer; -*/ - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - - -uint prefixScanVectorEx( uint4* data ) -{ - u32 sum = 0; - u32 tmp = data[0].x; - data[0].x = sum; - sum += tmp; - tmp = data[0].y; - data[0].y = sum; - sum += tmp; - tmp = data[0].z; - data[0].z = sum; - sum += tmp; - tmp = data[0].w; - data[0].w = sum; - sum += tmp; - return sum; -} - -u32 localPrefixSum( u32 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory, int wgSize /*64 or 128*/ ) -{ - { // Set data - sorterSharedMemory[lIdx] = 0; - sorterSharedMemory[lIdx+wgSize] = pData; - } - - GROUP_LDS_BARRIER; - - { // Prefix sum - int idx = 2*lIdx + (wgSize+1); -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < 64 ) - { - u32 u0, u1, u2; - u0 = sorterSharedMemory[idx-3]; - u1 = sorterSharedMemory[idx-2]; - u2 = sorterSharedMemory[idx-1]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - - u0 = sorterSharedMemory[idx-12]; - u1 = sorterSharedMemory[idx-8]; - u2 = sorterSharedMemory[idx-4]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - - u0 = sorterSharedMemory[idx-48]; - u1 = sorterSharedMemory[idx-32]; - u2 = sorterSharedMemory[idx-16]; - AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); - GROUP_MEM_FENCE; - if( wgSize > 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#else - if( lIdx < 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-1]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += 
sorterSharedMemory[idx-4]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-8]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-16]; - GROUP_MEM_FENCE; - sorterSharedMemory[idx] += sorterSharedMemory[idx-32]; - GROUP_MEM_FENCE; - if( wgSize > 64 ) - { - sorterSharedMemory[idx] += sorterSharedMemory[idx-64]; - GROUP_MEM_FENCE; - } - - sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2]; - GROUP_MEM_FENCE; - } -#endif - } - - GROUP_LDS_BARRIER; - - *totalSum = sorterSharedMemory[wgSize*2-1]; - u32 addValue = sorterSharedMemory[lIdx+wgSize-1]; - return addValue; -} - -//__attribute__((reqd_work_group_size(128,1,1))) -uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - - -//__attribute__((reqd_work_group_size(64,1,1))) -uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory ) -{ - u32 s4 = prefixScanVectorEx( &pData ); - u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 ); - return pData + make_uint4( rank, rank, rank, rank ); -} - -u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;} - -u32 bit8Scan(u32 v) -{ - return (v<<8) + (v<<16) + (v<<24); -} - -//=== - - - - -#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx] - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb ) -{ - __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - for(int i=0; i<NUM_BUCKET; i++) 
- { - MY_HISTOGRAM(i) = 0; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - u32 localKey; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - { -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - { - localKey = (gSrc[addr+i]>>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<GET_GROUP_SIZE; i++) - { - sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE]; - } - histogramOut[lIdx*nWGs+wgIdx] = sum; - } -} - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void StreamCountSortDataKernel( __global SortDataCL* gSrc, __global u32* histogramOut, int4 cb ) -{ - __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - for(int i=0; i<NUM_BUCKET; i++) - { - MY_HISTOGRAM(i) = 0; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - u32 localKey; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - 
// MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD - // Using registers didn't perform well. It seems like use localKeys to address requires a lot of alu ops - // AMD: AtomInc performs better while NV prefers ++ - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - { -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - { - localKey = (gSrc[addr+i].m_key>>startBit) & 0xf; -#if defined(NV_GPU) - MY_HISTOGRAM( localKey )++; -#else - AtomInc( MY_HISTOGRAM( localKey ) ); -#endif - } - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<GET_GROUP_SIZE; i++) - { - sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE]; - } - histogramOut[lIdx*nWGs+wgIdx] = sum; - } -} - -#define nPerLane (nPerWI/4) - -// NUM_BUCKET*nWGs < 128*nPerWI -__kernel -__attribute__((reqd_work_group_size(128,1,1))) -void PrefixScanKernel( __global u32* wHistogram1, int4 cb ) -{ - __local u32 ldsTopScanData[128*2]; - - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - const int nWGs = cb.m_nWGs; - - u32 data[nPerWI]; - for(int i=0; i<nPerWI; i++) - { - data[i] = 0; - if( (nPerWI*lIdx+i) < NUM_BUCKET*nWGs ) - data[i] = wHistogram1[nPerWI*lIdx+i]; - } - - uint4 myData = make_uint4(0,0,0,0); - - for(int i=0; i<nPerLane; i++) - { - myData.x += data[nPerLane*0+i]; - myData.y += data[nPerLane*1+i]; - myData.z += data[nPerLane*2+i]; - myData.w += data[nPerLane*3+i]; - } - - uint totalSum; - uint4 scanned = localPrefixSum128V( myData, lIdx, &totalSum, ldsTopScanData ); - -// for(int j=0; j<4; j++) // somehow it introduces a lot of branches - { int j = 0; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 1; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 2; - u32 sum = 0; - for(int 
i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - { int j = 3; - u32 sum = 0; - for(int i=0; i<nPerLane; i++) - { - u32 tmp = data[nPerLane*j+i]; - data[nPerLane*j+i] = sum; - sum += tmp; - } - } - - for(int i=0; i<nPerLane; i++) - { - data[nPerLane*0+i] += scanned.x; - data[nPerLane*1+i] += scanned.y; - data[nPerLane*2+i] += scanned.z; - data[nPerLane*3+i] += scanned.w; - } - - for(int i=0; i<nPerWI; i++) - { - int index = nPerWI*lIdx+i; - if (index < NUM_BUCKET*nWGs) - wHistogram1[nPerWI*lIdx+i] = data[i]; - } -} - -// 4 scan, 4 exchange -void sort4Bits(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) -{ - for(int bitIdx=0; bitIdx<BITS_PER_PASS; bitIdx++) - { - u32 mask = (1<<bitIdx); - uint4 cmpResult = make_uint4( (sortData[0]>>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask ); - uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) ); - u32 total; - prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData ); - { - uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3); - uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total ); - dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) ); - - GROUP_LDS_BARRIER; - - ldsSortData[dstAddr.x] = sortData[0]; - ldsSortData[dstAddr.y] = sortData[1]; - ldsSortData[dstAddr.z] = sortData[2]; - ldsSortData[dstAddr.w] = sortData[3]; - - GROUP_LDS_BARRIER; - - sortData[0] = ldsSortData[localAddr.x]; - sortData[1] = ldsSortData[localAddr.y]; - sortData[2] = ldsSortData[localAddr.z]; - sortData[3] = ldsSortData[localAddr.w]; - - GROUP_LDS_BARRIER; - } - } -} - -// 2 scan, 2 exchange -void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData) -{ - for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2) - { - uint4 b = 
make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] = sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - -#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key] - -__kernel 
-__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - u32 myHistogram = 0; - - u32 sortData[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - sortData[i] = ( addr+i < n )? 
gSrc[ addr+i ] : 0xffffffff; -#else - sortData[i] = gSrc[ addr+i ]; -#endif - - sort4Bits(sortData, startBit, lIdx, ldsSortData); - - u32 keys[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - keys[i] = (sortData[i]>>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - -#if defined(NV_GPU) - SET_HISTOGRAM( setIdx, keys[i] )++; -#else - AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) ); -#endif - - GROUP_LDS_BARRIER; - - uint hIdx = NUM_BUCKET+lIdx; - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<WG_SIZE/16; i++) - { - sum += SET_HISTOGRAM( i, lIdx ); - } - myHistogram = sum; - localHistogram[hIdx] = sum; - } - GROUP_LDS_BARRIER; - -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - - u32 u0, u1, u2; - u0 = localHistogram[hIdx-3]; - u1 = localHistogram[hIdx-2]; - u2 = localHistogram[hIdx-1]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - u0 = localHistogram[hIdx-12]; - u1 = localHistogram[hIdx-8]; - u2 = localHistogram[hIdx-4]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - } -#else - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-2]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-4]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-8]; - GROUP_MEM_FENCE; - } -#endif - GROUP_LDS_BARRIER; - } - - { - for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++) - { - int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie; - int binIdx = keys[ie]; - int groupOffset = localHistogramToCarry[binIdx]; - int myIdx = dataIdx - 
localHistogram[NUM_BUCKET+binIdx]; -#if defined(CHECK_BOUNDARY) - if( addr+ie < n ) -#endif - gDst[ groupOffset + myIdx ] = sortData[ie]; - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - localHistogramToCarry[lIdx] += myHistogram; - } - GROUP_LDS_BARRIER; - } -} - -// 2 scan, 2 exchange -void sort4Bits1KeyValue(u32 sortData[4], int sortVal[4], int startBit, int lIdx, __local u32* ldsSortData, __local int *ldsSortVal) -{ - for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2) - { - uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, - (sortData[1]>>(startBit+ibit)) & 0x3, - (sortData[2]>>(startBit+ibit)) & 0x3, - (sortData[3]>>(startBit+ibit)) & 0x3); - - u32 key4; - u32 sKeyPacked[4] = { 0, 0, 0, 0 }; - { - sKeyPacked[0] |= 1<<(8*b.x); - sKeyPacked[1] |= 1<<(8*b.y); - sKeyPacked[2] |= 1<<(8*b.z); - sKeyPacked[3] |= 1<<(8*b.w); - - key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3]; - } - - u32 rankPacked; - u32 sumPacked; - { - rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE ); - } - - GROUP_LDS_BARRIER; - - u32 newOffset[4] = { 0,0,0,0 }; - { - u32 sumScanned = bit8Scan( sumPacked ); - - u32 scannedKeys[4]; - scannedKeys[0] = 1<<(8*b.x); - scannedKeys[1] = 1<<(8*b.y); - scannedKeys[2] = 1<<(8*b.z); - scannedKeys[3] = 1<<(8*b.w); - { // 4 scans at once - u32 sum4 = 0; - for(int ie=0; ie<4; ie++) - { - u32 tmp = scannedKeys[ie]; - scannedKeys[ie] = sum4; - sum4 += tmp; - } - } - - { - u32 sumPlusRank = sumScanned + rankPacked; - { u32 ie = b.x; - scannedKeys[0] += sumPlusRank; - newOffset[0] = unpack4Key( scannedKeys[0], ie ); - } - { u32 ie = b.y; - scannedKeys[1] += sumPlusRank; - newOffset[1] = unpack4Key( scannedKeys[1], ie ); - } - { u32 ie = b.z; - scannedKeys[2] += sumPlusRank; - newOffset[2] = unpack4Key( scannedKeys[2], ie ); - } - { u32 ie = b.w; - scannedKeys[3] += sumPlusRank; - newOffset[3] = unpack4Key( scannedKeys[3], ie ); - } - } - } - - - GROUP_LDS_BARRIER; - - { - ldsSortData[newOffset[0]] 
= sortData[0]; - ldsSortData[newOffset[1]] = sortData[1]; - ldsSortData[newOffset[2]] = sortData[2]; - ldsSortData[newOffset[3]] = sortData[3]; - - ldsSortVal[newOffset[0]] = sortVal[0]; - ldsSortVal[newOffset[1]] = sortVal[1]; - ldsSortVal[newOffset[2]] = sortVal[2]; - ldsSortVal[newOffset[3]] = sortVal[3]; - - GROUP_LDS_BARRIER; - - u32 dstAddr = 4*lIdx; - sortData[0] = ldsSortData[dstAddr+0]; - sortData[1] = ldsSortData[dstAddr+1]; - sortData[2] = ldsSortData[dstAddr+2]; - sortData[3] = ldsSortData[dstAddr+3]; - - sortVal[0] = ldsSortVal[dstAddr+0]; - sortVal[1] = ldsSortVal[dstAddr+1]; - sortVal[2] = ldsSortVal[dstAddr+2]; - sortVal[3] = ldsSortVal[dstAddr+3]; - - GROUP_LDS_BARRIER; - } - } -} - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb) -{ - __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local int ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16]; - __local u32 localHistogramToCarry[NUM_BUCKET]; - __local u32 localHistogram[NUM_BUCKET*2]; - - u32 gIdx = GET_GLOBAL_IDX; - u32 lIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int startBit = cb.m_startBit; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - if( lIdx < (NUM_BUCKET) ) - { - localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx]; - } - - GROUP_LDS_BARRIER; - - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = n/blockSize - nBlocksPerWG*wgIdx; - - int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize) - { - - u32 myHistogram = 0; - - int sortData[ELEMENTS_PER_WORK_ITEM]; - int sortVal[ELEMENTS_PER_WORK_ITEM]; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - { - 
sortData[i] = ( addr+i < n )? gSrc[ addr+i ].m_key : 0xffffffff; - sortVal[i] = ( addr+i < n )? gSrc[ addr+i ].m_value : 0xffffffff; - } -#else - { - sortData[i] = gSrc[ addr+i ].m_key; - sortVal[i] = gSrc[ addr+i ].m_value; - } -#endif - - sort4Bits1KeyValue(sortData, sortVal, startBit, lIdx, ldsSortData, ldsSortVal); - - u32 keys[ELEMENTS_PER_WORK_ITEM]; - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) - keys[i] = (sortData[i]>>startBit) & 0xf; - - { // create histogram - u32 setIdx = lIdx/16; - if( lIdx < NUM_BUCKET ) - { - localHistogram[lIdx] = 0; - } - ldsSortData[lIdx] = 0; - GROUP_LDS_BARRIER; - - for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++) -#if defined(CHECK_BOUNDARY) - if( addr+i < n ) -#endif - -#if defined(NV_GPU) - SET_HISTOGRAM( setIdx, keys[i] )++; -#else - AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) ); -#endif - - GROUP_LDS_BARRIER; - - uint hIdx = NUM_BUCKET+lIdx; - if( lIdx < NUM_BUCKET ) - { - u32 sum = 0; - for(int i=0; i<WG_SIZE/16; i++) - { - sum += SET_HISTOGRAM( i, lIdx ); - } - myHistogram = sum; - localHistogram[hIdx] = sum; - } - GROUP_LDS_BARRIER; - -#if defined(USE_2LEVEL_REDUCE) - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - - u32 u0, u1, u2; - u0 = localHistogram[hIdx-3]; - u1 = localHistogram[hIdx-2]; - u2 = localHistogram[hIdx-1]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - u0 = localHistogram[hIdx-12]; - u1 = localHistogram[hIdx-8]; - u2 = localHistogram[hIdx-4]; - AtomAdd( localHistogram[hIdx], u0 + u1 + u2 ); - GROUP_MEM_FENCE; - } -#else - if( lIdx < NUM_BUCKET ) - { - localHistogram[hIdx] = localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-1]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-2]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-4]; - GROUP_MEM_FENCE; - localHistogram[hIdx] += localHistogram[hIdx-8]; - GROUP_MEM_FENCE; - } -#endif - GROUP_LDS_BARRIER; - } - - { - for(int 
ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++) - { - int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie; - int binIdx = keys[ie]; - int groupOffset = localHistogramToCarry[binIdx]; - int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx]; -#if defined(CHECK_BOUNDARY) - if( addr+ie < n ) - { - if ((groupOffset + myIdx)<n) - { - if (sortData[ie]==sortVal[ie]) - { - - SortDataCL tmp; - tmp.m_key = sortData[ie]; - tmp.m_value = sortVal[ie]; - if (tmp.m_key == tmp.m_value) - gDst[groupOffset + myIdx ] = tmp; - } - - } - } -#else - if ((groupOffset + myIdx)<n) - { - gDst[ groupOffset + myIdx ].m_key = sortData[ie]; - gDst[ groupOffset + myIdx ].m_value = sortVal[ie]; - } -#endif - } - } - - GROUP_LDS_BARRIER; - - if( lIdx < NUM_BUCKET ) - { - localHistogramToCarry[lIdx] += myHistogram; - } - GROUP_LDS_BARRIER; - } -} - - - - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterSortDataKernelSerial( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb) -{ - - u32 gIdx = GET_GLOBAL_IDX; - u32 realLocalIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - int counter[NUM_BUCKET]; - - if (realLocalIdx>0) - return; - - for (int c=0;c<NUM_BUCKET;c++) - counter[c]=0; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx=0;lIdx<WG_SIZE;lIdx++) - { - int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) - { - int i = addr2+j; - if( i < n ) - { - int tableIdx; - tableIdx = (gSrc[i].m_key>>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - 
counter[tableIdx] ++; - } - } - } - } - -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb ) -{ - - u32 gIdx = GET_GLOBAL_IDX; - u32 realLocalIdx = GET_LOCAL_IDX; - u32 wgIdx = GET_GROUP_IDX; - u32 wgSize = GET_GROUP_SIZE; - const int startBit = cb.m_startBit; - const int n = cb.m_n; - const int nWGs = cb.m_nWGs; - const int nBlocksPerWG = cb.m_nBlocksPerWG; - - int counter[NUM_BUCKET]; - - if (realLocalIdx>0) - return; - - for (int c=0;c<NUM_BUCKET;c++) - counter[c]=0; - - const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE; - - int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx; - - for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++) - { - for (int lIdx=0;lIdx<WG_SIZE;lIdx++) - { - int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx; - - for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++) - { - int i = addr2+j; - if( i < n ) - { - int tableIdx; - tableIdx = (gSrc[i]>>startBit) & 0xf;//0xf = NUM_TABLES-1 - gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i]; - counter[tableIdx] ++; - } - } - } - } - -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h deleted file mode 100644 index fb4bdda303..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/kernels/RadixSort32KernelsCL.h +++ /dev/null @@ -1,909 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* radixSort32KernelsCL = - "/*\n" - "Bullet Continuous Collision Detection and Physics Library\n" - "Copyright (c) 2011 Advanced Micro Devices, Inc. http://bulletphysics.org\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Author Takahiro Harada\n" - "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "typedef unsigned int u32;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_uint4 (uint4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define WG_SIZE 64\n" - "#define ELEMENTS_PER_WORK_ITEM (256/WG_SIZE)\n" - "#define BITS_PER_PASS 4\n" - "#define NUM_BUCKET (1<<BITS_PER_PASS)\n" - "typedef uchar u8;\n" - "// this isn't optimization for VLIW. But just reducing writes. 
\n" - "#define USE_2LEVEL_REDUCE 1\n" - "//#define CHECK_BOUNDARY 1\n" - "//#define NV_GPU 1\n" - "// Cypress\n" - "#define nPerWI 16\n" - "// Cayman\n" - "//#define nPerWI 20\n" - "#define m_n x\n" - "#define m_nWGs y\n" - "#define m_startBit z\n" - "#define m_nBlocksPerWG w\n" - "/*\n" - "typedef struct\n" - "{\n" - " int m_n;\n" - " int m_nWGs;\n" - " int m_startBit;\n" - " int m_nBlocksPerWG;\n" - "} ConstBuffer;\n" - "*/\n" - "typedef struct\n" - "{\n" - " unsigned int m_key;\n" - " unsigned int m_value;\n" - "} SortDataCL;\n" - "uint prefixScanVectorEx( uint4* data )\n" - "{\n" - " u32 sum = 0;\n" - " u32 tmp = data[0].x;\n" - " data[0].x = sum;\n" - " sum += tmp;\n" - " tmp = data[0].y;\n" - " data[0].y = sum;\n" - " sum += tmp;\n" - " tmp = data[0].z;\n" - " data[0].z = sum;\n" - " sum += tmp;\n" - " tmp = data[0].w;\n" - " data[0].w = sum;\n" - " sum += tmp;\n" - " return sum;\n" - "}\n" - "u32 localPrefixSum( u32 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory, int wgSize /*64 or 128*/ )\n" - "{\n" - " { // Set data\n" - " sorterSharedMemory[lIdx] = 0;\n" - " sorterSharedMemory[lIdx+wgSize] = pData;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " { // Prefix sum\n" - " int idx = 2*lIdx + (wgSize+1);\n" - "#if defined(USE_2LEVEL_REDUCE)\n" - " if( lIdx < 64 )\n" - " {\n" - " u32 u0, u1, u2;\n" - " u0 = sorterSharedMemory[idx-3];\n" - " u1 = sorterSharedMemory[idx-2];\n" - " u2 = sorterSharedMemory[idx-1];\n" - " AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" - " GROUP_MEM_FENCE;\n" - " u0 = sorterSharedMemory[idx-12];\n" - " u1 = sorterSharedMemory[idx-8];\n" - " u2 = sorterSharedMemory[idx-4];\n" - " AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" - " GROUP_MEM_FENCE;\n" - " u0 = sorterSharedMemory[idx-48];\n" - " u1 = sorterSharedMemory[idx-32];\n" - " u2 = sorterSharedMemory[idx-16];\n" - " AtomAdd( sorterSharedMemory[idx], u0+u1+u2 ); \n" - " GROUP_MEM_FENCE;\n" - " if( wgSize > 64 )\n" - " {\n" - " sorterSharedMemory[idx] += 
sorterSharedMemory[idx-64];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - " sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#else\n" - " if( lIdx < 64 )\n" - " {\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-1];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-2]; \n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-4];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-8];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-16];\n" - " GROUP_MEM_FENCE;\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-32];\n" - " GROUP_MEM_FENCE;\n" - " if( wgSize > 64 )\n" - " {\n" - " sorterSharedMemory[idx] += sorterSharedMemory[idx-64];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - " sorterSharedMemory[idx-1] += sorterSharedMemory[idx-2];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#endif\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " *totalSum = sorterSharedMemory[wgSize*2-1];\n" - " u32 addValue = sorterSharedMemory[lIdx+wgSize-1];\n" - " return addValue;\n" - "}\n" - "//__attribute__((reqd_work_group_size(128,1,1)))\n" - "uint4 localPrefixSum128V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" - "{\n" - " u32 s4 = prefixScanVectorEx( &pData );\n" - " u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 128 );\n" - " return pData + make_uint4( rank, rank, rank, rank );\n" - "}\n" - "//__attribute__((reqd_work_group_size(64,1,1)))\n" - "uint4 localPrefixSum64V( uint4 pData, uint lIdx, uint* totalSum, __local u32* sorterSharedMemory )\n" - "{\n" - " u32 s4 = prefixScanVectorEx( &pData );\n" - " u32 rank = localPrefixSum( s4, lIdx, totalSum, sorterSharedMemory, 64 );\n" - " return pData + make_uint4( rank, rank, rank, rank );\n" - "}\n" - "u32 unpack4Key( u32 key, int keyIdx ){ return (key>>(keyIdx*8)) & 0xff;}\n" - "u32 bit8Scan(u32 v)\n" - "{\n" - " return (v<<8) + (v<<16) + 
(v<<24);\n" - "}\n" - "//===\n" - "#define MY_HISTOGRAM(idx) localHistogramMat[(idx)*WG_SIZE+lIdx]\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void StreamCountKernel( __global u32* gSrc, __global u32* histogramOut, int4 cb )\n" - "{\n" - " __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " for(int i=0; i<NUM_BUCKET; i++)\n" - " {\n" - " MY_HISTOGRAM(i) = 0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " u32 localKey;\n" - " int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" - " {\n" - " // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n" - " // Using registers didn't perform well. 
It seems like use localKeys to address requires a lot of alu ops\n" - " // AMD: AtomInc performs better while NV prefers ++\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - " {\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+i < n )\n" - "#endif\n" - " {\n" - " localKey = (gSrc[addr+i]>>startBit) & 0xf;\n" - "#if defined(NV_GPU)\n" - " MY_HISTOGRAM( localKey )++;\n" - "#else\n" - " AtomInc( MY_HISTOGRAM( localKey ) );\n" - "#endif\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i<GET_GROUP_SIZE; i++)\n" - " {\n" - " sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE];\n" - " }\n" - " histogramOut[lIdx*nWGs+wgIdx] = sum;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void StreamCountSortDataKernel( __global SortDataCL* gSrc, __global u32* histogramOut, int4 cb )\n" - "{\n" - " __local u32 localHistogramMat[NUM_BUCKET*WG_SIZE];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " for(int i=0; i<NUM_BUCKET; i++)\n" - " {\n" - " MY_HISTOGRAM(i) = 0;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " u32 localKey;\n" - " int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" - " {\n" - " // MY_HISTOGRAM( localKeys.x ) ++ is much expensive than atomic add as it requires read and write while atomics can just add on AMD\n" - " // Using registers didn't perform well. 
It seems like use localKeys to address requires a lot of alu ops\n" - " // AMD: AtomInc performs better while NV prefers ++\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - " {\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+i < n )\n" - "#endif\n" - " {\n" - " localKey = (gSrc[addr+i].m_key>>startBit) & 0xf;\n" - "#if defined(NV_GPU)\n" - " MY_HISTOGRAM( localKey )++;\n" - "#else\n" - " AtomInc( MY_HISTOGRAM( localKey ) );\n" - "#endif\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i<GET_GROUP_SIZE; i++)\n" - " {\n" - " sum += localHistogramMat[lIdx*WG_SIZE+(i+lIdx)%GET_GROUP_SIZE];\n" - " }\n" - " histogramOut[lIdx*nWGs+wgIdx] = sum;\n" - " }\n" - "}\n" - "#define nPerLane (nPerWI/4)\n" - "// NUM_BUCKET*nWGs < 128*nPerWI\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(128,1,1)))\n" - "void PrefixScanKernel( __global u32* wHistogram1, int4 cb )\n" - "{\n" - " __local u32 ldsTopScanData[128*2];\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " const int nWGs = cb.m_nWGs;\n" - " u32 data[nPerWI];\n" - " for(int i=0; i<nPerWI; i++)\n" - " {\n" - " data[i] = 0;\n" - " if( (nPerWI*lIdx+i) < NUM_BUCKET*nWGs )\n" - " data[i] = wHistogram1[nPerWI*lIdx+i];\n" - " }\n" - " uint4 myData = make_uint4(0,0,0,0);\n" - " for(int i=0; i<nPerLane; i++)\n" - " {\n" - " myData.x += data[nPerLane*0+i];\n" - " myData.y += data[nPerLane*1+i];\n" - " myData.z += data[nPerLane*2+i];\n" - " myData.w += data[nPerLane*3+i];\n" - " }\n" - " uint totalSum;\n" - " uint4 scanned = localPrefixSum128V( myData, lIdx, &totalSum, ldsTopScanData );\n" - "// for(int j=0; j<4; j++) // somehow it introduces a lot of branches\n" - " { int j = 0;\n" - " u32 sum = 0;\n" - " for(int i=0; i<nPerLane; i++)\n" - " {\n" - " u32 tmp = data[nPerLane*j+i];\n" - " data[nPerLane*j+i] = sum;\n" - " sum += tmp;\n" - " }\n" - " }\n" - " { int j = 1;\n" - " u32 sum = 0;\n" - " for(int 
i=0; i<nPerLane; i++)\n" - " {\n" - " u32 tmp = data[nPerLane*j+i];\n" - " data[nPerLane*j+i] = sum;\n" - " sum += tmp;\n" - " }\n" - " }\n" - " { int j = 2;\n" - " u32 sum = 0;\n" - " for(int i=0; i<nPerLane; i++)\n" - " {\n" - " u32 tmp = data[nPerLane*j+i];\n" - " data[nPerLane*j+i] = sum;\n" - " sum += tmp;\n" - " }\n" - " }\n" - " { int j = 3;\n" - " u32 sum = 0;\n" - " for(int i=0; i<nPerLane; i++)\n" - " {\n" - " u32 tmp = data[nPerLane*j+i];\n" - " data[nPerLane*j+i] = sum;\n" - " sum += tmp;\n" - " }\n" - " }\n" - " for(int i=0; i<nPerLane; i++)\n" - " {\n" - " data[nPerLane*0+i] += scanned.x;\n" - " data[nPerLane*1+i] += scanned.y;\n" - " data[nPerLane*2+i] += scanned.z;\n" - " data[nPerLane*3+i] += scanned.w;\n" - " }\n" - " for(int i=0; i<nPerWI; i++)\n" - " {\n" - " int index = nPerWI*lIdx+i;\n" - " if (index < NUM_BUCKET*nWGs)\n" - " wHistogram1[nPerWI*lIdx+i] = data[i];\n" - " }\n" - "}\n" - "// 4 scan, 4 exchange\n" - "void sort4Bits(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" - "{\n" - " for(int bitIdx=0; bitIdx<BITS_PER_PASS; bitIdx++)\n" - " {\n" - " u32 mask = (1<<bitIdx);\n" - " uint4 cmpResult = make_uint4( (sortData[0]>>startBit) & mask, (sortData[1]>>startBit) & mask, (sortData[2]>>startBit) & mask, (sortData[3]>>startBit) & mask );\n" - " uint4 prefixSum = SELECT_UINT4( make_uint4(1,1,1,1), make_uint4(0,0,0,0), cmpResult != make_uint4(0,0,0,0) );\n" - " u32 total;\n" - " prefixSum = localPrefixSum64V( prefixSum, lIdx, &total, ldsSortData );\n" - " {\n" - " uint4 localAddr = make_uint4(lIdx*4+0,lIdx*4+1,lIdx*4+2,lIdx*4+3);\n" - " uint4 dstAddr = localAddr - prefixSum + make_uint4( total, total, total, total );\n" - " dstAddr = SELECT_UINT4( prefixSum, dstAddr, cmpResult != make_uint4(0, 0, 0, 0) );\n" - " GROUP_LDS_BARRIER;\n" - " ldsSortData[dstAddr.x] = sortData[0];\n" - " ldsSortData[dstAddr.y] = sortData[1];\n" - " ldsSortData[dstAddr.z] = sortData[2];\n" - " ldsSortData[dstAddr.w] = sortData[3];\n" - " 
GROUP_LDS_BARRIER;\n" - " sortData[0] = ldsSortData[localAddr.x];\n" - " sortData[1] = ldsSortData[localAddr.y];\n" - " sortData[2] = ldsSortData[localAddr.z];\n" - " sortData[3] = ldsSortData[localAddr.w];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "// 2 scan, 2 exchange\n" - "void sort4Bits1(u32 sortData[4], int startBit, int lIdx, __local u32* ldsSortData)\n" - "{\n" - " for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2)\n" - " {\n" - " uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, \n" - " (sortData[1]>>(startBit+ibit)) & 0x3, \n" - " (sortData[2]>>(startBit+ibit)) & 0x3, \n" - " (sortData[3]>>(startBit+ibit)) & 0x3);\n" - " u32 key4;\n" - " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" - " {\n" - " sKeyPacked[0] |= 1<<(8*b.x);\n" - " sKeyPacked[1] |= 1<<(8*b.y);\n" - " sKeyPacked[2] |= 1<<(8*b.z);\n" - " sKeyPacked[3] |= 1<<(8*b.w);\n" - " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" - " }\n" - " u32 rankPacked;\n" - " u32 sumPacked;\n" - " {\n" - " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " u32 newOffset[4] = { 0,0,0,0 };\n" - " {\n" - " u32 sumScanned = bit8Scan( sumPacked );\n" - " u32 scannedKeys[4];\n" - " scannedKeys[0] = 1<<(8*b.x);\n" - " scannedKeys[1] = 1<<(8*b.y);\n" - " scannedKeys[2] = 1<<(8*b.z);\n" - " scannedKeys[3] = 1<<(8*b.w);\n" - " { // 4 scans at once\n" - " u32 sum4 = 0;\n" - " for(int ie=0; ie<4; ie++)\n" - " {\n" - " u32 tmp = scannedKeys[ie];\n" - " scannedKeys[ie] = sum4;\n" - " sum4 += tmp;\n" - " }\n" - " }\n" - " {\n" - " u32 sumPlusRank = sumScanned + rankPacked;\n" - " { u32 ie = b.x;\n" - " scannedKeys[0] += sumPlusRank;\n" - " newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" - " }\n" - " { u32 ie = b.y;\n" - " scannedKeys[1] += sumPlusRank;\n" - " newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" - " }\n" - " { u32 ie = b.z;\n" - " scannedKeys[2] += sumPlusRank;\n" - " newOffset[2] = unpack4Key( 
scannedKeys[2], ie );\n" - " }\n" - " { u32 ie = b.w;\n" - " scannedKeys[3] += sumPlusRank;\n" - " newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " {\n" - " ldsSortData[newOffset[0]] = sortData[0];\n" - " ldsSortData[newOffset[1]] = sortData[1];\n" - " ldsSortData[newOffset[2]] = sortData[2];\n" - " ldsSortData[newOffset[3]] = sortData[3];\n" - " GROUP_LDS_BARRIER;\n" - " u32 dstAddr = 4*lIdx;\n" - " sortData[0] = ldsSortData[dstAddr+0];\n" - " sortData[1] = ldsSortData[dstAddr+1];\n" - " sortData[2] = ldsSortData[dstAddr+2];\n" - " sortData[3] = ldsSortData[dstAddr+3];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "#define SET_HISTOGRAM(setIdx, key) ldsSortData[(setIdx)*NUM_BUCKET+key]\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterKernel( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" - "{\n" - " __local u32 ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local u32 localHistogramToCarry[NUM_BUCKET];\n" - " __local u32 localHistogram[NUM_BUCKET*2];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int startBit = cb.m_startBit;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " if( lIdx < (NUM_BUCKET) )\n" - " {\n" - " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" - " {\n" - " u32 myHistogram = 0;\n" - " u32 sortData[ELEMENTS_PER_WORK_ITEM];\n" - " for(int i=0; 
i<ELEMENTS_PER_WORK_ITEM; i++)\n" - "#if defined(CHECK_BOUNDARY)\n" - " sortData[i] = ( addr+i < n )? gSrc[ addr+i ] : 0xffffffff;\n" - "#else\n" - " sortData[i] = gSrc[ addr+i ];\n" - "#endif\n" - " sort4Bits(sortData, startBit, lIdx, ldsSortData);\n" - " u32 keys[ELEMENTS_PER_WORK_ITEM];\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - " keys[i] = (sortData[i]>>startBit) & 0xf;\n" - " { // create histogram\n" - " u32 setIdx = lIdx/16;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[lIdx] = 0;\n" - " }\n" - " ldsSortData[lIdx] = 0;\n" - " GROUP_LDS_BARRIER;\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+i < n )\n" - "#endif\n" - "#if defined(NV_GPU)\n" - " SET_HISTOGRAM( setIdx, keys[i] )++;\n" - "#else\n" - " AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) );\n" - "#endif\n" - " \n" - " GROUP_LDS_BARRIER;\n" - " \n" - " uint hIdx = NUM_BUCKET+lIdx;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i<WG_SIZE/16; i++)\n" - " {\n" - " sum += SET_HISTOGRAM( i, lIdx );\n" - " }\n" - " myHistogram = sum;\n" - " localHistogram[hIdx] = sum;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - "#if defined(USE_2LEVEL_REDUCE)\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[hIdx] = localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " u32 u0, u1, u2;\n" - " u0 = localHistogram[hIdx-3];\n" - " u1 = localHistogram[hIdx-2];\n" - " u2 = localHistogram[hIdx-1];\n" - " AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" - " GROUP_MEM_FENCE;\n" - " u0 = localHistogram[hIdx-12];\n" - " u1 = localHistogram[hIdx-8];\n" - " u2 = localHistogram[hIdx-4];\n" - " AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#else\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[hIdx] = localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += 
localHistogram[hIdx-2];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-4];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-8];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#endif\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " {\n" - " for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n" - " {\n" - " int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie;\n" - " int binIdx = keys[ie];\n" - " int groupOffset = localHistogramToCarry[binIdx];\n" - " int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+ie < n )\n" - "#endif\n" - " gDst[ groupOffset + myIdx ] = sortData[ie];\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogramToCarry[lIdx] += myHistogram;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - "}\n" - "// 2 scan, 2 exchange\n" - "void sort4Bits1KeyValue(u32 sortData[4], int sortVal[4], int startBit, int lIdx, __local u32* ldsSortData, __local int *ldsSortVal)\n" - "{\n" - " for(uint ibit=0; ibit<BITS_PER_PASS; ibit+=2)\n" - " {\n" - " uint4 b = make_uint4((sortData[0]>>(startBit+ibit)) & 0x3, \n" - " (sortData[1]>>(startBit+ibit)) & 0x3, \n" - " (sortData[2]>>(startBit+ibit)) & 0x3, \n" - " (sortData[3]>>(startBit+ibit)) & 0x3);\n" - " u32 key4;\n" - " u32 sKeyPacked[4] = { 0, 0, 0, 0 };\n" - " {\n" - " sKeyPacked[0] |= 1<<(8*b.x);\n" - " sKeyPacked[1] |= 1<<(8*b.y);\n" - " sKeyPacked[2] |= 1<<(8*b.z);\n" - " sKeyPacked[3] |= 1<<(8*b.w);\n" - " key4 = sKeyPacked[0] + sKeyPacked[1] + sKeyPacked[2] + sKeyPacked[3];\n" - " }\n" - " u32 rankPacked;\n" - " u32 sumPacked;\n" - " {\n" - " rankPacked = localPrefixSum( key4, lIdx, &sumPacked, ldsSortData, WG_SIZE );\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " u32 newOffset[4] = { 0,0,0,0 };\n" - " {\n" - " u32 sumScanned = bit8Scan( sumPacked );\n" - " u32 scannedKeys[4];\n" - " scannedKeys[0] = 1<<(8*b.x);\n" - " scannedKeys[1] = 1<<(8*b.y);\n" - " scannedKeys[2] = 1<<(8*b.z);\n" - " 
scannedKeys[3] = 1<<(8*b.w);\n" - " { // 4 scans at once\n" - " u32 sum4 = 0;\n" - " for(int ie=0; ie<4; ie++)\n" - " {\n" - " u32 tmp = scannedKeys[ie];\n" - " scannedKeys[ie] = sum4;\n" - " sum4 += tmp;\n" - " }\n" - " }\n" - " {\n" - " u32 sumPlusRank = sumScanned + rankPacked;\n" - " { u32 ie = b.x;\n" - " scannedKeys[0] += sumPlusRank;\n" - " newOffset[0] = unpack4Key( scannedKeys[0], ie );\n" - " }\n" - " { u32 ie = b.y;\n" - " scannedKeys[1] += sumPlusRank;\n" - " newOffset[1] = unpack4Key( scannedKeys[1], ie );\n" - " }\n" - " { u32 ie = b.z;\n" - " scannedKeys[2] += sumPlusRank;\n" - " newOffset[2] = unpack4Key( scannedKeys[2], ie );\n" - " }\n" - " { u32 ie = b.w;\n" - " scannedKeys[3] += sumPlusRank;\n" - " newOffset[3] = unpack4Key( scannedKeys[3], ie );\n" - " }\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " {\n" - " ldsSortData[newOffset[0]] = sortData[0];\n" - " ldsSortData[newOffset[1]] = sortData[1];\n" - " ldsSortData[newOffset[2]] = sortData[2];\n" - " ldsSortData[newOffset[3]] = sortData[3];\n" - " ldsSortVal[newOffset[0]] = sortVal[0];\n" - " ldsSortVal[newOffset[1]] = sortVal[1];\n" - " ldsSortVal[newOffset[2]] = sortVal[2];\n" - " ldsSortVal[newOffset[3]] = sortVal[3];\n" - " GROUP_LDS_BARRIER;\n" - " u32 dstAddr = 4*lIdx;\n" - " sortData[0] = ldsSortData[dstAddr+0];\n" - " sortData[1] = ldsSortData[dstAddr+1];\n" - " sortData[2] = ldsSortData[dstAddr+2];\n" - " sortData[3] = ldsSortData[dstAddr+3];\n" - " sortVal[0] = ldsSortVal[dstAddr+0];\n" - " sortVal[1] = ldsSortVal[dstAddr+1];\n" - " sortVal[2] = ldsSortVal[dstAddr+2];\n" - " sortVal[3] = ldsSortVal[dstAddr+3];\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterSortDataKernel( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" - "{\n" - " __local int ldsSortData[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local int 
ldsSortVal[WG_SIZE*ELEMENTS_PER_WORK_ITEM+16];\n" - " __local u32 localHistogramToCarry[NUM_BUCKET];\n" - " __local u32 localHistogram[NUM_BUCKET*2];\n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 lIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int startBit = cb.m_startBit;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " if( lIdx < (NUM_BUCKET) )\n" - " {\n" - " localHistogramToCarry[lIdx] = rHistogram[lIdx*nWGs + wgIdx];\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " int nBlocks = n/blockSize - nBlocksPerWG*wgIdx;\n" - " int addr = blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++, addr+=blockSize)\n" - " {\n" - " u32 myHistogram = 0;\n" - " int sortData[ELEMENTS_PER_WORK_ITEM];\n" - " int sortVal[ELEMENTS_PER_WORK_ITEM];\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - "#if defined(CHECK_BOUNDARY)\n" - " {\n" - " sortData[i] = ( addr+i < n )? gSrc[ addr+i ].m_key : 0xffffffff;\n" - " sortVal[i] = ( addr+i < n )? 
gSrc[ addr+i ].m_value : 0xffffffff;\n" - " }\n" - "#else\n" - " {\n" - " sortData[i] = gSrc[ addr+i ].m_key;\n" - " sortVal[i] = gSrc[ addr+i ].m_value;\n" - " }\n" - "#endif\n" - " sort4Bits1KeyValue(sortData, sortVal, startBit, lIdx, ldsSortData, ldsSortVal);\n" - " u32 keys[ELEMENTS_PER_WORK_ITEM];\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - " keys[i] = (sortData[i]>>startBit) & 0xf;\n" - " { // create histogram\n" - " u32 setIdx = lIdx/16;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[lIdx] = 0;\n" - " }\n" - " ldsSortData[lIdx] = 0;\n" - " GROUP_LDS_BARRIER;\n" - " for(int i=0; i<ELEMENTS_PER_WORK_ITEM; i++)\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+i < n )\n" - "#endif\n" - "#if defined(NV_GPU)\n" - " SET_HISTOGRAM( setIdx, keys[i] )++;\n" - "#else\n" - " AtomInc( SET_HISTOGRAM( setIdx, keys[i] ) );\n" - "#endif\n" - " \n" - " GROUP_LDS_BARRIER;\n" - " \n" - " uint hIdx = NUM_BUCKET+lIdx;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " u32 sum = 0;\n" - " for(int i=0; i<WG_SIZE/16; i++)\n" - " {\n" - " sum += SET_HISTOGRAM( i, lIdx );\n" - " }\n" - " myHistogram = sum;\n" - " localHistogram[hIdx] = sum;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - "#if defined(USE_2LEVEL_REDUCE)\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[hIdx] = localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " u32 u0, u1, u2;\n" - " u0 = localHistogram[hIdx-3];\n" - " u1 = localHistogram[hIdx-2];\n" - " u2 = localHistogram[hIdx-1];\n" - " AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" - " GROUP_MEM_FENCE;\n" - " u0 = localHistogram[hIdx-12];\n" - " u1 = localHistogram[hIdx-8];\n" - " u2 = localHistogram[hIdx-4];\n" - " AtomAdd( localHistogram[hIdx], u0 + u1 + u2 );\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#else\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogram[hIdx] = localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-1];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += 
localHistogram[hIdx-2];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-4];\n" - " GROUP_MEM_FENCE;\n" - " localHistogram[hIdx] += localHistogram[hIdx-8];\n" - " GROUP_MEM_FENCE;\n" - " }\n" - "#endif\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " {\n" - " for(int ie=0; ie<ELEMENTS_PER_WORK_ITEM; ie++)\n" - " {\n" - " int dataIdx = ELEMENTS_PER_WORK_ITEM*lIdx+ie;\n" - " int binIdx = keys[ie];\n" - " int groupOffset = localHistogramToCarry[binIdx];\n" - " int myIdx = dataIdx - localHistogram[NUM_BUCKET+binIdx];\n" - "#if defined(CHECK_BOUNDARY)\n" - " if( addr+ie < n )\n" - " {\n" - " if ((groupOffset + myIdx)<n)\n" - " {\n" - " if (sortData[ie]==sortVal[ie])\n" - " {\n" - " \n" - " SortDataCL tmp;\n" - " tmp.m_key = sortData[ie];\n" - " tmp.m_value = sortVal[ie];\n" - " if (tmp.m_key == tmp.m_value)\n" - " gDst[groupOffset + myIdx ] = tmp;\n" - " }\n" - " \n" - " }\n" - " }\n" - "#else\n" - " if ((groupOffset + myIdx)<n)\n" - " {\n" - " gDst[ groupOffset + myIdx ].m_key = sortData[ie];\n" - " gDst[ groupOffset + myIdx ].m_value = sortVal[ie];\n" - " }\n" - "#endif\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx < NUM_BUCKET )\n" - " {\n" - " localHistogramToCarry[lIdx] += myHistogram;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterSortDataKernelSerial( __global const SortDataCL* restrict gSrc, __global const u32* rHistogram, __global SortDataCL* restrict gDst, int4 cb)\n" - "{\n" - " \n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 realLocalIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " int counter[NUM_BUCKET];\n" - " \n" - " if (realLocalIdx>0)\n" - " return;\n" - " \n" - " for (int c=0;c<NUM_BUCKET;c++)\n" - " counter[c]=0;\n" 
- " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " \n" - " int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n" - " {\n" - " for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n" - " {\n" - " int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " \n" - " for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++)\n" - " {\n" - " int i = addr2+j;\n" - " if( i < n )\n" - " {\n" - " int tableIdx;\n" - " tableIdx = (gSrc[i].m_key>>startBit) & 0xf;//0xf = NUM_TABLES-1\n" - " gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" - " counter[tableIdx] ++;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SortAndScatterKernelSerial( __global const u32* restrict gSrc, __global const u32* rHistogram, __global u32* restrict gDst, int4 cb )\n" - "{\n" - " \n" - " u32 gIdx = GET_GLOBAL_IDX;\n" - " u32 realLocalIdx = GET_LOCAL_IDX;\n" - " u32 wgIdx = GET_GROUP_IDX;\n" - " u32 wgSize = GET_GROUP_SIZE;\n" - " const int startBit = cb.m_startBit;\n" - " const int n = cb.m_n;\n" - " const int nWGs = cb.m_nWGs;\n" - " const int nBlocksPerWG = cb.m_nBlocksPerWG;\n" - " int counter[NUM_BUCKET];\n" - " \n" - " if (realLocalIdx>0)\n" - " return;\n" - " \n" - " for (int c=0;c<NUM_BUCKET;c++)\n" - " counter[c]=0;\n" - " const int blockSize = ELEMENTS_PER_WORK_ITEM*WG_SIZE;\n" - " \n" - " int nBlocks = (n)/blockSize - nBlocksPerWG*wgIdx;\n" - " for(int iblock=0; iblock<min(nBlocksPerWG, nBlocks); iblock++)\n" - " {\n" - " for (int lIdx=0;lIdx<WG_SIZE;lIdx++)\n" - " {\n" - " int addr2 = iblock*blockSize + blockSize*nBlocksPerWG*wgIdx + ELEMENTS_PER_WORK_ITEM*lIdx;\n" - " \n" - " for(int j=0; j<ELEMENTS_PER_WORK_ITEM; j++)\n" - " {\n" - " int i = addr2+j;\n" - " if( i < n )\n" - " {\n" - " int tableIdx;\n" - " tableIdx = (gSrc[i]>>startBit) & 0xf;//0xf = NUM_TABLES-1\n" - " 
gDst[rHistogram[tableIdx*nWGs+wgIdx] + counter[tableIdx]] = gSrc[i];\n" - " counter[tableIdx] ++;\n" - " }\n" - " }\n" - " }\n" - " }\n" - " \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp deleted file mode 100644 index 6571f30548..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp +++ /dev/null @@ -1,374 +0,0 @@ - -#include "b3GpuRaycast.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h" - -#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" - -#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" - -struct b3GpuRaycastInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_q; - cl_kernel m_raytraceKernel; - cl_kernel m_raytracePairsKernel; - cl_kernel m_findRayRigidPairIndexRanges; - - b3GpuParallelLinearBvh* m_plbvh; - b3RadixSort32CL* m_radixSorter; - b3FillCL* m_fill; - - //1 element per ray - b3OpenCLArray<b3RayInfo>* m_gpuRays; - b3OpenCLArray<b3RayHit>* m_gpuHitResults; - b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay; - b3OpenCLArray<int>* m_numRayRigidPairsPerRay; - - //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB - b3OpenCLArray<int>* m_gpuNumRayRigidPairs; - b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index - 
- int m_test; -}; - -b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q) -{ - m_data = new b3GpuRaycastInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_q = q; - m_data->m_raytraceKernel = 0; - m_data->m_raytracePairsKernel = 0; - m_data->m_findRayRigidPairIndexRanges = 0; - - m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q); - m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q); - m_data->m_fill = new b3FillCL(ctx, device, q); - - m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q); - m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q); - m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q); - m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q); - m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q); - m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q); - - { - cl_int errNum = 0; - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - clReleaseProgram(prog); - } -} - -b3GpuRaycast::~b3GpuRaycast() -{ - clReleaseKernel(m_data->m_raytraceKernel); - clReleaseKernel(m_data->m_raytracePairsKernel); - clReleaseKernel(m_data->m_findRayRigidPairIndexRanges); - - delete m_data->m_plbvh; - delete 
m_data->m_radixSorter; - delete m_data->m_fill; - - delete m_data->m_gpuRays; - delete m_data->m_gpuHitResults; - delete m_data->m_firstRayRigidPairIndexPerRay; - delete m_data->m_numRayRigidPairsPerRay; - delete m_data->m_gpuNumRayRigidPairs; - delete m_data->m_gpuRayRigidPairs; - - delete m_data; -} - -bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction) -{ - b3Vector3 rs = rayFrom - spherePos; - b3Vector3 rayDir = rayTo - rayFrom; - - float A = b3Dot(rayDir, rayDir); - float B = b3Dot(rs, rayDir); - float C = b3Dot(rs, rs) - (radius * radius); - - float D = B * B - A * C; - - if (D > 0.0) - { - float t = (-B - sqrt(D)) / A; - - if ((t >= 0.0f) && (t < hitFraction)) - { - hitFraction = t; - return true; - } - } - return false; -} - -bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly, - const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal) -{ - float exitFraction = hitFraction; - float enterFraction = -0.1f; - b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0); - for (int i = 0; i < poly.m_numFaces; i++) - { - const b3GpuFace& face = faces[poly.m_faceOffset + i]; - float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w; - float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w; - if (fromPlaneDist < 0.f) - { - if (toPlaneDist >= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist); - if (exitFraction > fraction) - { - exitFraction = fraction; - } - } - } - else - { - if (toPlaneDist < 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } - else - { - return false; - } - } - if (exitFraction <= enterFraction) - return false; - } - - if (enterFraction < 0.f) - return false; - - hitFraction = 
enterFraction; - hitNormal = curHitNormal; - return true; -} - -void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) -{ - // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); - - B3_PROFILE("castRaysHost"); - for (int r = 0; r < rays.size(); r++) - { - b3Vector3 rayFrom = rays[r].m_from; - b3Vector3 rayTo = rays[r].m_to; - float hitFraction = hitResults[r].m_hitFraction; - - int hitBodyIndex = -1; - b3Vector3 hitNormal; - - for (int b = 0; b < numBodies; b++) - { - const b3Vector3& pos = bodies[b].m_pos; - //const b3Quaternion& orn = bodies[b].m_quat; - - switch (collidables[bodies[b].m_collidableIdx].m_shapeType) - { - case SHAPE_SPHERE: - { - b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; - if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction)) - { - hitBodyIndex = b; - b3Vector3 hitPoint; - hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction); - hitNormal = (hitPoint - bodies[b].m_pos).normalize(); - } - } - case SHAPE_CONVEX_HULL: - { - b3Transform convexWorldTransform; - convexWorldTransform.setIdentity(); - convexWorldTransform.setOrigin(bodies[b].m_pos); - convexWorldTransform.setRotation(bodies[b].m_quat); - b3Transform convexWorld2Local = convexWorldTransform.inverse(); - - b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); - b3Vector3 rayToLocal = convexWorld2Local(rayTo); - - int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; - const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; - if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal)) - { - hitBodyIndex = b; - } - - break; - } - default: - { - static bool once = true; - if (once) - { - once = 
false; - b3Warning("Raytest: unsupported shape type\n"); - } - } - } - } - if (hitBodyIndex >= 0) - { - hitResults[r].m_hitFraction = hitFraction; - hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction); - hitResults[r].m_hitNormal = hitNormal; - hitResults[r].m_hitBody = hitBodyIndex; - } - } -} -///todo: add some acceleration structure (AABBs, tree etc) -void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) -{ - //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); - - B3_PROFILE("castRaysGPU"); - - { - B3_PROFILE("raycast copyFromHost"); - m_data->m_gpuRays->copyFromHost(rays); - m_data->m_gpuHitResults->copyFromHost(hitResults); - } - - int numRays = hitResults.size(); - { - m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); - m_data->m_numRayRigidPairsPerRay->resize(numRays); - - m_data->m_gpuNumRayRigidPairs->resize(1); - m_data->m_gpuRayRigidPairs->resize(numRays * 16); - } - - //run kernel - const bool USE_BRUTE_FORCE_RAYCAST = false; - if (USE_BRUTE_FORCE_RAYCAST) - { - B3_PROFILE("raycast launch1D"); - - b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel"); - int numRays = rays.size(); - launcher.setConst(numRays); - - launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); - launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); - - launcher.setConst(numBodies); - launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); - launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); - - 
launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - else - { - m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU()); - - m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); - - int numRayRigidPairs = -1; - m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); - if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size()) - { - numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); - m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); - } - - m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct - - //Sort ray-rigid pairs by ray index - { - B3_PROFILE("sort ray-rigid pairs"); - m_data->m_radixSorter->execute(*reinterpret_cast<b3OpenCLArray<b3SortData>*>(m_data->m_gpuRayRigidPairs)); - } - - //detect start,count of each ray pair - { - B3_PROFILE("detect ray-rigid pair index ranges"); - - { - B3_PROFILE("reset ray-rigid pair index ranges"); - - m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index - m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); - clFinish(m_data->m_q); - } - - b3BufferInfoCL bufferInfo[] = - { - b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()), - - b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()), - b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numRayRigidPairs); - - launcher.launch1D(numRayRigidPairs); - clFinish(m_data->m_q); - } - - { - B3_PROFILE("ray-rigid intersection"); - - b3BufferInfoCL bufferInfo[] = - { - 
b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()), - b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()), - b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()), - b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()), - - b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()), - b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()), - - b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); - launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(numRays); - - launcher.launch1D(numRays); - clFinish(m_data->m_q); - } - } - - //copy results - { - B3_PROFILE("raycast copyToHost"); - m_data->m_gpuHitResults->copyToHost(hitResults); - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h deleted file mode 100644 index f1f6ffd402..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef B3_GPU_RAYCAST_H -#define B3_GPU_RAYCAST_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3GpuRaycast -{ -protected: - struct b3GpuRaycastInternalData* m_data; - -public: - b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q); - virtual ~b3GpuRaycast(); - - void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData); - - void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, - int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, - const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase); -}; - -#endif //B3_GPU_RAYCAST_H diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl deleted file mode 100644 index e72d96876b..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl +++ /dev/null @@ -1,439 +0,0 @@ - -#define SHAPE_CONVEX_HULL 3 -#define SHAPE_PLANE 4 -#define SHAPE_CONCAVE_TRIMESH 5 -#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 -#define SHAPE_SPHERE 7 - - -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; - -typedef struct -{ - float m_hitFraction; - int m_hitResult0; - 
int m_hitResult1; - int m_hitResult2; - float4 m_hitPoint; - float4 m_hitNormal; -} b3RayHit; - -typedef struct -{ - float4 m_pos; - float4 m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_collidableIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct Collidable -{ - union { - int m_numChildShapes; - int m_bvhIndex; - }; - float m_radius; - int m_shapeType; - int m_shapeIndex; -} Collidable; - - -typedef struct -{ - float4 m_localCenter; - float4 m_extents; - float4 mC; - float4 mE; - - float m_radius; - int m_faceOffset; - int m_numFaces; - int m_numVertices; - - int m_vertexOffset; - int m_uniqueEdgesOffset; - int m_numUniqueEdges; - int m_unused; - -} ConvexPolyhedronCL; - -typedef struct -{ - float4 m_plane; - int m_indexOffset; - int m_numIndices; -} b3GpuFace; - - - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline - Quaternion qtMul(Quaternion a, Quaternion b); - -__inline - Quaternion qtNormalize(Quaternion in); - - -__inline - Quaternion qtInvert(Quaternion q); - - -__inline - float dot3F4(float4 a, float4 b) -{ - float4 a1 = (float4)(a.xyz,0.f); - float4 b1 = (float4)(b.xyz,0.f); - return dot(a1, b1); -} - - -__inline - Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross( a, b ); - ans += a.w*b+b.w*a; - // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline - Quaternion qtNormalize(Quaternion in) -{ - return fast_normalize(in); - // in /= length( in ); - // return in; -} -__inline - float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(q,vcpy); - out = qtMul(out,qInv); - return out; -} - -__inline - Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline - float4 qtInvRotate(const Quaternion q, float4 
vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - -void trInverse(float4 translationIn, Quaternion orientationIn, - float4* translationOut, Quaternion* orientationOut) -{ - *orientationOut = qtInvert(orientationIn); - *translationOut = qtRotate(*orientationOut, -translationIn); -} - - - - - -bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, - __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) -{ - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - bool result = true; - - float exitFraction = hitFraction[0]; - float enterFraction = -0.3f; - float4 curHitNormal = (float4)(0,0,0,0); - for (int i=0;i<numFaces && result;i++) - { - b3GpuFace face = faces[faceOffset+i]; - float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w; - float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w; - if (fromPlaneDist<0.f) - { - if (toPlaneDist >= 0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (exitFraction>fraction) - { - exitFraction = fraction; - } - } - } else - { - if (toPlaneDist<0.f) - { - float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); - if (enterFraction <= fraction) - { - enterFraction = fraction; - curHitNormal = face.m_plane; - curHitNormal.w = 0.f; - } - } else - { - result = false; - } - } - if (exitFraction <= enterFraction) - result = false; - } - - if (enterFraction < 0.f) - { - result = false; - } - - if (result) - { - hitFraction[0] = enterFraction; - hitNormal[0] = curHitNormal; - } - return result; -} - - - - - - -bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction) -{ - float4 rs = rayFrom - spherePos; - rs.w = 0.f; - float4 rayDir = rayTo-rayFrom; - rayDir.w = 0.f; - float A = dot(rayDir,rayDir); - float B = dot(rs, rayDir); - float C = dot(rs, rs) - (radius * radius); - - float D = B * B - A*C; - - if (D > 0.0f) - { - float t = (-B - sqrt(D))/A; - - if ( (t >= 0.0f) && (t < (*hitFraction)) 
) - { - *hitFraction = t; - return true; - } - } - return false; -} - -float4 setInterpolate3(float4 from, float4 to, float t) -{ - float s = 1.0f - t; - float4 result; - result = s * from + t * to; - result.w = 0.f; - return result; -} - -__kernel void rayCastKernel( - int numRays, - const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - const int numBodies, - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes ) -{ - - int i = get_global_id(0); - if (i>=numRays) - return; - - hitResults[i].m_hitFraction = 1.f; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex= -1; - - int cachedCollidableIndex = -1; - Collidable cachedCollidable; - - for (int b=0;b<numBodies;b++) - { - if (hitResults[i].m_hitResult2==b) - continue; - Body body = bodies[b]; - float4 pos = body.m_pos; - float4 orn = body.m_quat; - if (cachedCollidableIndex != body.m_collidableIdx) - { - cachedCollidableIndex = body.m_collidableIdx; - cachedCollidable = collidables[cachedCollidableIndex]; - } - if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL) - { - - float4 invPos = (float4)(0,0,0,0); - float4 invOrn = (float4)(0,0,0,0); - float4 rayFromLocal = (float4)(0,0,0,0); - float4 rayToLocal = (float4)(0,0,0,0); - invOrn = qtInvert(orn); - invPos = qtRotate(invOrn, -pos); - rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; - rayToLocal = qtRotate( invOrn, rayTo) + invPos; - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces; - int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset; - if (numFaces) - { - if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) - { - hitBodyIndex = b; - - } - } - } - if (cachedCollidable.m_shapeType == SHAPE_SPHERE) - { - float radius = 
cachedCollidable.m_radius; - - if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) - { - hitBodyIndex = b; - hitNormal = (float4) (hitPoint-bodies[b].m_pos); - } - } - } - - if (hitBodyIndex>=0) - { - hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction); - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} - - -__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, - __global int* out_firstRayRigidPairIndexPerRay, - __global int* out_numRayRigidPairsPerRay, - int numRayRigidPairs) -{ - int rayRigidPairIndex = get_global_id(0); - if (rayRigidPairIndex >= numRayRigidPairs) return; - - int rayIndex = rayRigidPairs[rayRigidPairIndex].x; - - atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex); - atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]); -} - -__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, - __global b3RayHit* hitResults, - __global int* firstRayRigidPairIndexPerRay, - __global int* numRayRigidPairsPerRay, - - __global Body* bodies, - __global Collidable* collidables, - __global const b3GpuFace* faces, - __global const ConvexPolyhedronCL* convexShapes, - - __global int2* rayRigidPairs, - int numRays) -{ - int i = get_global_id(0); - if (i >= numRays) return; - - float4 rayFrom = rays[i].m_from; - float4 rayTo = rays[i].m_to; - - hitResults[i].m_hitFraction = 1.f; - - float hitFraction = 1.f; - float4 hitPoint; - float4 hitNormal; - int hitBodyIndex = -1; - - // - for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair) - { - int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i]; - int b = rayRigidPairs[rayRigidPairIndex].y; - - if (hitResults[i].m_hitResult2 == b) continue; - - Body body = bodies[b]; - Collidable rigidCollidable = collidables[body.m_collidableIdx]; - - float4 pos = body.m_pos; - float4 orn = body.m_quat; - - if 
(rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL) - { - float4 invPos = (float4)(0,0,0,0); - float4 invOrn = (float4)(0,0,0,0); - float4 rayFromLocal = (float4)(0,0,0,0); - float4 rayToLocal = (float4)(0,0,0,0); - invOrn = qtInvert(orn); - invPos = qtRotate(invOrn, -pos); - rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; - rayToLocal = qtRotate( invOrn, rayTo) + invPos; - rayFromLocal.w = 0.f; - rayToLocal.w = 0.f; - int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces; - int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset; - - if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - } - } - - if (rigidCollidable.m_shapeType == SHAPE_SPHERE) - { - float radius = rigidCollidable.m_radius; - - if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) - { - hitBodyIndex = b; - hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); - hitNormal = (float4) (hitPoint - bodies[b].m_pos); - } - } - } - - if (hitBodyIndex >= 0) - { - hitResults[i].m_hitFraction = hitFraction; - hitResults[i].m_hitPoint = hitPoint; - hitResults[i].m_hitNormal = normalize(hitNormal); - hitResults[i].m_hitResult0 = hitBodyIndex; - } - -} diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h deleted file mode 100644 index 94f6a8eb9f..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h +++ /dev/null @@ -1,380 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* rayCastKernelCL = - "#define SHAPE_CONVEX_HULL 3\n" - "#define SHAPE_PLANE 4\n" - "#define SHAPE_CONCAVE_TRIMESH 5\n" - "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" - "#define SHAPE_SPHERE 7\n" - "typedef struct\n" - "{\n" - " float4 m_from;\n" - " float4 
m_to;\n" - "} b3RayInfo;\n" - "typedef struct\n" - "{\n" - " float m_hitFraction;\n" - " int m_hitResult0;\n" - " int m_hitResult1;\n" - " int m_hitResult2;\n" - " float4 m_hitPoint;\n" - " float4 m_hitNormal;\n" - "} b3RayHit;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " float4 m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " unsigned int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " float m_radius;\n" - " int m_shapeType;\n" - " int m_shapeIndex;\n" - "} Collidable;\n" - "typedef struct \n" - "{\n" - " float4 m_localCenter;\n" - " float4 m_extents;\n" - " float4 mC;\n" - " float4 mE;\n" - " float m_radius;\n" - " int m_faceOffset;\n" - " int m_numFaces;\n" - " int m_numVertices;\n" - " int m_vertexOffset;\n" - " int m_uniqueEdgesOffset;\n" - " int m_numUniqueEdges;\n" - " int m_unused;\n" - "} ConvexPolyhedronCL;\n" - "typedef struct\n" - "{\n" - " float4 m_plane;\n" - " int m_indexOffset;\n" - " int m_numIndices;\n" - "} b3GpuFace;\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - " Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - " Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - " Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - " float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = (float4)(a.xyz,0.f);\n" - " float4 b1 = (float4)(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - " Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross( a, b );\n" - " ans += a.w*b+b.w*a;\n" - " // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - " Quaternion 
qtNormalize(Quaternion in)\n" - "{\n" - " return fast_normalize(in);\n" - " // in /= length( in );\n" - " // return in;\n" - "}\n" - "__inline\n" - " float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(q,vcpy);\n" - " out = qtMul(out,qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - " Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - " float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "void trInverse(float4 translationIn, Quaternion orientationIn,\n" - " float4* translationOut, Quaternion* orientationOut)\n" - "{\n" - " *orientationOut = qtInvert(orientationIn);\n" - " *translationOut = qtRotate(*orientationOut, -translationIn);\n" - "}\n" - "bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" - " __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n" - "{\n" - " rayFromLocal.w = 0.f;\n" - " rayToLocal.w = 0.f;\n" - " bool result = true;\n" - " float exitFraction = hitFraction[0];\n" - " float enterFraction = -0.3f;\n" - " float4 curHitNormal = (float4)(0,0,0,0);\n" - " for (int i=0;i<numFaces && result;i++)\n" - " {\n" - " b3GpuFace face = faces[faceOffset+i];\n" - " float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n" - " float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n" - " if (fromPlaneDist<0.f)\n" - " {\n" - " if (toPlaneDist >= 0.f)\n" - " {\n" - " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" - " if (exitFraction>fraction)\n" - " {\n" - " exitFraction = fraction;\n" - " }\n" - " } \n" - " } else\n" - " {\n" - " if (toPlaneDist<0.f)\n" - " {\n" - " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" - " if (enterFraction <= fraction)\n" - " {\n" - " enterFraction = fraction;\n" - " curHitNormal = 
face.m_plane;\n" - " curHitNormal.w = 0.f;\n" - " }\n" - " } else\n" - " {\n" - " result = false;\n" - " }\n" - " }\n" - " if (exitFraction <= enterFraction)\n" - " result = false;\n" - " }\n" - " if (enterFraction < 0.f)\n" - " {\n" - " result = false;\n" - " }\n" - " if (result)\n" - " { \n" - " hitFraction[0] = enterFraction;\n" - " hitNormal[0] = curHitNormal;\n" - " }\n" - " return result;\n" - "}\n" - "bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n" - "{\n" - " float4 rs = rayFrom - spherePos;\n" - " rs.w = 0.f;\n" - " float4 rayDir = rayTo-rayFrom;\n" - " rayDir.w = 0.f;\n" - " float A = dot(rayDir,rayDir);\n" - " float B = dot(rs, rayDir);\n" - " float C = dot(rs, rs) - (radius * radius);\n" - " float D = B * B - A*C;\n" - " if (D > 0.0f)\n" - " {\n" - " float t = (-B - sqrt(D))/A;\n" - " if ( (t >= 0.0f) && (t < (*hitFraction)) )\n" - " {\n" - " *hitFraction = t;\n" - " return true;\n" - " }\n" - " }\n" - " return false;\n" - "}\n" - "float4 setInterpolate3(float4 from, float4 to, float t)\n" - "{\n" - " float s = 1.0f - t;\n" - " float4 result;\n" - " result = s * from + t * to;\n" - " result.w = 0.f; \n" - " return result; \n" - "}\n" - "__kernel void rayCastKernel( \n" - " int numRays, \n" - " const __global b3RayInfo* rays, \n" - " __global b3RayHit* hitResults, \n" - " const int numBodies, \n" - " __global Body* bodies,\n" - " __global Collidable* collidables,\n" - " __global const b3GpuFace* faces,\n" - " __global const ConvexPolyhedronCL* convexShapes )\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numRays)\n" - " return;\n" - " hitResults[i].m_hitFraction = 1.f;\n" - " float4 rayFrom = rays[i].m_from;\n" - " float4 rayTo = rays[i].m_to;\n" - " float hitFraction = 1.f;\n" - " float4 hitPoint;\n" - " float4 hitNormal;\n" - " int hitBodyIndex= -1;\n" - " int cachedCollidableIndex = -1;\n" - " Collidable cachedCollidable;\n" - " for (int b=0;b<numBodies;b++)\n" - " {\n" - " if 
(hitResults[i].m_hitResult2==b)\n" - " continue;\n" - " Body body = bodies[b];\n" - " float4 pos = body.m_pos;\n" - " float4 orn = body.m_quat;\n" - " if (cachedCollidableIndex != body.m_collidableIdx)\n" - " {\n" - " cachedCollidableIndex = body.m_collidableIdx;\n" - " cachedCollidable = collidables[cachedCollidableIndex];\n" - " }\n" - " if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " float4 invPos = (float4)(0,0,0,0);\n" - " float4 invOrn = (float4)(0,0,0,0);\n" - " float4 rayFromLocal = (float4)(0,0,0,0);\n" - " float4 rayToLocal = (float4)(0,0,0,0);\n" - " invOrn = qtInvert(orn);\n" - " invPos = qtRotate(invOrn, -pos);\n" - " rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" - " rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" - " rayFromLocal.w = 0.f;\n" - " rayToLocal.w = 0.f;\n" - " int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n" - " int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n" - " if (numFaces)\n" - " {\n" - " if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" - " {\n" - " hitBodyIndex = b;\n" - " \n" - " }\n" - " }\n" - " }\n" - " if (cachedCollidable.m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " float radius = cachedCollidable.m_radius;\n" - " \n" - " if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" - " {\n" - " hitBodyIndex = b;\n" - " hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n" - " }\n" - " }\n" - " }\n" - " if (hitBodyIndex>=0)\n" - " {\n" - " hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" - " hitResults[i].m_hitFraction = hitFraction;\n" - " hitResults[i].m_hitPoint = hitPoint;\n" - " hitResults[i].m_hitNormal = normalize(hitNormal);\n" - " hitResults[i].m_hitResult0 = hitBodyIndex;\n" - " }\n" - "}\n" - "__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n" - " __global int* out_firstRayRigidPairIndexPerRay,\n" - " __global int* 
out_numRayRigidPairsPerRay,\n" - " int numRayRigidPairs)\n" - "{\n" - " int rayRigidPairIndex = get_global_id(0);\n" - " if (rayRigidPairIndex >= numRayRigidPairs) return;\n" - " \n" - " int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n" - " \n" - " atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n" - " atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n" - "}\n" - "__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n" - " __global b3RayHit* hitResults, \n" - " __global int* firstRayRigidPairIndexPerRay,\n" - " __global int* numRayRigidPairsPerRay,\n" - " \n" - " __global Body* bodies,\n" - " __global Collidable* collidables,\n" - " __global const b3GpuFace* faces,\n" - " __global const ConvexPolyhedronCL* convexShapes,\n" - " \n" - " __global int2* rayRigidPairs,\n" - " int numRays)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i >= numRays) return;\n" - " \n" - " float4 rayFrom = rays[i].m_from;\n" - " float4 rayTo = rays[i].m_to;\n" - " \n" - " hitResults[i].m_hitFraction = 1.f;\n" - " \n" - " float hitFraction = 1.f;\n" - " float4 hitPoint;\n" - " float4 hitNormal;\n" - " int hitBodyIndex = -1;\n" - " \n" - " //\n" - " for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n" - " {\n" - " int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n" - " int b = rayRigidPairs[rayRigidPairIndex].y;\n" - " \n" - " if (hitResults[i].m_hitResult2 == b) continue;\n" - " \n" - " Body body = bodies[b];\n" - " Collidable rigidCollidable = collidables[body.m_collidableIdx];\n" - " \n" - " float4 pos = body.m_pos;\n" - " float4 orn = body.m_quat;\n" - " \n" - " if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" - " {\n" - " float4 invPos = (float4)(0,0,0,0);\n" - " float4 invOrn = (float4)(0,0,0,0);\n" - " float4 rayFromLocal = (float4)(0,0,0,0);\n" - " float4 rayToLocal = (float4)(0,0,0,0);\n" - " invOrn = qtInvert(orn);\n" - " invPos = qtRotate(invOrn, -pos);\n" - " rayFromLocal = qtRotate( invOrn, 
rayFrom ) + invPos;\n" - " rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" - " rayFromLocal.w = 0.f;\n" - " rayToLocal.w = 0.f;\n" - " int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n" - " int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n" - " \n" - " if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" - " {\n" - " hitBodyIndex = b;\n" - " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" - " }\n" - " }\n" - " \n" - " if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n" - " {\n" - " float radius = rigidCollidable.m_radius;\n" - " \n" - " if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" - " {\n" - " hitBodyIndex = b;\n" - " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" - " hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n" - " }\n" - " }\n" - " }\n" - " \n" - " if (hitBodyIndex >= 0)\n" - " {\n" - " hitResults[i].m_hitFraction = hitFraction;\n" - " hitResults[i].m_hitPoint = hitPoint;\n" - " hitResults[i].m_hitNormal = normalize(hitNormal);\n" - " hitResults[i].m_hitResult0 = hitBodyIndex;\n" - " }\n" - " \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h deleted file mode 100644 index 89c0142ab3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuConstraint4.h +++ /dev/null @@ -1,17 +0,0 @@ - -#ifndef B3_CONSTRAINT4_h -#define B3_CONSTRAINT4_h -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Dynamics/shared/b3ContactConstraint4.h" - -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuConstraint4 : public b3ContactConstraint4 -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - inline void setFrictionCoeff(float value) { m_linear[3] = value; } - inline float getFrictionCoeff() const { return m_linear[3]; } -}; - -#endif //B3_CONSTRAINT4_h diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp 
b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp deleted file mode 100644 index a271090af4..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.cpp +++ /dev/null @@ -1,134 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Erwin Coumans - -#include "b3GpuGenericConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include <new> -#include "Bullet3Common/b3Transform.h" - -void b3GpuGenericConstraint::getInfo1(unsigned int* info, const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - *info = 3; - break; - }; - default: - { - b3Assert(0); - } - }; -} - -void getInfo2Point2Point(b3GpuGenericConstraint* constraint, b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - b3Transform trA; - trA.setIdentity(); - trA.setOrigin(bodies[constraint->m_rbA].m_pos); - trA.setRotation(bodies[constraint->m_rbA].m_quat); - - b3Transform trB; - trB.setIdentity(); - trB.setOrigin(bodies[constraint->m_rbB].m_pos); - trB.setRotation(bodies[constraint->m_rbB].m_quat); - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip + 1] = 1; - info->m_J1linearAxis[2 * info->rowskip + 2] = 1; - - b3Vector3 a1 = trA.getBasis() * constraint->getPivotInA(); - //b3Vector3 a1a = b3QuatRotate(trA.getRotation(),constraint->getPivotInA()); - - { - b3Vector3* angular0 = (b3Vector3*)(info->m_J1angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J1angularAxis + info->rowskip); - b3Vector3* angular2 = (b3Vector3*)(info->m_J1angularAxis + 2 * info->rowskip); - b3Vector3 a1neg = -a1; - a1neg.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip + 1] = -1; - info->m_J2linearAxis[2 * info->rowskip + 2] = -1; - } - - b3Vector3 a2 = trB.getBasis() * constraint->getPivotInB(); - - { - // b3Vector3 a2n = -a2; - b3Vector3* angular0 = (b3Vector3*)(info->m_J2angularAxis); - b3Vector3* angular1 = (b3Vector3*)(info->m_J2angularAxis + info->rowskip); - b3Vector3* angular2 = 
(b3Vector3*)(info->m_J2angularAxis + 2 * info->rowskip); - a2.getSkewSymmetricMatrix(angular0, angular1, angular2); - } - - // set right hand side - // b3Scalar currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp; - b3Scalar currERP = info->erp; - - b3Scalar k = info->fps * currERP; - int j; - for (j = 0; j < 3; j++) - { - info->m_constraintError[j * info->rowskip] = k * (a2[j] + trB.getOrigin()[j] - a1[j] - trA.getOrigin()[j]); - //printf("info->m_constraintError[%d]=%f\n",j,info->m_constraintError[j]); - } -#if 0 - if(m_flags & B3_P2P_FLAGS_CFM) - { - for (j=0; j<3; j++) - { - info->cfm[j*info->rowskip] = m_cfm; - } - } -#endif - -#if 0 - b3Scalar impulseClamp = m_setting.m_impulseClamp;// - for (j=0; j<3; j++) - { - if (m_setting.m_impulseClamp > 0) - { - info->m_lowerLimit[j*info->rowskip] = -impulseClamp; - info->m_upperLimit[j*info->rowskip] = impulseClamp; - } - } - info->m_damping = m_setting.m_damping; -#endif -} - -void b3GpuGenericConstraint::getInfo2(b3GpuConstraintInfo2* info, const b3RigidBodyData* bodies) -{ - switch (m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(this, info, bodies); - break; - }; - default: - { - b3Assert(0); - } - }; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h deleted file mode 100644 index 1f163ba7d5..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h +++ /dev/null @@ -1,128 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_GENERIC_CONSTRAINT_H -#define B3_GPU_GENERIC_CONSTRAINT_H - -#include "Bullet3Common/b3Quaternion.h" -struct b3RigidBodyData; -enum B3_CONSTRAINT_FLAGS -{ - B3_CONSTRAINT_FLAG_ENABLED = 1, -}; - -enum b3GpuGenericConstraintType -{ - B3_GPU_POINT2POINT_CONSTRAINT_TYPE = 3, - B3_GPU_FIXED_CONSTRAINT_TYPE = 4, - // B3_HINGE_CONSTRAINT_TYPE, - // B3_CONETWIST_CONSTRAINT_TYPE, - // B3_D6_CONSTRAINT_TYPE, - // B3_SLIDER_CONSTRAINT_TYPE, - // B3_CONTACT_CONSTRAINT_TYPE, - // B3_D6_SPRING_CONSTRAINT_TYPE, - // B3_GEAR_CONSTRAINT_TYPE, - - B3_GPU_MAX_CONSTRAINT_TYPE -}; - -struct b3GpuConstraintInfo2 -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). - b3Scalar fps, erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - b3Scalar *m_J1linearAxis, *m_J1angularAxis, *m_J2linearAxis, *m_J2angularAxis; - - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - b3Scalar *m_constraintError, *cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). 
- b3Scalar *m_lowerLimit, *m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - int* findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - b3Scalar m_damping; -}; - -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuGenericConstraint -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - b3Vector3 m_pivotInA; - b3Vector3 m_pivotInB; - b3Quaternion m_relTargetAB; - - int m_flags; - int m_uid; - int m_padding[2]; - - int getRigidBodyA() const - { - return m_rbA; - } - int getRigidBodyB() const - { - return m_rbB; - } - - const b3Vector3& getPivotInA() const - { - return m_pivotInA; - } - - const b3Vector3& getPivotInB() const - { - return m_pivotInB; - } - - int isEnabled() const - { - return m_flags & B3_CONSTRAINT_FLAG_ENABLED; - } - - float getBreakingImpulseThreshold() const - { - return m_breakingImpulseThreshold; - } - - ///internal method used by the constraint solver, don't use them directly - void getInfo1(unsigned int* info, const b3RigidBodyData* bodies); - - ///internal method used by the constraint solver, don't use them directly - void getInfo2(b3GpuConstraintInfo2 * info, const b3RigidBodyData* bodies); -}; - -#endif //B3_GPU_GENERIC_CONSTRAINT_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp deleted file mode 100644 index 089fb1f6a6..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.cpp +++ /dev/null @@ -1,1305 +0,0 @@ - -#include "b3GpuJacobiContactSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2 -class b3Vector3; -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3OpenCL/RigidBody/kernels/solverUtils.h" -#include "Bullet3Common/b3Logging.h" -#include "b3GpuConstraint4.h" -#include "Bullet3Common/shared/b3Int2.h" -#include "Bullet3Common/shared/b3Int4.h" -#define SOLVER_UTILS_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl" - -struct b3GpuJacobiSolverInternalData -{ - //btRadixSort32CL* m_sort32; - //btBoundSearchCL* m_search; - b3PrefixScanCL* m_scan; - - b3OpenCLArray<unsigned int>* m_bodyCount; - b3OpenCLArray<b3Int2>* m_contactConstraintOffsets; - b3OpenCLArray<unsigned int>* m_offsetSplitBodies; - - b3OpenCLArray<b3Vector3>* m_deltaLinearVelocities; - b3OpenCLArray<b3Vector3>* m_deltaAngularVelocities; - - b3AlignedObjectArray<b3Vector3> m_deltaLinearVelocitiesCPU; - b3AlignedObjectArray<b3Vector3> m_deltaAngularVelocitiesCPU; - - b3OpenCLArray<b3GpuConstraint4>* m_contactConstraints; - - b3FillCL* m_filler; - - cl_kernel m_countBodiesKernel; - cl_kernel m_contactToConstraintSplitKernel; - cl_kernel m_clearVelocitiesKernel; - cl_kernel m_averageVelocitiesKernel; - cl_kernel m_updateBodyVelocitiesKernel; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; -}; - 
-b3GpuJacobiContactSolver::b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : m_context(ctx), - m_device(device), - m_queue(queue) -{ - m_data = new b3GpuJacobiSolverInternalData; - m_data->m_scan = new b3PrefixScanCL(m_context, m_device, m_queue); - m_data->m_bodyCount = new b3OpenCLArray<unsigned int>(m_context, m_queue); - m_data->m_filler = new b3FillCL(m_context, m_device, m_queue); - m_data->m_contactConstraintOffsets = new b3OpenCLArray<b3Int2>(m_context, m_queue); - m_data->m_offsetSplitBodies = new b3OpenCLArray<unsigned int>(m_context, m_queue); - m_data->m_contactConstraints = new b3OpenCLArray<b3GpuConstraint4>(m_context, m_queue); - m_data->m_deltaLinearVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue); - m_data->m_deltaAngularVelocities = new b3OpenCLArray<b3Vector3>(m_context, m_queue); - - cl_int pErrNum; - const char* additionalMacros = ""; - const char* solverUtilsSource = solverUtilsCL; - { - cl_program solverUtilsProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverUtilsSource, &pErrNum, additionalMacros, SOLVER_UTILS_KERNEL_PATH); - b3Assert(solverUtilsProg); - m_data->m_countBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "CountBodiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_countBodiesKernel); - - m_data->m_contactToConstraintSplitKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ContactToConstraintSplitKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_contactToConstraintSplitKernel); - m_data->m_clearVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "ClearVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_clearVelocitiesKernel); - - m_data->m_averageVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, 
"AverageVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_averageVelocitiesKernel); - - m_data->m_updateBodyVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "UpdateBodyVelocitiesKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_updateBodyVelocitiesKernel); - - m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveContactJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_solveContactKernel); - - m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverUtilsSource, "SolveFrictionJacobiKernel", &pErrNum, solverUtilsProg, additionalMacros); - b3Assert(m_data->m_solveFrictionKernel); - } -} - -b3GpuJacobiContactSolver::~b3GpuJacobiContactSolver() -{ - clReleaseKernel(m_data->m_solveContactKernel); - clReleaseKernel(m_data->m_solveFrictionKernel); - clReleaseKernel(m_data->m_countBodiesKernel); - clReleaseKernel(m_data->m_contactToConstraintSplitKernel); - clReleaseKernel(m_data->m_averageVelocitiesKernel); - clReleaseKernel(m_data->m_updateBodyVelocitiesKernel); - clReleaseKernel(m_data->m_clearVelocitiesKernel); - - delete m_data->m_deltaLinearVelocities; - delete m_data->m_deltaAngularVelocities; - delete m_data->m_contactConstraints; - delete m_data->m_offsetSplitBodies; - delete m_data->m_contactConstraintOffsets; - delete m_data->m_bodyCount; - delete m_data->m_filler; - delete m_data->m_scan; - delete m_data; -} - -b3Vector3 make_float4(float v) -{ - return b3MakeVector3(v, v, v); -} - -b3Vector4 make_float4(float x, float y, float z, float w) -{ - return b3MakeVector4(x, y, z, w); -} - -static inline float calcRelVel(const b3Vector3& l0, const b3Vector3& l1, const b3Vector3& a0, const b3Vector3& a1, - const b3Vector3& linVel0, const b3Vector3& angVel0, const b3Vector3& linVel1, const b3Vector3& angVel1) -{ - return b3Dot(l0, linVel0) + 
b3Dot(a0, angVel0) + b3Dot(l1, linVel1) + b3Dot(a1, angVel1); -} - -static inline void setLinearAndAngular(const b3Vector3& n, const b3Vector3& r0, const b3Vector3& r1, - b3Vector3& linear, b3Vector3& angular0, b3Vector3& angular1) -{ - linear = n; - angular0 = b3Cross(r0, n); - angular1 = -b3Cross(r1, n); -} - -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, linear, angular0, angular1); - - float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelARO + dLinVelA, angVelARO + dAngVelA, linVelBRO + dLinVelB, angVelBRO + dAngVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - 
b3Assert(_finite(linImp1.getX())); -#endif - - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - } -} - -void solveContact3(b3GpuConstraint4* cs, - b3Vector3* posAPtr, b3Vector3* linVelA, b3Vector3* angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - b3Vector3* posBPtr, b3Vector3* linVelB, b3Vector3* angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Vector3* dLinVelA, b3Vector3* dAngVelA, b3Vector3* dLinVelB, b3Vector3* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for (int ic = 0; ic < 4; ic++) - { - if (cs->m_jacCoeffInv[ic] == 0.f) continue; - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs->m_worldPos[ic] - *posAPtr; - b3Vector3 r1 = cs->m_worldPos[ic] - *posBPtr; - setLinearAndAngular(cs->m_linear, r0, r1, linear, angular0, angular1); - - float rambdaDt = calcRelVel(cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA + *dLinVelA, *angVelA + *dAngVelA, *linVelB + *dLinVelB, *angVelB + *dAngVelB) + - cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt); - updated = b3Min(updated, maxRambdaDt); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - -static inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, const b3Vector3& linVelARO, const b3Vector3& angVelARO, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& 
linVelBRO, const b3Vector3& angVelBRO, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4], b3Vector3& dLinVelA, b3Vector3& dAngVelA, b3Vector3& dLinVelB, b3Vector3& dAngVelB) -{ - b3Vector3 linVelA = linVelARO + dLinVelA; - b3Vector3 linVelB = linVelBRO + dLinVelB; - b3Vector3 angVelA = angVelARO + dAngVelA; - b3Vector3 angVelB = angVelBRO + dAngVelB; - - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1(n, tangent[0], tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0] - center; - tangent[0] = cross3(n, r); - tangent[1] = cross3(tangent[0], n); - tangent[0] = normalize3(tangent[0]); - tangent[1] = normalize3(tangent[1]); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - setLinearAndAngular(tangent[i], r0, r1, linear, angular0, angular1); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (invMassA) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - } - if (invMassB) - { - dLinVelB += linImp1; - dAngVelB += angImp1; - } - } - - { // angular damping for point constraint - b3Vector3 
ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - if (invMassA) - dAngVelA -= (angNA * 0.1f) * n; - if (invMassB) - dAngVelB -= (angNB * 0.1f) * n; - } - } -} - -float calcJacCoeff(const b3Vector3& linear0, const b3Vector3& linear1, const b3Vector3& angular0, const b3Vector3& angular1, - float invMass0, const b3Matrix3x3* invInertia0, float invMass1, const b3Matrix3x3* invInertia1, float countA, float countB) -{ - // linear0,1 are normlized - float jmj0 = invMass0; //dot3F4(linear0, linear0)*invMass0; - - float jmj1 = b3Dot(mtMul3(angular0, *invInertia0), angular0); - float jmj2 = invMass1; //dot3F4(linear1, linear1)*invMass1; - float jmj3 = b3Dot(mtMul3(angular1, *invInertia1), angular1); - return -1.f / ((jmj0 + jmj1) * countA + (jmj2 + jmj3) * countB); - // return -1.f/((jmj0+jmj1)+(jmj2+jmj3)); -} - -void setConstraint4(const b3Vector3& posA, const b3Vector3& linVelA, const b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, const b3Vector3& linVelB, const b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - b3Contact4* src, float dt, float positionDrift, float positionConstraintCoeff, float countA, float countB, - b3GpuConstraint4* dstC) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f / dt; - for (int ic = 0; ic < 4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear[3] = 0.7f; //src->getFrictionCoeff() ); - for (int ic = 0; ic < 4; ic++) - { - b3Vector3 r0 = src->m_worldPosB[ic] - posA; - b3Vector3 r1 = src->m_worldPosB[ic] - posB; - - if (ic >= src->m_worldNormalOnB[3]) //npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - 
- float relVelN; - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, linear, angular0, angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f; //src->getRestituitionCoeff(); - if (relVelN * relVelN < 0.004f) - { - e = 0.f; - } - - dstC->m_b[ic] = e * relVelN; - //float penetration = src->m_worldPos[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic][3] + positionDrift) * positionConstraintCoeff * dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if (src->m_worldNormalOnB[3] > 0) //npoints - { // prepare friction - b3Vector3 center = make_float4(0.f); - for (int i = 0; i < src->m_worldNormalOnB[3]; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB[3]; - - b3Vector3 tangent[2]; - b3PlaneSpace1(src->m_worldNormalOnB, tangent[0], tangent[1]); - - b3Vector3 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for (int i = 0; i < 2; i++) - { - b3Vector3 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], linear, angular0, angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB, countA, countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for (int i = 0; i < 4; i++) - { - if (i < src->m_worldNormalOnB[3]) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - -void ContactToConstraintKernel(b3Contact4* gContact, b3RigidBodyData* gBodies, b3InertiaData* gShapes, b3GpuConstraint4* gConstraintOut, int nContacts, - float dt, - float positionDrift, - float positionConstraintCoeff, int gIdx, b3AlignedObjectArray<unsigned int>& bodyCount) -{ - //int gIdx = 0;//GET_GLOBAL_IDX; - - if 
(gIdx < nContacts) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Vector3 posA = gBodies[aIdx].m_pos; - b3Vector3 linVelA = gBodies[aIdx].m_linVel; - b3Vector3 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertiaWorld; //.m_invInertia; - - b3Vector3 posB = gBodies[bIdx].m_pos; - b3Vector3 linVelB = gBodies[bIdx].m_linVel; - b3Vector3 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertiaWorld; //m_invInertia; - - b3GpuConstraint4 cs; - float countA = invMassA ? (float)(bodyCount[aIdx]) : 1; - float countB = invMassB ? (float)(bodyCount[bIdx]) : 1; - setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, countA, countB, - &cs); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} - -void b3GpuJacobiContactSolver::solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo) -{ - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - b3AlignedObjectArray<unsigned int> bodyCount; - bodyCount.resize(numBodies); - for (int i = 0; i < numBodies; i++) - bodyCount[i] = 0; - - b3AlignedObjectArray<b3Int2> contactConstraintOffsets; - contactConstraintOffsets.resize(numManifolds); - - for (int i = 0; i < numManifolds; i++) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - - bool isFixedA = (pa < 0) || (pa == solverInfo.m_fixedBodyIndex); - bool isFixedB = (pb < 0) || (pb == solverInfo.m_fixedBodyIndex); - - int bodyIndexA = manifoldPtr[i].getBodyA(); - int bodyIndexB = manifoldPtr[i].getBodyB(); - - if (!isFixedA) - { - contactConstraintOffsets[i].x = 
bodyCount[bodyIndexA]; - bodyCount[bodyIndexA]++; - } - if (!isFixedB) - { - contactConstraintOffsets[i].y = bodyCount[bodyIndexB]; - bodyCount[bodyIndexB]++; - } - } - - b3AlignedObjectArray<unsigned int> offsetSplitBodies; - offsetSplitBodies.resize(numBodies); - unsigned int totalNumSplitBodies; - m_data->m_scan->executeHost(bodyCount, offsetSplitBodies, numBodies, &totalNumSplitBodies); - int numlastBody = bodyCount[numBodies - 1]; - totalNumSplitBodies += numlastBody; - printf("totalNumSplitBodies = %d\n", totalNumSplitBodies); - - b3AlignedObjectArray<b3GpuConstraint4> contactConstraints; - contactConstraints.resize(numManifolds); - - for (int i = 0; i < numManifolds; i++) - { - ContactToConstraintKernel(&manifoldPtr[0], bodies, inertias, &contactConstraints[0], numManifolds, - solverInfo.m_deltaTime, - solverInfo.m_positionDrift, - solverInfo.m_positionConstraintCoeff, - i, bodyCount); - } - int maxIter = solverInfo.m_numIterations; - - b3AlignedObjectArray<b3Vector3> deltaLinearVelocities; - b3AlignedObjectArray<b3Vector3> deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodies); - deltaAngularVelocities.resize(totalNumSplitBodies); - for (unsigned int i = 0; i < totalNumSplitBodies; i++) - { - deltaLinearVelocities[i].setZero(); - deltaAngularVelocities[i].setZero(); - } - - for (int iter = 0; iter < maxIter; iter++) - { - int i = 0; - for (i = 0; i < numManifolds; i++) - { - //float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0, 0, 0); - - b3Vector3* dlvAPtr = &zero; - b3Vector3* davAPtr = &zero; - b3Vector3* dlvBPtr = &zero; - b3Vector3* davBPtr = &zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = 
bodyOffsetA + constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB = bodyOffsetB + constraintOffsetB; - dlvBPtr = &deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - solveContact(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr); - } - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f / float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j = 0; j < count; j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor; - averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor; - } - for (int j = 0; j < count; j++) - { - deltaLinearVelocities[bodyOffset + j] = averageLinVel; - deltaAngularVelocities[bodyOffset + j] = averageAngVel; - } - } - } - } - for (int iter = 0; iter < maxIter; iter++) - { - //int i=0; - - //solve friction - - for (int i = 0; i < numManifolds; i++) - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - float sum = 0; - for (int j = 0; j < 4; j++) - { - sum += contactConstraints[i].m_appliedRambdaDt[j]; - } - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = 
(int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodies[aIdx]; - b3RigidBodyData& bodyB = bodies[bIdx]; - - b3Vector3 zero = b3MakeVector3(0, 0, 0); - - b3Vector3* dlvAPtr = &zero; - b3Vector3* davAPtr = &zero; - b3Vector3* dlvBPtr = &zero; - b3Vector3* davBPtr = &zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA + constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB = bodyOffsetB + constraintOffsetB; - dlvBPtr = &deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - for (int j = 0; j < 4; j++) - { - maxRambdaDt[j] = frictionCoeff * sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - solveFriction(contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertias[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertias[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt, *dlvAPtr, *davAPtr, *dlvBPtr, *davBPtr); - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f / float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j = 0; j < count; j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset + j] * factor; - averageAngVel += deltaAngularVelocities[bodyOffset + j] * factor; - } - for (int j = 0; j < count; j++) - { - deltaLinearVelocities[bodyOffset + j] = averageLinVel; - 
deltaAngularVelocities[bodyOffset + j] = averageAngVel; - } - } - } - } - - //easy - for (int i = 0; i < numBodies; i++) - { - if (bodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - bodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; - bodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } -} - -void b3GpuJacobiContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index) -// -// -//void b3GpuJacobiContactSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* bodies,b3OpenCLArray<b3InertiaData>* inertias,b3OpenCLArray<b3Contact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo) -{ - b3JacobiSolverInfo solverInfo; - solverInfo.m_fixedBodyIndex = static0Index; - - B3_PROFILE("b3GpuJacobiContactSolver::solveGroup"); - - //int numBodies = bodies->size(); - int numManifolds = numContacts; //manifoldPtr->size(); - - { - B3_PROFILE("resize"); - m_data->m_bodyCount->resize(numBodies); - } - - unsigned int val = 0; - b3Int2 val2; - val2.x = 0; - val2.y = 0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount, val, numBodies); - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets, val2, numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue, m_data->m_countBodiesKernel, "m_countBodiesKernel"); - launcher.setBuffer(contactBuf); //manifoldPtr->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - unsigned int totalNumSplitBodies = 0; - { - B3_PROFILE("m_scan->execute"); - - m_data->m_offsetSplitBodies->resize(numBodies); - 
m_data->m_scan->execute(*m_data->m_bodyCount, *m_data->m_offsetSplitBodies, numBodies, &totalNumSplitBodies); - totalNumSplitBodies += m_data->m_bodyCount->at(numBodies - 1); - } - - { - B3_PROFILE("m_data->m_contactConstraints->resize"); - //int numContacts = manifoldPtr->size(); - m_data->m_contactConstraints->resize(numContacts); - } - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher(m_queue, m_data->m_contactToConstraintSplitKernel, "m_contactToConstraintSplitKernel"); - launcher.setBuffer(contactBuf); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D(numContacts, 64); - } - - { - B3_PROFILE("m_data->m_deltaLinearVelocities->resize"); - m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - } - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue, m_data->m_clearVelocitiesKernel, "m_clearVelocitiesKernel"); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - clFinish(m_queue); - } - - int maxIter = solverInfo.m_numIterations; - - for (int iter = 0; iter < maxIter; iter++) - { - { - B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher(m_queue, m_data->m_solveContactKernel, "m_solveContactKernel"); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - 
launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, "m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher(m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(inertiaBuf); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher(m_queue, m_data->m_averageVelocitiesKernel, 
"m_averageVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - } - - { - B3_PROFILE("update body velocities"); - b3LauncherCL launcher(m_queue, m_data->m_updateBodyVelocitiesKernel, "m_updateBodyVelocitiesKernel"); - launcher.setBuffer(bodyBuf); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } -} - -#if 0 - -void b3GpuJacobiContactSolver::solveGroupMixed(b3OpenCLArray<b3RigidBodyData>* bodiesGPU,b3OpenCLArray<b3InertiaData>* inertiasGPU,b3OpenCLArray<b3Contact4>* manifoldPtrGPU,const btJacobiSolverInfo& solverInfo) -{ - - b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; - bodiesGPU->copyToHost(bodiesCPU); - b3AlignedObjectArray<b3InertiaData> inertiasCPU; - inertiasGPU->copyToHost(inertiasCPU); - b3AlignedObjectArray<b3Contact4> manifoldPtrCPU; - manifoldPtrGPU->copyToHost(manifoldPtrCPU); - - int numBodiesCPU = bodiesGPU->size(); - int numManifoldsCPU = manifoldPtrGPU->size(); - B3_PROFILE("b3GpuJacobiContactSolver::solveGroupMixed"); - - b3AlignedObjectArray<unsigned int> bodyCount; - bodyCount.resize(numBodiesCPU); - for (int i=0;i<numBodiesCPU;i++) - bodyCount[i] = 0; - - b3AlignedObjectArray<b3Int2> contactConstraintOffsets; - contactConstraintOffsets.resize(numManifoldsCPU); - - - for (int i=0;i<numManifoldsCPU;i++) - { - int pa = manifoldPtrCPU[i].m_bodyAPtrAndSignBit; - int pb = 
manifoldPtrCPU[i].m_bodyBPtrAndSignBit; - - bool isFixedA = (pa <0) || (pa == solverInfo.m_fixedBodyIndex); - bool isFixedB = (pb <0) || (pb == solverInfo.m_fixedBodyIndex); - - int bodyIndexA = manifoldPtrCPU[i].getBodyA(); - int bodyIndexB = manifoldPtrCPU[i].getBodyB(); - - if (!isFixedA) - { - contactConstraintOffsets[i].x = bodyCount[bodyIndexA]; - bodyCount[bodyIndexA]++; - } - if (!isFixedB) - { - contactConstraintOffsets[i].y = bodyCount[bodyIndexB]; - bodyCount[bodyIndexB]++; - } - } - - b3AlignedObjectArray<unsigned int> offsetSplitBodies; - offsetSplitBodies.resize(numBodiesCPU); - unsigned int totalNumSplitBodiesCPU; - m_data->m_scan->executeHost(bodyCount,offsetSplitBodies,numBodiesCPU,&totalNumSplitBodiesCPU); - int numlastBody = bodyCount[numBodiesCPU-1]; - totalNumSplitBodiesCPU += numlastBody; - - int numBodies = bodiesGPU->size(); - int numManifolds = manifoldPtrGPU->size(); - - m_data->m_bodyCount->resize(numBodies); - - unsigned int val=0; - b3Int2 val2; - val2.x=0; - val2.y=0; - - { - B3_PROFILE("m_filler"); - m_data->m_contactConstraintOffsets->resize(numManifolds); - m_data->m_filler->execute(*m_data->m_bodyCount,val,numBodies); - - - m_data->m_filler->execute(*m_data->m_contactConstraintOffsets,val2,numManifolds); - } - - { - B3_PROFILE("m_countBodiesKernel"); - b3LauncherCL launcher(this->m_queue,m_data->m_countBodiesKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setConst(numManifolds); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.launch1D(numManifolds); - } - - unsigned int totalNumSplitBodies=0; - m_data->m_offsetSplitBodies->resize(numBodies); - m_data->m_scan->execute(*m_data->m_bodyCount,*m_data->m_offsetSplitBodies,numBodies,&totalNumSplitBodies); - totalNumSplitBodies+=m_data->m_bodyCount->at(numBodies-1); - - if (totalNumSplitBodies != totalNumSplitBodiesCPU) - { 
- printf("error in totalNumSplitBodies!\n"); - } - - int numContacts = manifoldPtrGPU->size(); - m_data->m_contactConstraints->resize(numContacts); - - - { - B3_PROFILE("contactToConstraintSplitKernel"); - b3LauncherCL launcher( m_queue, m_data->m_contactToConstraintSplitKernel); - launcher.setBuffer(manifoldPtrGPU->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setConst(numContacts); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.launch1D( numContacts, 64 ); - clFinish(m_queue); - } - - - - b3AlignedObjectArray<b3GpuConstraint4> contactConstraints; - contactConstraints.resize(numManifoldsCPU); - - for (int i=0;i<numManifoldsCPU;i++) - { - ContactToConstraintKernel(&manifoldPtrCPU[0],&bodiesCPU[0],&inertiasCPU[0],&contactConstraints[0],numManifoldsCPU, - solverInfo.m_deltaTime, - solverInfo.m_positionDrift, - solverInfo.m_positionConstraintCoeff, - i, bodyCount); - } - int maxIter = solverInfo.m_numIterations; - - - b3AlignedObjectArray<b3Vector3> deltaLinearVelocities; - b3AlignedObjectArray<b3Vector3> deltaAngularVelocities; - deltaLinearVelocities.resize(totalNumSplitBodiesCPU); - deltaAngularVelocities.resize(totalNumSplitBodiesCPU); - for (int i=0;i<totalNumSplitBodiesCPU;i++) - { - deltaLinearVelocities[i].setZero(); - deltaAngularVelocities[i].setZero(); - } - - m_data->m_deltaLinearVelocities->resize(totalNumSplitBodies); - m_data->m_deltaAngularVelocities->resize(totalNumSplitBodies); - - - - { - B3_PROFILE("m_clearVelocitiesKernel"); - b3LauncherCL launch(m_queue,m_data->m_clearVelocitiesKernel); - launch.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launch.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - 
launch.setConst(totalNumSplitBodies); - launch.launch1D(totalNumSplitBodies); - } - - - ///!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - - - m_data->m_contactConstraints->copyToHost(contactConstraints); - m_data->m_offsetSplitBodies->copyToHost(offsetSplitBodies); - m_data->m_contactConstraintOffsets->copyToHost(contactConstraintOffsets); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - - for (int iter = 0;iter<maxIter;iter++) - { - - { - B3_PROFILE("m_solveContactKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveContactKernel ); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - - int i=0; - for( i=0; i<numManifoldsCPU; i++) - { - - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodiesCPU[aIdx]; - b3RigidBodyData& bodyB = bodiesCPU[bIdx]; - - b3Vector3 zero(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[i].x; - int splitIndexA = 
bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, inertiasCPU[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr ); - - - } - } - - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - 
deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } -// m_data->m_deltaAngularVelocities->copyFromHost(deltaAngularVelocities); - //m_data->m_deltaLinearVelocities->copyFromHost(deltaLinearVelocities); - m_data->m_deltaAngularVelocities->copyToHost(deltaAngularVelocities); - m_data->m_deltaLinearVelocities->copyToHost(deltaLinearVelocities); - -#if 0 - - { - B3_PROFILE("m_solveFrictionKernel"); - b3LauncherCL launcher( m_queue, m_data->m_solveFrictionKernel); - launcher.setBuffer(m_data->m_contactConstraints->getBufferCL()); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(inertiasGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactConstraintOffsets->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(solverInfo.m_deltaTime); - launcher.setConst(solverInfo.m_positionDrift); - launcher.setConst(solverInfo.m_positionConstraintCoeff); - launcher.setConst(solverInfo.m_fixedBodyIndex); - launcher.setConst(numManifolds); - - launcher.launch1D(numManifolds); - clFinish(m_queue); - } - - //solve friction - - for(int i=0; i<numManifoldsCPU; i++) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=contactConstraints[i].m_appliedRambdaDt[j]; - } - float frictionCoeff = contactConstraints[i].getFrictionCoeff(); - int aIdx = (int)contactConstraints[i].m_bodyA; - int bIdx = (int)contactConstraints[i].m_bodyB; - b3RigidBodyData& bodyA = bodiesCPU[aIdx]; - b3RigidBodyData& bodyB = bodiesCPU[bIdx]; - - b3Vector3 zero(0,0,0); - - b3Vector3* dlvAPtr=&zero; - b3Vector3* davAPtr=&zero; - b3Vector3* dlvBPtr=&zero; - b3Vector3* davBPtr=&zero; - - if (bodyA.m_invMass) - { - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = 
contactConstraintOffsets[i].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - dlvAPtr = &deltaLinearVelocities[splitIndexA]; - davAPtr = &deltaAngularVelocities[splitIndexA]; - } - - if (bodyB.m_invMass) - { - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[i].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - dlvBPtr =&deltaLinearVelocities[splitIndexB]; - davBPtr = &deltaAngularVelocities[splitIndexB]; - } - - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - solveFriction( contactConstraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,inertiasCPU[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, inertiasCPU[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt , *dlvAPtr,*davAPtr,*dlvBPtr,*davBPtr); - - } - - { - B3_PROFILE("average velocities"); - b3LauncherCL launcher( m_queue, m_data->m_averageVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/float(count); - b3Vector3 averageLinVel; - averageLinVel.setZero(); - b3Vector3 averageAngVel; - averageAngVel.setZero(); - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - for (int j=0;j<count;j++) - { - 
deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - } - } - -#endif - - } - - { - B3_PROFILE("update body velocities"); - b3LauncherCL launcher( m_queue, m_data->m_updateBodyVelocitiesKernel); - launcher.setBuffer(bodiesGPU->getBufferCL()); - launcher.setBuffer(m_data->m_offsetSplitBodies->getBufferCL()); - launcher.setBuffer(m_data->m_bodyCount->getBufferCL()); - launcher.setBuffer(m_data->m_deltaLinearVelocities->getBufferCL()); - launcher.setBuffer(m_data->m_deltaAngularVelocities->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_queue); - } - - - //easy - for (int i=0;i<numBodiesCPU;i++) - { - if (bodiesCPU[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - bodiesCPU[i].m_linVel += deltaLinearVelocities[bodyOffset]; - bodiesCPU[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } - - -// bodiesGPU->copyFromHost(bodiesCPU); - - -} -#endif diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h deleted file mode 100644 index 8281aee05d..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuJacobiContactSolver.h +++ /dev/null @@ -1,56 +0,0 @@ - -#ifndef B3_GPU_JACOBI_CONTACT_SOLVER_H -#define B3_GPU_JACOBI_CONTACT_SOLVER_H -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -//#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" - -//struct b3InertiaData; -//b3InertiaData - -class b3TypedConstraint; - -struct b3JacobiSolverInfo -{ - int m_fixedBodyIndex; - - float m_deltaTime; - float m_positionDrift; - float m_positionConstraintCoeff; - int m_numIterations; 
- - b3JacobiSolverInfo() - : m_fixedBodyIndex(0), - m_deltaTime(1. / 60.f), - m_positionDrift(0.005f), - m_positionConstraintCoeff(0.99f), - m_numIterations(7) - { - } -}; -class b3GpuJacobiContactSolver -{ -protected: - struct b3GpuJacobiSolverInternalData* m_data; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - -public: - b3GpuJacobiContactSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - virtual ~b3GpuJacobiContactSolver(); - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); - void solveGroupHost(b3RigidBodyData* bodies, b3InertiaData* inertias, int numBodies, struct b3Contact4* manifoldPtr, int numManifolds, const b3JacobiSolverInfo& solverInfo); - //void solveGroupHost(btRigidBodyCL* bodies,b3InertiaData* inertias,int numBodies,btContact4* manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btJacobiSolverInfo& solverInfo); - - //b3Scalar solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies,b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies,b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints,int numConstraints,const b3ContactSolverInfo& infoGlobal); - - //void solveGroup(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo); - //void solveGroupMixed(btOpenCLArray<btRigidBodyCL>* bodies,btOpenCLArray<btInertiaCL>* inertias,btOpenCLArray<btContact4>* manifoldPtr,const btJacobiSolverInfo& solverInfo); -}; -#endif //B3_GPU_JACOBI_CONTACT_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp deleted file mode 100644 index 2e4f6c1572..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.cpp +++ /dev/null @@ -1,1013 +0,0 @@ -#include 
"b3GpuNarrowPhase.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3ConvexHullContact.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include <string.h> -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3TriangleIndexVertexArray.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" - -#include "b3GpuNarrowPhaseInternalData.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3ConvexUtility.h" - -b3GpuNarrowPhase::b3GpuNarrowPhase(cl_context ctx, cl_device_id device, cl_command_queue queue, const b3Config& config) - : m_data(0), m_planeBodyIndex(-1), m_static0Index(-1), m_context(ctx), m_device(device), m_queue(queue) -{ - m_data = new b3GpuNarrowPhaseInternalData(); - m_data->m_currentContactBuffer = 0; - - memset(m_data, 0, sizeof(b3GpuNarrowPhaseInternalData)); - - m_data->m_config = config; - - m_data->m_gpuSatCollision = new GpuSatCollision(ctx, device, queue); - - m_data->m_triangleConvexPairs = new b3OpenCLArray<b3Int4>(m_context, m_queue, config.m_maxTriConvexPairCapacity); - - //m_data->m_convexPairsOutGPU = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_planePairs = new b3OpenCLArray<b3Int2>(ctx,queue,config.m_maxBroadphasePairs,false); - - m_data->m_pBufContactOutCPU = new b3AlignedObjectArray<b3Contact4>(); - m_data->m_pBufContactOutCPU->resize(config.m_maxBroadphasePairs); - m_data->m_bodyBufferCPU = new b3AlignedObjectArray<b3RigidBodyData>(); - m_data->m_bodyBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_inertiaBufferCPU = new b3AlignedObjectArray<b3InertiaData>(); - 
m_data->m_inertiaBufferCPU->resize(config.m_maxConvexBodies); - - m_data->m_pBufContactBuffersGPU[0] = new b3OpenCLArray<b3Contact4>(ctx, queue, config.m_maxContactCapacity, true); - m_data->m_pBufContactBuffersGPU[1] = new b3OpenCLArray<b3Contact4>(ctx, queue, config.m_maxContactCapacity, true); - - m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaData>(ctx, queue, config.m_maxConvexBodies, false); - m_data->m_collidablesGPU = new b3OpenCLArray<b3Collidable>(ctx, queue, config.m_maxConvexShapes); - m_data->m_collidablesCPU.reserve(config.m_maxConvexShapes); - - m_data->m_localShapeAABBCPU = new b3AlignedObjectArray<b3SapAabb>; - m_data->m_localShapeAABBGPU = new b3OpenCLArray<b3SapAabb>(ctx, queue, config.m_maxConvexShapes); - - //m_data->m_solverDataGPU = adl::Solver<adl::TYPE_CL>::allocate(ctx,queue, config.m_maxBroadphasePairs,false); - m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyData>(ctx, queue, config.m_maxConvexBodies, false); - - m_data->m_convexFacesGPU = new b3OpenCLArray<b3GpuFace>(ctx, queue, config.m_maxConvexShapes * config.m_maxFacesPerShape, false); - m_data->m_convexFaces.reserve(config.m_maxConvexShapes * config.m_maxFacesPerShape); - - m_data->m_gpuChildShapes = new b3OpenCLArray<b3GpuChildShape>(ctx, queue, config.m_maxCompoundChildShapes, false); - - m_data->m_convexPolyhedraGPU = new b3OpenCLArray<b3ConvexPolyhedronData>(ctx, queue, config.m_maxConvexShapes, false); - m_data->m_convexPolyhedra.reserve(config.m_maxConvexShapes); - - m_data->m_uniqueEdgesGPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexUniqueEdges, true); - m_data->m_uniqueEdges.reserve(config.m_maxConvexUniqueEdges); - - m_data->m_convexVerticesGPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexVertices, true); - m_data->m_convexVertices.reserve(config.m_maxConvexVertices); - - m_data->m_convexIndicesGPU = new b3OpenCLArray<int>(ctx, queue, config.m_maxConvexIndices, true); - 
m_data->m_convexIndices.reserve(config.m_maxConvexIndices); - - m_data->m_worldVertsB1GPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - m_data->m_clippingFacesOutGPU = new b3OpenCLArray<b3Int4>(ctx, queue, config.m_maxConvexBodies); - m_data->m_worldNormalsAGPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexBodies); - m_data->m_worldVertsA1GPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - m_data->m_worldVertsB2GPU = new b3OpenCLArray<b3Vector3>(ctx, queue, config.m_maxConvexBodies * config.m_maxVerticesPerFace); - - m_data->m_convexData = new b3AlignedObjectArray<b3ConvexUtility*>(); - - m_data->m_convexData->resize(config.m_maxConvexShapes); - m_data->m_convexPolyhedra.resize(config.m_maxConvexShapes); - - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - - m_data->m_subTreesGPU = new b3OpenCLArray<b3BvhSubtreeInfo>(this->m_context, this->m_queue); - m_data->m_treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context, this->m_queue); - m_data->m_bvhInfoGPU = new b3OpenCLArray<b3BvhInfo>(this->m_context, this->m_queue); - - //m_data->m_contactCGPU = new b3OpenCLArray<Constraint4>(ctx,queue,config.m_maxBroadphasePairs,false); - //m_data->m_frictionCGPU = new b3OpenCLArray<adl::Solver<adl::TYPE_CL>::allocateFrictionConstraint( m_data->m_deviceCL, config.m_maxBroadphasePairs); -} - -b3GpuNarrowPhase::~b3GpuNarrowPhase() -{ - delete m_data->m_gpuSatCollision; - - delete m_data->m_triangleConvexPairs; - //delete m_data->m_convexPairsOutGPU; - //delete m_data->m_planePairs; - delete m_data->m_pBufContactOutCPU; - delete m_data->m_bodyBufferCPU; - delete m_data->m_inertiaBufferCPU; - delete m_data->m_pBufContactBuffersGPU[0]; - delete m_data->m_pBufContactBuffersGPU[1]; - - delete m_data->m_inertiaBufferGPU; - delete m_data->m_collidablesGPU; - delete m_data->m_localShapeAABBCPU; - delete 
m_data->m_localShapeAABBGPU; - delete m_data->m_bodyBufferGPU; - delete m_data->m_convexFacesGPU; - delete m_data->m_gpuChildShapes; - delete m_data->m_convexPolyhedraGPU; - delete m_data->m_uniqueEdgesGPU; - delete m_data->m_convexVerticesGPU; - delete m_data->m_convexIndicesGPU; - delete m_data->m_worldVertsB1GPU; - delete m_data->m_clippingFacesOutGPU; - delete m_data->m_worldNormalsAGPU; - delete m_data->m_worldVertsA1GPU; - delete m_data->m_worldVertsB2GPU; - - delete m_data->m_bvhInfoGPU; - - for (int i = 0; i < m_data->m_bvhData.size(); i++) - { - delete m_data->m_bvhData[i]; - } - for (int i = 0; i < m_data->m_meshInterfaces.size(); i++) - { - delete m_data->m_meshInterfaces[i]; - } - m_data->m_meshInterfaces.clear(); - m_data->m_bvhData.clear(); - delete m_data->m_treeNodesGPU; - delete m_data->m_subTreesGPU; - - delete m_data->m_convexData; - delete m_data; -} - -int b3GpuNarrowPhase::allocateCollidable() -{ - int curSize = m_data->m_collidablesCPU.size(); - if (curSize < m_data->m_config.m_maxConvexShapes) - { - m_data->m_collidablesCPU.expand(); - return curSize; - } - else - { - b3Error("allocateCollidable out-of-range %d\n", m_data->m_config.m_maxConvexShapes); - } - return -1; -} - -int b3GpuNarrowPhase::registerSphereShape(float radius) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_SPHERE; - col.m_shapeIndex = 0; - col.m_radius = radius; - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - b3Vector3 myAabbMin = b3MakeVector3(-radius, -radius, -radius); - b3Vector3 myAabbMax = b3MakeVector3(radius, radius, radius); - - aabb.m_min[0] = myAabbMin[0]; //s_convexHeightField->m_aabb.m_min.x; - aabb.m_min[1] = myAabbMin[1]; //s_convexHeightField->m_aabb.m_min.y; - aabb.m_min[2] = myAabbMin[2]; //s_convexHeightField->m_aabb.m_min.z; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; 
//s_convexHeightField->m_aabb.m_max.x; - aabb.m_max[1] = myAabbMax[1]; //s_convexHeightField->m_aabb.m_max.y; - aabb.m_max[2] = myAabbMax[2]; //s_convexHeightField->m_aabb.m_max.z; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerFace(const b3Vector3& faceNormal, float faceConstant) -{ - int faceOffset = m_data->m_convexFaces.size(); - b3GpuFace& face = m_data->m_convexFaces.expand(); - face.m_plane = b3MakeVector3(faceNormal.x, faceNormal.y, faceNormal.z, faceConstant); - return faceOffset; -} - -int b3GpuNarrowPhase::registerPlaneShape(const b3Vector3& planeNormal, float planeConstant) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_PLANE; - col.m_shapeIndex = registerFace(planeNormal, planeConstant); - col.m_radius = planeConstant; - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - aabb.m_min[0] = -1e30f; - aabb.m_min[1] = -1e30f; - aabb.m_min[2] = -1e30f; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = 1e30f; - aabb.m_max[1] = 1e30f; - aabb.m_max[2] = 1e30f; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - clFinish(m_queue); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConvexHullShapeInternal(b3ConvexUtility* convexPtr, b3Collidable& col) -{ - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes + 1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes + 1); - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1); - convex.mC = convexPtr->mC; - convex.mE = convexPtr->mE; - convex.m_extents = convexPtr->m_extents; - convex.m_localCenter = convexPtr->m_localCenter; - 
convex.m_radius = convexPtr->m_radius; - - convex.m_numUniqueEdges = convexPtr->m_uniqueEdges.size(); - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - m_data->m_uniqueEdges.resize(edgeOffset + convex.m_numUniqueEdges); - - //convex data here - int i; - for (i = 0; i < convexPtr->m_uniqueEdges.size(); i++) - { - m_data->m_uniqueEdges[edgeOffset + i] = convexPtr->m_uniqueEdges[i]; - } - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - convex.m_numFaces = convexPtr->m_faces.size(); - - m_data->m_convexFaces.resize(faceOffset + convex.m_numFaces); - - for (i = 0; i < convexPtr->m_faces.size(); i++) - { - m_data->m_convexFaces[convex.m_faceOffset + i].m_plane = b3MakeVector3(convexPtr->m_faces[i].m_plane[0], - convexPtr->m_faces[i].m_plane[1], - convexPtr->m_faces[i].m_plane[2], - convexPtr->m_faces[i].m_plane[3]); - - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = convexPtr->m_faces[i].m_indices.size(); - m_data->m_convexFaces[convex.m_faceOffset + i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset + i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset + numIndices); - for (int p = 0; p < numIndices; p++) - { - m_data->m_convexIndices[indexOffset + p] = convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = convexPtr->m_vertices.size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset = vertexOffset; - - m_data->m_convexVertices.resize(vertexOffset + convex.m_numVertices); - for (int i = 0; i < convexPtr->m_vertices.size(); i++) - { - m_data->m_convexVertices[vertexOffset + i] = convexPtr->m_vertices[i]; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = convexPtr; - - return m_data->m_numAcceleratedShapes++; -} - -int b3GpuNarrowPhase::registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling) -{ - 
b3AlignedObjectArray<b3Vector3> verts; - - unsigned char* vts = (unsigned char*)vertices; - for (int i = 0; i < numVertices; i++) - { - float* vertex = (float*)&vts[i * strideInBytes]; - verts.push_back(b3MakeVector3(vertex[0] * scaling[0], vertex[1] * scaling[1], vertex[2] * scaling[2])); - } - - b3ConvexUtility* utilPtr = new b3ConvexUtility(); - bool merge = true; - if (numVertices) - { - utilPtr->initializePolyhedralFeatures(&verts[0], verts.size(), merge); - } - - int collidableIndex = registerConvexHullShape(utilPtr); - delete utilPtr; - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConvexHullShape(b3ConvexUtility* utilPtr) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_CONVEX_HULL; - col.m_shapeIndex = -1; - - { - b3Vector3 localCenter = b3MakeVector3(0, 0, 0); - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - localCenter += utilPtr->m_vertices[i]; - localCenter *= (1.f / utilPtr->m_vertices.size()); - utilPtr->m_localCenter = localCenter; - - col.m_shapeIndex = registerConvexHullShapeInternal(utilPtr, col); - } - - if (col.m_shapeIndex >= 0) - { - b3SapAabb aabb; - - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - for (int i = 0; i < utilPtr->m_vertices.size(); i++) - { - myAabbMin.setMin(utilPtr->m_vertices[i]); - myAabbMax.setMax(utilPtr->m_vertices[i]); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - } - - return collidableIndex; -} - -int 
b3GpuNarrowPhase::registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes) -{ - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - col.m_shapeType = SHAPE_COMPOUND_OF_CONVEX_HULLS; - col.m_shapeIndex = m_data->m_cpuChildShapes.size(); - col.m_compoundBvhIndex = m_data->m_bvhInfoCPU.size(); - - { - b3Assert(col.m_shapeIndex + childShapes->size() < m_data->m_config.m_maxCompoundChildShapes); - for (int i = 0; i < childShapes->size(); i++) - { - m_data->m_cpuChildShapes.push_back(childShapes->at(i)); - } - } - - col.m_numChildShapes = childShapes->size(); - - b3SapAabb aabbLocalSpace; - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - b3AlignedObjectArray<b3Aabb> childLocalAabbs; - childLocalAabbs.resize(childShapes->size()); - - //compute local AABB of the compound of all children - for (int i = 0; i < childShapes->size(); i++) - { - int childColIndex = childShapes->at(i).m_shapeIndex; - //b3Collidable& childCol = getCollidableCpu(childColIndex); - b3SapAabb aabbLoc = m_data->m_localShapeAABBCPU->at(childColIndex); - - b3Vector3 childLocalAabbMin = b3MakeVector3(aabbLoc.m_min[0], aabbLoc.m_min[1], aabbLoc.m_min[2]); - b3Vector3 childLocalAabbMax = b3MakeVector3(aabbLoc.m_max[0], aabbLoc.m_max[1], aabbLoc.m_max[2]); - b3Vector3 aMin, aMax; - b3Scalar margin(0.f); - b3Transform childTr; - childTr.setIdentity(); - - childTr.setOrigin(childShapes->at(i).m_childPosition); - childTr.setRotation(b3Quaternion(childShapes->at(i).m_childOrientation)); - b3TransformAabb(childLocalAabbMin, childLocalAabbMax, margin, childTr, aMin, aMax); - myAabbMin.setMin(aMin); - myAabbMax.setMax(aMax); - childLocalAabbs[i].m_min[0] = aMin[0]; - childLocalAabbs[i].m_min[1] = aMin[1]; - childLocalAabbs[i].m_min[2] = aMin[2]; - childLocalAabbs[i].m_min[3] = 0; - childLocalAabbs[i].m_max[0] = 
aMax[0]; - childLocalAabbs[i].m_max[1] = aMax[1]; - childLocalAabbs[i].m_max[2] = aMax[2]; - childLocalAabbs[i].m_max[3] = 0; - } - - aabbLocalSpace.m_min[0] = myAabbMin[0]; //s_convexHeightField->m_aabb.m_min.x; - aabbLocalSpace.m_min[1] = myAabbMin[1]; //s_convexHeightField->m_aabb.m_min.y; - aabbLocalSpace.m_min[2] = myAabbMin[2]; //s_convexHeightField->m_aabb.m_min.z; - aabbLocalSpace.m_minIndices[3] = 0; - - aabbLocalSpace.m_max[0] = myAabbMax[0]; //s_convexHeightField->m_aabb.m_max.x; - aabbLocalSpace.m_max[1] = myAabbMax[1]; //s_convexHeightField->m_aabb.m_max.y; - aabbLocalSpace.m_max[2] = myAabbMax[2]; //s_convexHeightField->m_aabb.m_max.z; - aabbLocalSpace.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabbLocalSpace); - - b3QuantizedBvh* bvh = new b3QuantizedBvh; - bvh->setQuantizationValues(myAabbMin, myAabbMax); - QuantizedNodeArray& nodes = bvh->getLeafNodeArray(); - int numNodes = childShapes->size(); - - for (int i = 0; i < numNodes; i++) - { - b3QuantizedBvhNode node; - b3Vector3 aabbMin, aabbMax; - aabbMin = (b3Vector3&)childLocalAabbs[i].m_min; - aabbMax = (b3Vector3&)childLocalAabbs[i].m_max; - - bvh->quantize(&node.m_quantizedAabbMin[0], aabbMin, 0); - bvh->quantize(&node.m_quantizedAabbMax[0], aabbMax, 1); - int partId = 0; - node.m_escapeIndexOrTriangleIndex = (partId << (31 - MAX_NUM_PARTS_IN_BITS)) | i; - nodes.push_back(node); - } - bvh->buildInternal(); - - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - //void setQuantizationValues(const b3Vector3& bvhAabbMin,const b3Vector3& bvhAabbMax,b3Scalar quantizationMargin=b3Scalar(1.0)); - //QuantizedNodeArray& getLeafNodeArray() { return m_quantizedLeafNodes; } - ///buildInternal is expert use only: assumes that setQuantizationValues and LeafNodeArray are initialized - //void buildInternal(); - - b3BvhInfo bvhInfo; - - bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - 
bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - int numNewNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewNodes - 1; i++) - { - if (bvh->getQuantizedNodeArray()[i].isLeafNode()) - { - int orgIndex = bvh->getQuantizedNodeArray()[i].getTriangleIndex(); - - b3Vector3 nodeMinVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMin); - b3Vector3 nodeMaxVec = bvh->unQuantize(bvh->getQuantizedNodeArray()[i].m_quantizedAabbMax); - - for (int c = 0; c < 3; c++) - { - if (childLocalAabbs[orgIndex].m_min[c] < nodeMinVec[c]) - { - printf("min org (%f) and new (%f) ? at i:%d,c:%d\n", childLocalAabbs[i].m_min[c], nodeMinVec[c], i, c); - } - if (childLocalAabbs[orgIndex].m_max[c] > nodeMaxVec[c]) - { - printf("max org (%f) and new (%f) ? at i:%d,c:%d\n", childLocalAabbs[i].m_max[c], nodeMaxVec[c], i, c); - } - } - } - } - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size() + numNewSubtrees); - for (int i = 0; i < numNewSubtrees; i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewTreeNodes; i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - - // m_data->m_localShapeAABBGPU->push_back(aabbWS); - clFinish(m_queue); - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling1) -{ - b3Vector3 scaling = b3MakeVector3(scaling1[0], scaling1[1], scaling1[2]); - - int collidableIndex = allocateCollidable(); - if (collidableIndex < 0) - return collidableIndex; - - b3Collidable& col = getCollidableCpu(collidableIndex); - - col.m_shapeType = 
SHAPE_CONCAVE_TRIMESH; - col.m_shapeIndex = registerConcaveMeshShape(vertices, indices, col, scaling); - col.m_bvhIndex = m_data->m_bvhInfoCPU.size(); - - b3SapAabb aabb; - b3Vector3 myAabbMin = b3MakeVector3(1e30f, 1e30f, 1e30f); - b3Vector3 myAabbMax = b3MakeVector3(-1e30f, -1e30f, -1e30f); - - for (int i = 0; i < vertices->size(); i++) - { - b3Vector3 vtx(vertices->at(i) * scaling); - myAabbMin.setMin(vtx); - myAabbMax.setMax(vtx); - } - aabb.m_min[0] = myAabbMin[0]; - aabb.m_min[1] = myAabbMin[1]; - aabb.m_min[2] = myAabbMin[2]; - aabb.m_minIndices[3] = 0; - - aabb.m_max[0] = myAabbMax[0]; - aabb.m_max[1] = myAabbMax[1]; - aabb.m_max[2] = myAabbMax[2]; - aabb.m_signedMaxIndices[3] = 0; - - m_data->m_localShapeAABBCPU->push_back(aabb); - // m_data->m_localShapeAABBGPU->push_back(aabb); - - b3OptimizedBvh* bvh = new b3OptimizedBvh(); - //void b3OptimizedBvh::build(b3StridingMeshInterface* triangles, bool useQuantizedAabbCompression, const b3Vector3& bvhAabbMin, const b3Vector3& bvhAabbMax) - - bool useQuantizedAabbCompression = true; - b3TriangleIndexVertexArray* meshInterface = new b3TriangleIndexVertexArray(); - m_data->m_meshInterfaces.push_back(meshInterface); - b3IndexedMesh mesh; - mesh.m_numTriangles = indices->size() / 3; - mesh.m_numVertices = vertices->size(); - mesh.m_vertexBase = (const unsigned char*)&vertices->at(0).x; - mesh.m_vertexStride = sizeof(b3Vector3); - mesh.m_triangleIndexStride = 3 * sizeof(int); // or sizeof(int) - mesh.m_triangleIndexBase = (const unsigned char*)&indices->at(0); - - meshInterface->addIndexedMesh(mesh); - bvh->build(meshInterface, useQuantizedAabbCompression, (b3Vector3&)aabb.m_min, (b3Vector3&)aabb.m_max); - m_data->m_bvhData.push_back(bvh); - int numNodes = bvh->getQuantizedNodeArray().size(); - //b3OpenCLArray<b3QuantizedBvhNode>* treeNodesGPU = new b3OpenCLArray<b3QuantizedBvhNode>(this->m_context,this->m_queue,numNodes); - int numSubTrees = bvh->getSubtreeInfoArray().size(); - - b3BvhInfo bvhInfo; - - 
bvhInfo.m_aabbMin = bvh->m_bvhAabbMin; - bvhInfo.m_aabbMax = bvh->m_bvhAabbMax; - bvhInfo.m_quantization = bvh->m_bvhQuantization; - bvhInfo.m_numNodes = numNodes; - bvhInfo.m_numSubTrees = numSubTrees; - bvhInfo.m_nodeOffset = m_data->m_treeNodesCPU.size(); - bvhInfo.m_subTreeOffset = m_data->m_subTreesCPU.size(); - - m_data->m_bvhInfoCPU.push_back(bvhInfo); - - int numNewSubtrees = bvh->getSubtreeInfoArray().size(); - m_data->m_subTreesCPU.reserve(m_data->m_subTreesCPU.size() + numNewSubtrees); - for (int i = 0; i < numNewSubtrees; i++) - { - m_data->m_subTreesCPU.push_back(bvh->getSubtreeInfoArray()[i]); - } - int numNewTreeNodes = bvh->getQuantizedNodeArray().size(); - - for (int i = 0; i < numNewTreeNodes; i++) - { - m_data->m_treeNodesCPU.push_back(bvh->getQuantizedNodeArray()[i]); - } - - return collidableIndex; -} - -int b3GpuNarrowPhase::registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling1) -{ - b3Vector3 scaling = b3MakeVector3(scaling1[0], scaling1[1], scaling1[2]); - - m_data->m_convexData->resize(m_data->m_numAcceleratedShapes + 1); - m_data->m_convexPolyhedra.resize(m_data->m_numAcceleratedShapes + 1); - - b3ConvexPolyhedronData& convex = m_data->m_convexPolyhedra.at(m_data->m_convexPolyhedra.size() - 1); - convex.mC = b3MakeVector3(0, 0, 0); - convex.mE = b3MakeVector3(0, 0, 0); - convex.m_extents = b3MakeVector3(0, 0, 0); - convex.m_localCenter = b3MakeVector3(0, 0, 0); - convex.m_radius = 0.f; - - convex.m_numUniqueEdges = 0; - int edgeOffset = m_data->m_uniqueEdges.size(); - convex.m_uniqueEdgesOffset = edgeOffset; - - int faceOffset = m_data->m_convexFaces.size(); - convex.m_faceOffset = faceOffset; - - convex.m_numFaces = indices->size() / 3; - m_data->m_convexFaces.resize(faceOffset + convex.m_numFaces); - m_data->m_convexIndices.reserve(convex.m_numFaces * 3); - for (int i = 0; i < convex.m_numFaces; i++) - { - if (i % 256 == 0) - { - //printf("i=%d out 
of %d", i,convex.m_numFaces); - } - b3Vector3 vert0(vertices->at(indices->at(i * 3)) * scaling); - b3Vector3 vert1(vertices->at(indices->at(i * 3 + 1)) * scaling); - b3Vector3 vert2(vertices->at(indices->at(i * 3 + 2)) * scaling); - - b3Vector3 normal = ((vert1 - vert0).cross(vert2 - vert0)).normalize(); - b3Scalar c = -(normal.dot(vert0)); - - m_data->m_convexFaces[convex.m_faceOffset + i].m_plane = b3MakeVector4(normal.x, normal.y, normal.z, c); - int indexOffset = m_data->m_convexIndices.size(); - int numIndices = 3; - m_data->m_convexFaces[convex.m_faceOffset + i].m_numIndices = numIndices; - m_data->m_convexFaces[convex.m_faceOffset + i].m_indexOffset = indexOffset; - m_data->m_convexIndices.resize(indexOffset + numIndices); - for (int p = 0; p < numIndices; p++) - { - int vi = indices->at(i * 3 + p); - m_data->m_convexIndices[indexOffset + p] = vi; //convexPtr->m_faces[i].m_indices[p]; - } - } - - convex.m_numVertices = vertices->size(); - int vertexOffset = m_data->m_convexVertices.size(); - convex.m_vertexOffset = vertexOffset; - m_data->m_convexVertices.resize(vertexOffset + convex.m_numVertices); - for (int i = 0; i < vertices->size(); i++) - { - m_data->m_convexVertices[vertexOffset + i] = vertices->at(i) * scaling; - } - - (*m_data->m_convexData)[m_data->m_numAcceleratedShapes] = 0; - - return m_data->m_numAcceleratedShapes++; -} - -cl_mem b3GpuNarrowPhase::getBodiesGpu() -{ - return (cl_mem)m_data->m_bodyBufferGPU->getBufferCL(); -} - -const struct b3RigidBodyData* b3GpuNarrowPhase::getBodiesCpu() const -{ - return &m_data->m_bodyBufferCPU->at(0); -}; - -int b3GpuNarrowPhase::getNumBodiesGpu() const -{ - return m_data->m_bodyBufferGPU->size(); -} - -cl_mem b3GpuNarrowPhase::getBodyInertiasGpu() -{ - return (cl_mem)m_data->m_inertiaBufferGPU->getBufferCL(); -} - -int b3GpuNarrowPhase::getNumBodyInertiasGpu() const -{ - return m_data->m_inertiaBufferGPU->size(); -} - -b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) -{ - return 
m_data->m_collidablesCPU[collidableIndex]; -} - -const b3Collidable& b3GpuNarrowPhase::getCollidableCpu(int collidableIndex) const -{ - return m_data->m_collidablesCPU[collidableIndex]; -} - -cl_mem b3GpuNarrowPhase::getCollidablesGpu() -{ - return m_data->m_collidablesGPU->getBufferCL(); -} - -const struct b3Collidable* b3GpuNarrowPhase::getCollidablesCpu() const -{ - if (m_data->m_collidablesCPU.size()) - return &m_data->m_collidablesCPU[0]; - return 0; -} - -const struct b3SapAabb* b3GpuNarrowPhase::getLocalSpaceAabbsCpu() const -{ - if (m_data->m_localShapeAABBCPU->size()) - { - return &m_data->m_localShapeAABBCPU->at(0); - } - return 0; -} - -cl_mem b3GpuNarrowPhase::getAabbLocalSpaceBufferGpu() -{ - return m_data->m_localShapeAABBGPU->getBufferCL(); -} -int b3GpuNarrowPhase::getNumCollidablesGpu() const -{ - return m_data->m_collidablesGPU->size(); -} - -int b3GpuNarrowPhase::getNumContactsGpu() const -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); -} -cl_mem b3GpuNarrowPhase::getContactsGpu() -{ - return m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->getBufferCL(); -} - -const b3Contact4* b3GpuNarrowPhase::getContactsCPU() const -{ - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->copyToHost(*m_data->m_pBufContactOutCPU); - return &m_data->m_pBufContactOutCPU->at(0); -} - -void b3GpuNarrowPhase::computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects) -{ - cl_mem aabbsLocalSpace = m_data->m_localShapeAABBGPU->getBufferCL(); - - int nContactOut = 0; - - //swap buffer - m_data->m_currentContactBuffer = 1 - m_data->m_currentContactBuffer; - - //int curSize = m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer]->size(); - - int maxTriConvexPairCapacity = m_data->m_config.m_maxTriConvexPairCapacity; - int numTriConvexPairsOut = 0; - - b3OpenCLArray<b3Int4> broadphasePairsGPU(m_context, m_queue); - 
broadphasePairsGPU.setFromOpenCLBuffer(broadphasePairs, numBroadphasePairs); - - b3OpenCLArray<b3Aabb> clAabbArrayWorldSpace(this->m_context, this->m_queue); - clAabbArrayWorldSpace.setFromOpenCLBuffer(aabbsWorldSpace, numObjects); - - b3OpenCLArray<b3Aabb> clAabbArrayLocalSpace(this->m_context, this->m_queue); - clAabbArrayLocalSpace.setFromOpenCLBuffer(aabbsLocalSpace, numObjects); - - m_data->m_gpuSatCollision->computeConvexConvexContactsGPUSAT( - &broadphasePairsGPU, numBroadphasePairs, - m_data->m_bodyBufferGPU, - m_data->m_pBufContactBuffersGPU[m_data->m_currentContactBuffer], - nContactOut, - m_data->m_pBufContactBuffersGPU[1 - m_data->m_currentContactBuffer], - m_data->m_config.m_maxContactCapacity, - m_data->m_config.m_compoundPairCapacity, - *m_data->m_convexPolyhedraGPU, - *m_data->m_convexVerticesGPU, - *m_data->m_uniqueEdgesGPU, - *m_data->m_convexFacesGPU, - *m_data->m_convexIndicesGPU, - *m_data->m_collidablesGPU, - *m_data->m_gpuChildShapes, - clAabbArrayWorldSpace, - clAabbArrayLocalSpace, - *m_data->m_worldVertsB1GPU, - *m_data->m_clippingFacesOutGPU, - *m_data->m_worldNormalsAGPU, - *m_data->m_worldVertsA1GPU, - *m_data->m_worldVertsB2GPU, - m_data->m_bvhData, - m_data->m_treeNodesGPU, - m_data->m_subTreesGPU, - m_data->m_bvhInfoGPU, - numObjects, - maxTriConvexPairCapacity, - *m_data->m_triangleConvexPairs, - numTriConvexPairsOut); - - /*b3AlignedObjectArray<b3Int4> broadphasePairsCPU; - broadphasePairsGPU.copyToHost(broadphasePairsCPU); - printf("checking pairs\n"); - */ -} - -const b3SapAabb& b3GpuNarrowPhase::getLocalSpaceAabb(int collidableIndex) const -{ - return m_data->m_localShapeAABBCPU->at(collidableIndex); -} - -int b3GpuNarrowPhase::registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMinPtr, const float* aabbMaxPtr, bool writeToGpu) -{ - b3Vector3 aabbMin = b3MakeVector3(aabbMinPtr[0], aabbMinPtr[1], aabbMinPtr[2]); - b3Vector3 aabbMax = b3MakeVector3(aabbMaxPtr[0], 
aabbMaxPtr[1], aabbMaxPtr[2]); - - if (m_data->m_numAcceleratedRigidBodies >= (m_data->m_config.m_maxConvexBodies)) - { - b3Error("registerRigidBody: exceeding the number of rigid bodies, %d > %d \n", m_data->m_numAcceleratedRigidBodies, m_data->m_config.m_maxConvexBodies); - return -1; - } - - m_data->m_bodyBufferCPU->resize(m_data->m_numAcceleratedRigidBodies + 1); - - b3RigidBodyData& body = m_data->m_bodyBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - float friction = 1.f; - float restitution = 0.f; - - body.m_frictionCoeff = friction; - body.m_restituitionCoeff = restitution; - body.m_angVel = b3MakeVector3(0, 0, 0); - body.m_linVel = b3MakeVector3(0, 0, 0); //.setZero(); - body.m_pos = b3MakeVector3(position[0], position[1], position[2]); - body.m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]); - body.m_collidableIdx = collidableIndex; - if (collidableIndex >= 0) - { - // body.m_shapeType = m_data->m_collidablesCPU.at(collidableIndex).m_shapeType; - } - else - { - // body.m_shapeType = CollisionShape::SHAPE_PLANE; - m_planeBodyIndex = m_data->m_numAcceleratedRigidBodies; - } - //body.m_shapeType = shapeType; - - body.m_invMass = mass ? 
1.f / mass : 0.f; - - if (writeToGpu) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&body, 1, m_data->m_numAcceleratedRigidBodies); - } - - b3InertiaData& shapeInfo = m_data->m_inertiaBufferCPU->at(m_data->m_numAcceleratedRigidBodies); - - if (mass == 0.f) - { - if (m_data->m_numAcceleratedRigidBodies == 0) - m_static0Index = 0; - - shapeInfo.m_initInvInertia.setValue(0, 0, 0, 0, 0, 0, 0, 0, 0); - shapeInfo.m_invInertiaWorld.setValue(0, 0, 0, 0, 0, 0, 0, 0, 0); - } - else - { - b3Assert(body.m_collidableIdx >= 0); - - //approximate using the aabb of the shape - - //Aabb aabb = (*m_data->m_shapePointers)[shapeIndex]->m_aabb; - b3Vector3 halfExtents = (aabbMax - aabbMin); //*0.5f;//fake larger inertia makes demos more stable ;-) - - b3Vector3 localInertia; - - float lx = 2.f * halfExtents[0]; - float ly = 2.f * halfExtents[1]; - float lz = 2.f * halfExtents[2]; - - localInertia.setValue((mass / 12.0f) * (ly * ly + lz * lz), - (mass / 12.0f) * (lx * lx + lz * lz), - (mass / 12.0f) * (lx * lx + ly * ly)); - - b3Vector3 invLocalInertia; - invLocalInertia[0] = 1.f / localInertia[0]; - invLocalInertia[1] = 1.f / localInertia[1]; - invLocalInertia[2] = 1.f / localInertia[2]; - invLocalInertia[3] = 0.f; - - shapeInfo.m_initInvInertia.setValue( - invLocalInertia[0], 0, 0, - 0, invLocalInertia[1], 0, - 0, 0, invLocalInertia[2]); - - b3Matrix3x3 m(body.m_quat); - - shapeInfo.m_invInertiaWorld = m.scaled(invLocalInertia) * m.transpose(); - } - - if (writeToGpu) - m_data->m_inertiaBufferGPU->copyFromHostPointer(&shapeInfo, 1, m_data->m_numAcceleratedRigidBodies); - - return m_data->m_numAcceleratedRigidBodies++; -} - -int b3GpuNarrowPhase::getNumRigidBodies() const -{ - return m_data->m_numAcceleratedRigidBodies; -} - -void b3GpuNarrowPhase::writeAllBodiesToGpu() -{ - if (m_data->m_localShapeAABBCPU->size()) - { - m_data->m_localShapeAABBGPU->copyFromHost(*m_data->m_localShapeAABBCPU); - } - - m_data->m_gpuChildShapes->copyFromHost(m_data->m_cpuChildShapes); - 
m_data->m_convexFacesGPU->copyFromHost(m_data->m_convexFaces); - m_data->m_convexPolyhedraGPU->copyFromHost(m_data->m_convexPolyhedra); - m_data->m_uniqueEdgesGPU->copyFromHost(m_data->m_uniqueEdges); - m_data->m_convexVerticesGPU->copyFromHost(m_data->m_convexVertices); - m_data->m_convexIndicesGPU->copyFromHost(m_data->m_convexIndices); - m_data->m_bvhInfoGPU->copyFromHost(m_data->m_bvhInfoCPU); - m_data->m_treeNodesGPU->copyFromHost(m_data->m_treeNodesCPU); - m_data->m_subTreesGPU->copyFromHost(m_data->m_subTreesCPU); - - m_data->m_bodyBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->resize(m_data->m_numAcceleratedRigidBodies); - - if (m_data->m_numAcceleratedRigidBodies) - { - m_data->m_bodyBufferGPU->copyFromHostPointer(&m_data->m_bodyBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); - m_data->m_inertiaBufferGPU->copyFromHostPointer(&m_data->m_inertiaBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); - } - if (m_data->m_collidablesCPU.size()) - { - m_data->m_collidablesGPU->copyFromHost(m_data->m_collidablesCPU); - } -} - -void b3GpuNarrowPhase::reset() -{ - m_data->m_numAcceleratedShapes = 0; - m_data->m_numAcceleratedRigidBodies = 0; - this->m_static0Index = -1; - m_data->m_uniqueEdges.resize(0); - m_data->m_convexVertices.resize(0); - m_data->m_convexPolyhedra.resize(0); - m_data->m_convexIndices.resize(0); - m_data->m_cpuChildShapes.resize(0); - m_data->m_convexFaces.resize(0); - m_data->m_collidablesCPU.resize(0); - m_data->m_localShapeAABBCPU->resize(0); - m_data->m_bvhData.resize(0); - m_data->m_treeNodesCPU.resize(0); - m_data->m_subTreesCPU.resize(0); - m_data->m_bvhInfoCPU.resize(0); -} - -void b3GpuNarrowPhase::readbackAllBodiesToCpu() -{ - m_data->m_bodyBufferGPU->copyToHostPointer(&m_data->m_bodyBufferCPU->at(0), m_data->m_numAcceleratedRigidBodies); -} - -void b3GpuNarrowPhase::setObjectTransformCpu(float* position, float* orientation, int bodyIndex) -{ - if (bodyIndex >= 0 && bodyIndex < 
m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_pos = b3MakeVector3(position[0], position[1], position[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.setValue(orientation[0], orientation[1], orientation[2], orientation[3]); - } - else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} -void b3GpuNarrowPhase::setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex) -{ - if (bodyIndex >= 0 && bodyIndex < m_data->m_bodyBufferCPU->size()) - { - m_data->m_bodyBufferCPU->at(bodyIndex).m_linVel = b3MakeVector3(linVel[0], linVel[1], linVel[2]); - m_data->m_bodyBufferCPU->at(bodyIndex).m_angVel = b3MakeVector3(angVel[0], angVel[1], angVel[2]); - } - else - { - b3Warning("setObjectVelocityCpu out of range.\n"); - } -} - -bool b3GpuNarrowPhase::getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const -{ - if (bodyIndex >= 0 && bodyIndex < m_data->m_bodyBufferCPU->size()) - { - position[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.x; - position[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.y; - position[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_pos.z; - position[3] = 1.f; //or 1 - - orientation[0] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.x; - orientation[1] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.y; - orientation[2] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.z; - orientation[3] = m_data->m_bodyBufferCPU->at(bodyIndex).m_quat.w; - return true; - } - - b3Warning("getObjectTransformFromCpu out of range.\n"); - return false; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h deleted file mode 100644 index 21a68de343..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhase.h +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef B3_GPU_NARROWPHASE_H -#define B3_GPU_NARROWPHASE_H - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" -#include 
"Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -class b3GpuNarrowPhase -{ -protected: - struct b3GpuNarrowPhaseInternalData* m_data; - int m_acceleratedCompanionShapeIndex; - int m_planeBodyIndex; - int m_static0Index; - - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - int registerConvexHullShapeInternal(class b3ConvexUtility* convexPtr, b3Collidable& col); - int registerConcaveMeshShape(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, b3Collidable& col, const float* scaling); - -public: - b3GpuNarrowPhase(cl_context vtx, cl_device_id dev, cl_command_queue q, const struct b3Config& config); - - virtual ~b3GpuNarrowPhase(void); - - int registerSphereShape(float radius); - int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - int registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes); - int registerFace(const b3Vector3& faceNormal, float faceConstant); - - int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling); - - //do they need to be merged? 
- - int registerConvexHullShape(b3ConvexUtility* utilPtr); - int registerConvexHullShape(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - - int registerRigidBody(int collidableIndex, float mass, const float* position, const float* orientation, const float* aabbMin, const float* aabbMax, bool writeToGpu); - void setObjectTransform(const float* position, const float* orientation, int bodyIndex); - - void writeAllBodiesToGpu(); - void reset(); - void readbackAllBodiesToCpu(); - bool getObjectTransformFromCpu(float* position, float* orientation, int bodyIndex) const; - - void setObjectTransformCpu(float* position, float* orientation, int bodyIndex); - void setObjectVelocityCpu(float* linVel, float* angVel, int bodyIndex); - - virtual void computeContacts(cl_mem broadphasePairs, int numBroadphasePairs, cl_mem aabbsWorldSpace, int numObjects); - - cl_mem getBodiesGpu(); - const struct b3RigidBodyData* getBodiesCpu() const; - //struct b3RigidBodyData* getBodiesCpu(); - - int getNumBodiesGpu() const; - - cl_mem getBodyInertiasGpu(); - int getNumBodyInertiasGpu() const; - - cl_mem getCollidablesGpu(); - const struct b3Collidable* getCollidablesCpu() const; - int getNumCollidablesGpu() const; - - const struct b3SapAabb* getLocalSpaceAabbsCpu() const; - - const struct b3Contact4* getContactsCPU() const; - - cl_mem getContactsGpu(); - int getNumContactsGpu() const; - - cl_mem getAabbLocalSpaceBufferGpu(); - - int getNumRigidBodies() const; - - int allocateCollidable(); - - int getStatic0Index() const - { - return m_static0Index; - } - b3Collidable& getCollidableCpu(int collidableIndex); - const b3Collidable& getCollidableCpu(int collidableIndex) const; - - const b3GpuNarrowPhaseInternalData* getInternalData() const - { - return m_data; - } - - b3GpuNarrowPhaseInternalData* getInternalData() - { - return m_data; - } - - const struct b3SapAabb& getLocalSpaceAabb(int collidableIndex) const; -}; - -#endif //B3_GPU_NARROWPHASE_H diff --git 
a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h deleted file mode 100644 index 716a5ea0fc..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h +++ /dev/null @@ -1,89 +0,0 @@ - -#ifndef B3_GPU_NARROWPHASE_INTERNAL_DATA_H -#define B3_GPU_NARROWPHASE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Common/b3Vector3.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" - -#include "Bullet3OpenCL/NarrowphaseCollision/b3QuantizedBvh.h" -#include "Bullet3OpenCL/NarrowphaseCollision/b3BvhInfo.h" -#include "Bullet3Common/shared/b3Int4.h" -#include "Bullet3Common/shared/b3Int2.h" - -class b3ConvexUtility; - -struct b3GpuNarrowPhaseInternalData -{ - b3AlignedObjectArray<b3ConvexUtility*>* m_convexData; - - b3AlignedObjectArray<b3ConvexPolyhedronData> m_convexPolyhedra; - b3AlignedObjectArray<b3Vector3> m_uniqueEdges; - b3AlignedObjectArray<b3Vector3> m_convexVertices; - b3AlignedObjectArray<int> m_convexIndices; - - b3OpenCLArray<b3ConvexPolyhedronData>* m_convexPolyhedraGPU; - b3OpenCLArray<b3Vector3>* m_uniqueEdgesGPU; - b3OpenCLArray<b3Vector3>* m_convexVerticesGPU; - b3OpenCLArray<int>* m_convexIndicesGPU; - - b3OpenCLArray<b3Vector3>* m_worldVertsB1GPU; - b3OpenCLArray<b3Int4>* m_clippingFacesOutGPU; - b3OpenCLArray<b3Vector3>* m_worldNormalsAGPU; - b3OpenCLArray<b3Vector3>* m_worldVertsA1GPU; - b3OpenCLArray<b3Vector3>* 
m_worldVertsB2GPU; - - b3AlignedObjectArray<b3GpuChildShape> m_cpuChildShapes; - b3OpenCLArray<b3GpuChildShape>* m_gpuChildShapes; - - b3AlignedObjectArray<b3GpuFace> m_convexFaces; - b3OpenCLArray<b3GpuFace>* m_convexFacesGPU; - - struct GpuSatCollision* m_gpuSatCollision; - - b3OpenCLArray<b3Int4>* m_triangleConvexPairs; - - b3OpenCLArray<b3Contact4>* m_pBufContactBuffersGPU[2]; - int m_currentContactBuffer; - b3AlignedObjectArray<b3Contact4>* m_pBufContactOutCPU; - - b3AlignedObjectArray<b3RigidBodyData>* m_bodyBufferCPU; - b3OpenCLArray<b3RigidBodyData>* m_bodyBufferGPU; - - b3AlignedObjectArray<b3InertiaData>* m_inertiaBufferCPU; - b3OpenCLArray<b3InertiaData>* m_inertiaBufferGPU; - - int m_numAcceleratedShapes; - int m_numAcceleratedRigidBodies; - - b3AlignedObjectArray<b3Collidable> m_collidablesCPU; - b3OpenCLArray<b3Collidable>* m_collidablesGPU; - - b3OpenCLArray<b3SapAabb>* m_localShapeAABBGPU; - b3AlignedObjectArray<b3SapAabb>* m_localShapeAABBCPU; - - b3AlignedObjectArray<class b3OptimizedBvh*> m_bvhData; - b3AlignedObjectArray<class b3TriangleIndexVertexArray*> m_meshInterfaces; - - b3AlignedObjectArray<b3QuantizedBvhNode> m_treeNodesCPU; - b3AlignedObjectArray<b3BvhSubtreeInfo> m_subTreesCPU; - - b3AlignedObjectArray<b3BvhInfo> m_bvhInfoCPU; - b3OpenCLArray<b3BvhInfo>* m_bvhInfoGPU; - - b3OpenCLArray<b3QuantizedBvhNode>* m_treeNodesGPU; - b3OpenCLArray<b3BvhSubtreeInfo>* m_subTreesGPU; - - b3Config m_config; -}; - -#endif //B3_GPU_NARROWPHASE_INTERNAL_DATA_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp deleted file mode 100644 index bd9d6bb04b..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.cpp +++ /dev/null @@ -1,1068 +0,0 @@ - -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -bool useGpuInitSolverBodies = true; -bool useGpuInfo1 = true; -bool useGpuInfo2 = true; -bool useGpuSolveJointConstraintRows = true; -bool useGpuWriteBackVelocities = true; -bool gpuBreakConstraints = true; - -#include "b3GpuPgsConstraintSolver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include <new> -#include "Bullet3Common/b3AlignedObjectArray.h" -#include <string.h> //for memset -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" - -#include "Bullet3OpenCL/RigidBody/kernels/jointSolver.h" //solveConstraintRowsCL -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - -#define B3_JOINT_SOLVER_PATH "src/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl" - -struct b3GpuPgsJacobiSolverInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3PrefixScanCL* m_prefixScan; - - cl_kernel m_solveJointConstraintRowsKernels; - cl_kernel m_initSolverBodiesKernel; - cl_kernel 
m_getInfo1Kernel; - cl_kernel m_initBatchConstraintsKernel; - cl_kernel m_getInfo2Kernel; - cl_kernel m_writeBackVelocitiesKernel; - cl_kernel m_breakViolatedConstraintsKernel; - - b3OpenCLArray<unsigned int>* m_gpuConstraintRowOffsets; - - b3OpenCLArray<b3GpuSolverBody>* m_gpuSolverBodies; - b3OpenCLArray<b3BatchConstraint>* m_gpuBatchConstraints; - b3OpenCLArray<b3GpuSolverConstraint>* m_gpuConstraintRows; - b3OpenCLArray<unsigned int>* m_gpuConstraintInfo1; - - // b3AlignedObjectArray<b3GpuSolverBody> m_cpuSolverBodies; - b3AlignedObjectArray<b3BatchConstraint> m_cpuBatchConstraints; - b3AlignedObjectArray<b3GpuSolverConstraint> m_cpuConstraintRows; - b3AlignedObjectArray<unsigned int> m_cpuConstraintInfo1; - b3AlignedObjectArray<unsigned int> m_cpuConstraintRowOffsets; - - b3AlignedObjectArray<b3RigidBodyData> m_cpuBodies; - b3AlignedObjectArray<b3InertiaData> m_cpuInertias; - - b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints; - - b3AlignedObjectArray<int> m_batchSizes; -}; - -/* -static b3Transform getWorldTransform(b3RigidBodyData* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -} - -static const b3Matrix3x3& getInvInertiaTensorWorld(b3InertiaData* inertia) -{ - return inertia->m_invInertiaWorld; -} - -*/ - -static const b3Vector3& getLinearVelocity(b3RigidBodyData* rb) -{ - return rb->m_linVel; -} - -static const b3Vector3& getAngularVelocity(b3RigidBodyData* rb) -{ - return rb->m_angVel; -} - -b3Vector3 getVelocityInLocalPoint(b3RigidBodyData* rb, const b3Vector3& rel_pos) -{ - //we also calculate lin/ang velocity for kinematic objects - return getLinearVelocity(rb) + getAngularVelocity(rb).cross(rel_pos); -} - -b3GpuPgsConstraintSolver::b3GpuPgsConstraintSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, bool usePgs) -{ - m_usePgs = usePgs; - m_gpuData = new b3GpuPgsJacobiSolverInternalData(); - m_gpuData->m_context = ctx; - m_gpuData->m_device = device; 
- m_gpuData->m_queue = queue; - - m_gpuData->m_prefixScan = new b3PrefixScanCL(ctx, device, queue); - - m_gpuData->m_gpuConstraintRowOffsets = new b3OpenCLArray<unsigned int>(m_gpuData->m_context, m_gpuData->m_queue); - - m_gpuData->m_gpuSolverBodies = new b3OpenCLArray<b3GpuSolverBody>(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuBatchConstraints = new b3OpenCLArray<b3BatchConstraint>(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuConstraintRows = new b3OpenCLArray<b3GpuSolverConstraint>(m_gpuData->m_context, m_gpuData->m_queue); - m_gpuData->m_gpuConstraintInfo1 = new b3OpenCLArray<unsigned int>(m_gpuData->m_context, m_gpuData->m_queue); - cl_int errNum = 0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, &errNum, "", B3_JOINT_SOLVER_PATH); - //cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_gpuData->m_context,m_gpuData->m_device,0,&errNum,"",B3_JOINT_SOLVER_PATH,true); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_solveJointConstraintRowsKernels = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "solveJointConstraintRows", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_initSolverBodiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "initSolverBodies", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_getInfo1Kernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "getInfo1Kernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_initBatchConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "initBatchConstraintsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_getInfo2Kernel = 
b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "getInfo2Kernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_writeBackVelocitiesKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "writeBackVelocitiesKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - m_gpuData->m_breakViolatedConstraintsKernel = b3OpenCLUtils::compileCLKernelFromString(m_gpuData->m_context, m_gpuData->m_device, solveConstraintRowsCL, "breakViolatedConstraintsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - clReleaseProgram(prog); - } -} - -b3GpuPgsConstraintSolver::~b3GpuPgsConstraintSolver() -{ - clReleaseKernel(m_gpuData->m_solveJointConstraintRowsKernels); - clReleaseKernel(m_gpuData->m_initSolverBodiesKernel); - clReleaseKernel(m_gpuData->m_getInfo1Kernel); - clReleaseKernel(m_gpuData->m_initBatchConstraintsKernel); - clReleaseKernel(m_gpuData->m_getInfo2Kernel); - clReleaseKernel(m_gpuData->m_writeBackVelocitiesKernel); - clReleaseKernel(m_gpuData->m_breakViolatedConstraintsKernel); - - delete m_gpuData->m_prefixScan; - delete m_gpuData->m_gpuConstraintRowOffsets; - delete m_gpuData->m_gpuSolverBodies; - delete m_gpuData->m_gpuBatchConstraints; - delete m_gpuData->m_gpuConstraintRows; - delete m_gpuData->m_gpuConstraintInfo1; - - delete m_gpuData; -} - -struct b3BatchConstraint -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -}; - -static b3AlignedObjectArray<b3BatchConstraint> batchConstraints; - -void b3GpuPgsConstraintSolver::recomputeBatches() -{ - m_gpuData->m_batchSizes.clear(); -} - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ 
- B3_PROFILE("GPU solveGroupCacheFriendlySetup"); - batchConstraints.resize(numConstraints); - m_gpuData->m_gpuBatchConstraints->resize(numConstraints); - m_staticIdx = -1; - m_maxOverrideNumSolverIterations = 0; - - /* m_gpuData->m_gpuBodies->resize(numBodies); - m_gpuData->m_gpuBodies->copyFromHostPointer(bodies,numBodies); - - b3OpenCLArray<b3InertiaData> gpuInertias(m_gpuData->m_context,m_gpuData->m_queue); - gpuInertias.resize(numBodies); - gpuInertias.copyFromHostPointer(inertias,numBodies); - */ - - m_gpuData->m_gpuSolverBodies->resize(numBodies); - - m_tmpSolverBodyPool.resize(numBodies); - { - if (useGpuInitSolverBodies) - { - B3_PROFILE("m_initSolverBodiesKernel"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initSolverBodiesKernel, "m_initSolverBodiesKernel"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); - - // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } - else - { - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for (int i = 0; i < numBodies; i++) - { - b3RigidBodyData& body = m_gpuData->m_cpuBodies[i]; - b3GpuSolverBody& solverBody = m_tmpSolverBodyPool[i]; - initSolverBody(i, &solverBody, &body); - solverBody.m_originalBodyIndex = i; - } - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - } - } - - // int totalBodies = 0; - int totalNumRows = 0; - //b3RigidBody* rb0=0,*rb1=0; - //if (1) - { - { - // int i; - - m_tmpConstraintSizesPool.resizeNoInitialize(numConstraints); - - // b3OpenCLArray<b3GpuGenericConstraint> gpuConstraints(m_gpuData->m_context,m_gpuData->m_queue); - - if (useGpuInfo1) - { - B3_PROFILE("info1 and init batchConstraint"); - - m_gpuData->m_gpuConstraintInfo1->resize(numConstraints); - - if (1) - { - B3_PROFILE("getInfo1Kernel"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo1Kernel, 
"m_getInfo1Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - - if (m_gpuData->m_batchSizes.size() == 0) - { - B3_PROFILE("initBatchConstraintsKernel"); - - m_gpuData->m_gpuConstraintRowOffsets->resize(numConstraints); - unsigned int total = 0; - m_gpuData->m_prefixScan->execute(*m_gpuData->m_gpuConstraintInfo1, *m_gpuData->m_gpuConstraintRowOffsets, numConstraints, &total); - unsigned int lastElem = m_gpuData->m_gpuConstraintInfo1->at(numConstraints - 1); - totalNumRows = total + lastElem; - - { - B3_PROFILE("init batch constraints"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_initBatchConstraintsKernel, "m_initBatchConstraintsKernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - } - //assume the batching happens on CPU, so copy the data - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - } - } - else - { - totalNumRows = 0; - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - //calculate the total number of contraint rows - for (int i = 0; i < numConstraints; i++) - { - unsigned int& info1 = m_tmpConstraintSizesPool[i]; - // unsigned int info1; - if (m_gpuData->m_cpuConstraints[i].isEnabled()) - { - m_gpuData->m_cpuConstraints[i].getInfo1(&info1, &m_gpuData->m_cpuBodies[0]); - } - else - { - info1 = 0; - } - - totalNumRows += info1; - } - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - 
m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - } - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(totalNumRows); - m_gpuData->m_gpuConstraintRows->resize(totalNumRows); - - // b3GpuConstraintArray verify; - - if (useGpuInfo2) - { - { - B3_PROFILE("getInfo2Kernel"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_getInfo2Kernel, "m_getInfo2Kernel"); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(gpuInertias->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(infoGlobal.m_timeStep); - launcher.setConst(infoGlobal.m_erp); - launcher.setConst(infoGlobal.m_globalCfm); - launcher.setConst(infoGlobal.m_damping); - launcher.setConst(infoGlobal.m_numIterations); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - clFinish(m_gpuData->m_queue); - - if (m_gpuData->m_batchSizes.size() == 0) - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - //m_gpuData->m_gpuConstraintRows->copyToHost(verify); - //m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - } - } - else - { - gpuInertias->copyToHost(m_gpuData->m_cpuInertias); - - ///setup the b3SolverConstraints - - for (int i = 0; i < numConstraints; i++) - { - const int& info1 = m_tmpConstraintSizesPool[i]; - - if (info1) - { - int constraintIndex = batchConstraints[i].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[constraintIndex]; - - b3GpuSolverConstraint* currentConstraintRow = &m_tmpSolverNonContactConstraintPool[constraintRowOffset]; - b3GpuGenericConstraint& 
constraint = m_gpuData->m_cpuConstraints[i]; - - b3RigidBodyData& rbA = m_gpuData->m_cpuBodies[constraint.getRigidBodyA()]; - //b3RigidBody& rbA = constraint.getRigidBodyA(); - // b3RigidBody& rbB = constraint.getRigidBodyB(); - b3RigidBodyData& rbB = m_gpuData->m_cpuBodies[constraint.getRigidBodyB()]; - - int solverBodyIdA = constraint.getRigidBodyA(); //getOrInitSolverBody(constraint.getRigidBodyA(),bodies,inertias); - int solverBodyIdB = constraint.getRigidBodyB(); //getOrInitSolverBody(constraint.getRigidBodyB(),bodies,inertias); - - b3GpuSolverBody* bodyAPtr = &m_tmpSolverBodyPool[solverBodyIdA]; - b3GpuSolverBody* bodyBPtr = &m_tmpSolverBodyPool[solverBodyIdB]; - - if (rbA.m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } - else - { - if (!solverBodyIdA) - m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB.m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } - else - { - if (!solverBodyIdB) - m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - int overrideNumSolverIterations = 0; //constraint->getOverrideNumSolverIterations() > 0 ? 
constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; - if (overrideNumSolverIterations > m_maxOverrideNumSolverIterations) - m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - int j; - for (j = 0; j < info1; j++) - { - memset(&currentConstraintRow[j], 0, sizeof(b3GpuSolverConstraint)); - currentConstraintRow[j].m_angularComponentA.setValue(0, 0, 0); - currentConstraintRow[j].m_angularComponentB.setValue(0, 0, 0); - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_cfm = 0.f; - currentConstraintRow[j].m_contactNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_friction = 0.f; - currentConstraintRow[j].m_frictionIndex = 0; - currentConstraintRow[j].m_jacDiagABInv = 0.f; - currentConstraintRow[j].m_lowerLimit = 0.f; - currentConstraintRow[j].m_upperLimit = 0.f; - - currentConstraintRow[j].m_originalContactPoint = 0; - currentConstraintRow[j].m_overrideNumSolverIterations = 0; - currentConstraintRow[j].m_relpos1CrossNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_relpos2CrossNormal.setValue(0, 0, 0); - currentConstraintRow[j].m_rhs = 0.f; - currentConstraintRow[j].m_rhsPenetration = 0.f; - currentConstraintRow[j].m_solverBodyIdA = 0; - currentConstraintRow[j].m_solverBodyIdB = 0; - - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyAPtr->internalGetTurnVelocity().setValue(0.f, 
0.f, 0.f); - bodyBPtr->internalGetDeltaLinearVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetDeltaAngularVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - bodyBPtr->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f / infoGlobal.m_timeStep; - info2.erp = infoGlobal.m_erp; - info2.m_J1linearAxis = currentConstraintRow->m_contactNormal; - info2.m_J1angularAxis = currentConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxis = 0; - info2.m_J2angularAxis = currentConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3GpuSolverConstraint) / sizeof(b3Scalar); //check this - ///the size of b3GpuSolverConstraint needs be a multiple of b3Scalar - b3Assert(info2.rowskip * sizeof(b3Scalar) == sizeof(b3GpuSolverConstraint)); - info2.m_constraintError = &currentConstraintRow->m_rhs; - currentConstraintRow->m_cfm = infoGlobal.m_globalCfm; - info2.m_damping = infoGlobal.m_damping; - info2.cfm = &currentConstraintRow->m_cfm; - info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit; - info2.m_upperLimit = &currentConstraintRow->m_upperLimit; - info2.m_numIterations = infoGlobal.m_numIterations; - m_gpuData->m_cpuConstraints[i].getInfo2(&info2, &m_gpuData->m_cpuBodies[0]); - - ///finalize the constraint setup - for (j = 0; j < info1; j++) - { - b3GpuSolverConstraint& solverConstraint = currentConstraintRow[j]; - - if (solverConstraint.m_upperLimit >= m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_upperLimit = m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - if (solverConstraint.m_lowerLimit <= -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold()) - { - solverConstraint.m_lowerLimit = -m_gpuData->m_cpuConstraints[i].getBreakingImpulseThreshold(); - } - - // solverConstraint.m_originalContactPoint = constraint; - - b3Matrix3x3& invInertiaWorldA = 
m_gpuData->m_cpuInertias[constraint.getRigidBodyA()].m_invInertiaWorld; - { - //b3Vector3 angularFactorA(1,1,1); - const b3Vector3& ftorqueAxis1 = solverConstraint.m_relpos1CrossNormal; - solverConstraint.m_angularComponentA = invInertiaWorldA * ftorqueAxis1; //*angularFactorA; - } - - b3Matrix3x3& invInertiaWorldB = m_gpuData->m_cpuInertias[constraint.getRigidBodyB()].m_invInertiaWorld; - { - const b3Vector3& ftorqueAxis2 = solverConstraint.m_relpos2CrossNormal; - solverConstraint.m_angularComponentB = invInertiaWorldB * ftorqueAxis2; //*constraint.getRigidBodyB().getAngularFactor(); - } - - { - //it is ok to use solverConstraint.m_contactNormal instead of -solverConstraint.m_contactNormal - //because it gets multiplied iMJlB - b3Vector3 iMJlA = solverConstraint.m_contactNormal * rbA.m_invMass; - b3Vector3 iMJaA = invInertiaWorldA * solverConstraint.m_relpos1CrossNormal; - b3Vector3 iMJlB = solverConstraint.m_contactNormal * rbB.m_invMass; //sign of normal? - b3Vector3 iMJaB = invInertiaWorldB * solverConstraint.m_relpos2CrossNormal; - - b3Scalar sum = iMJlA.dot(solverConstraint.m_contactNormal); - sum += iMJaA.dot(solverConstraint.m_relpos1CrossNormal); - sum += iMJlB.dot(solverConstraint.m_contactNormal); - sum += iMJaB.dot(solverConstraint.m_relpos2CrossNormal); - b3Scalar fsum = b3Fabs(sum); - b3Assert(fsum > B3_EPSILON); - solverConstraint.m_jacDiagABInv = fsum > B3_EPSILON ? b3Scalar(1.) 
/ sum : 0.f; - } - - ///fix rhs - ///todo: add force/torque accelerators - { - b3Scalar rel_vel; - b3Scalar vel1Dotn = solverConstraint.m_contactNormal.dot(rbA.m_linVel) + solverConstraint.m_relpos1CrossNormal.dot(rbA.m_angVel); - b3Scalar vel2Dotn = -solverConstraint.m_contactNormal.dot(rbB.m_linVel) + solverConstraint.m_relpos2CrossNormal.dot(rbB.m_angVel); - - rel_vel = vel1Dotn + vel2Dotn; - - b3Scalar restitution = 0.f; - b3Scalar positionalError = solverConstraint.m_rhs; //already filled in by getConstraintInfo2 - b3Scalar velocityError = restitution - rel_vel * info2.m_damping; - b3Scalar penetrationImpulse = positionalError * solverConstraint.m_jacDiagABInv; - b3Scalar velocityImpulse = velocityError * solverConstraint.m_jacDiagABInv; - solverConstraint.m_rhs = penetrationImpulse + velocityImpulse; - solverConstraint.m_appliedImpulse = 0.f; - } - } - } - } - - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - m_gpuData->m_gpuConstraintInfo1->copyFromHost(m_tmpConstraintSizesPool); - - if (m_gpuData->m_batchSizes.size() == 0) - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - else - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - - } //end useGpuInfo2 - } - -#ifdef B3_SUPPORT_CONTACT_CONSTRAINTS - { - int i; - - for (i = 0; i < numManifolds; i++) - { - b3Contact4& manifold = manifoldPtr[i]; - convertContact(bodies, inertias, &manifold, infoGlobal); - } - } -#endif //B3_SUPPORT_CONTACT_CONSTRAINTS - } - - // b3ContactSolverInfo info = infoGlobal; - - // int numNonContactPool = m_tmpSolverNonContactConstraintPool.size(); - // int numConstraintPool = m_tmpSolverContactConstraintPool.size(); - // int numFrictionPool = m_tmpSolverContactFrictionConstraintPool.size(); - - return 0.f; -} - -///a straight copy from GPU/OpenCL kernel, for debugging -__inline void internalApplyImpulse(b3GpuSolverBody* body, const b3Vector3& 
linearComponent, const b3Vector3& angularComponent, float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent * impulseMagnitude * body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent * (impulseMagnitude * body->m_angularFactor); -} - -void resolveSingleConstraintRowGeneric2(b3GpuSolverBody* body1, b3GpuSolverBody* body2, b3GpuSolverConstraint* c) -{ - float deltaImpulse = c->m_rhs - b3Scalar(c->m_appliedImpulse) * c->m_cfm; - float deltaVel1Dotn = b3Dot(c->m_contactNormal, body1->m_deltaLinearVelocity) + b3Dot(c->m_relpos1CrossNormal, body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -b3Dot(c->m_contactNormal, body2->m_deltaLinearVelocity) + b3Dot(c->m_relpos2CrossNormal, body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn * c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn * c->m_jacDiagABInv; - - float sum = b3Scalar(c->m_appliedImpulse) + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit - b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit - b3Scalar(c->m_appliedImpulse); - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1, c->m_contactNormal * body1->m_invMass, c->m_angularComponentA, deltaImpulse); - internalApplyImpulse(body2, -c->m_contactNormal * body2->m_invMass, c->m_angularComponentB, deltaImpulse); -} - -void b3GpuPgsConstraintSolver::initSolverBody(int bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb) -{ - solverBody->m_deltaLinearVelocity.setValue(0.f, 0.f, 0.f); - solverBody->m_deltaAngularVelocity.setValue(0.f, 0.f, 0.f); - solverBody->internalGetPushVelocity().setValue(0.f, 0.f, 0.f); - solverBody->internalGetTurnVelocity().setValue(0.f, 0.f, 0.f); - - b3Assert(rb); - // solverBody->m_worldTransform = getWorldTransform(rb); - solverBody->internalSetInvMass(b3MakeVector3(rb->m_invMass, 
rb->m_invMass, rb->m_invMass)); - solverBody->m_originalBodyIndex = bodyIndex; - solverBody->m_angularFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearFactor = b3MakeVector3(1, 1, 1); - solverBody->m_linearVelocity = getLinearVelocity(rb); - solverBody->m_angularVelocity = getAngularVelocity(rb); -} - -void b3GpuPgsConstraintSolver::averageVelocities() -{ -} - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - //only create the batches once. - //@todo: incrementally update batches when constraints are added/activated and/or removed/deactivated - B3_PROFILE("GpuSolveGroupCacheFriendlyIterations"); - - bool createBatches = m_gpuData->m_batchSizes.size() == 0; - { - if (createBatches) - { - m_gpuData->m_batchSizes.resize(0); - - { - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - - B3_PROFILE("batch joints"); - b3Assert(batchConstraints.size() == numConstraints); - int simdWidth = numConstraints + 1; - int numBodies = m_tmpSolverBodyPool.size(); - sortConstraintByBatch3(&batchConstraints[0], numConstraints, simdWidth, m_staticIdx, numBodies); - - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - } - } - else - { - /*b3AlignedObjectArray<b3BatchConstraint> cpuCheckBatches; - m_gpuData->m_gpuBatchConstraints->copyToHost(cpuCheckBatches); - b3Assert(cpuCheckBatches.size()==batchConstraints.size()); - printf(".\n"); - */ - //>copyFromHost(batchConstraints); - } - int maxIterations = infoGlobal.m_numIterations; - - bool useBatching = true; - - if (useBatching) - { - if (!useGpuSolveJointConstraintRows) - { - B3_PROFILE("copy to host"); - m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyToHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_tmpSolverNonContactConstraintPool); - 
m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - gpuConstraints1->copyToHost(m_gpuData->m_cpuConstraints); - } - - for (int iteration = 0; iteration < maxIterations; iteration++) - { - int batchOffset = 0; - int constraintOffset = 0; - int numBatches = m_gpuData->m_batchSizes.size(); - for (int bb = 0; bb < numBatches; bb++) - { - int numConstraintsInBatch = m_gpuData->m_batchSizes[bb]; - - if (useGpuSolveJointConstraintRows) - { - B3_PROFILE("solveJointConstraintRowsKernels"); - - /* - __kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch*/ - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_solveJointConstraintRowsKernels, "m_solveJointConstraintRowsKernels"); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuBatchConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(gpuConstraints1->getBufferCL()); //to detect disabled constraints - launcher.setConst(batchOffset); - launcher.setConst(numConstraintsInBatch); - - launcher.launch1D(numConstraintsInBatch); - } - else //useGpu - { - for (int b = 0; b < numConstraintsInBatch; b++) - { - const b3BatchConstraint& c = batchConstraints[batchOffset + b]; - /*printf("-----------\n"); - printf("bb=%d\n",bb); - printf("c.batchId = %d\n", c.m_batchId); - */ - b3Assert(c.m_batchId == bb); - b3GpuGenericConstraint* constraint = 
&m_gpuData->m_cpuConstraints[c.m_originalConstraintIndex]; - if (constraint->m_flags & B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = m_gpuData->m_cpuConstraintInfo1[c.m_originalConstraintIndex]; - int constraintOffset = m_gpuData->m_cpuConstraintRowOffsets[c.m_originalConstraintIndex]; - - for (int jj = 0; jj < numConstraintRows; jj++) - { - // - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[constraintOffset + jj]; - //resolveSingleConstraintRowGenericSIMD(m_tmpSolverBodyPool[constraint.m_solverBodyIdA],m_tmpSolverBodyPool[constraint.m_solverBodyIdB],constraint); - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); - } - } - } - } //useGpu - batchOffset += numConstraintsInBatch; - constraintOffset += numConstraintsInBatch; - } - } //for (int iteration... - - if (!useGpuSolveJointConstraintRows) - { - { - B3_PROFILE("copy from host"); - m_gpuData->m_gpuSolverBodies->copyFromHost(m_tmpSolverBodyPool); - m_gpuData->m_gpuBatchConstraints->copyFromHost(batchConstraints); - m_gpuData->m_gpuConstraintRows->copyFromHost(m_tmpSolverNonContactConstraintPool); - } - - //B3_PROFILE("copy to host"); - //m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - } - //int sz = sizeof(b3GpuSolverBody); - //printf("cpu sizeof(b3GpuSolverBody)=%d\n",sz); - } - else - { - for (int iteration = 0; iteration < maxIterations; iteration++) - { - int numJoints = m_tmpSolverNonContactConstraintPool.size(); - for (int j = 0; j < numJoints; j++) - { - b3GpuSolverConstraint& constraint = m_tmpSolverNonContactConstraintPool[j]; - resolveSingleConstraintRowGeneric2(&m_tmpSolverBodyPool[constraint.m_solverBodyIdA], &m_tmpSolverBodyPool[constraint.m_solverBodyIdB], &constraint); - } - - if (!m_usePgs) - { - averageVelocities(); - } - } - } - } - clFinish(m_gpuData->m_queue); - return 0.f; -} - -static b3AlignedObjectArray<int> bodyUsed; -static 
b3AlignedObjectArray<int> curUsed; - -inline int b3GpuPgsConstraintSolver::sortConstraintByBatch3(b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) -{ - //int sz = sizeof(b3BatchConstraint); - - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2 * simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints < numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies / 32 + 1; - bodyUsed.resize(numUsedArray); - - for (int q = 0; q < numUsedArray; q++) - bodyUsed[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].m_batchId = -1; -#endif - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = i; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 31)); - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); - curUsed[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - 
bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); - curUsed[curBodyUsed++] = bodyB; - } - - cs[idx].m_batchId = batchIdx; - - if (i != numValidConstraints) - { - b3Swap(cs[i], cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - curBodyUsed = 0; - } - } - } - } - m_gpuData->m_batchSizes.push_back(nCurrentBatch); - batchIdx++; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - b3Assert(cs[i].m_batchId != -1); - } -#endif - - if (maxSwaps < numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} - -/// b3PgsJacobiSolver Sequentially applies impulses -b3Scalar b3GpuPgsConstraintSolver::solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveJoints"); - //you need to provide at least some bodies - - solveGroupCacheFriendlySetup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyIterations(gpuConstraints, numConstraints, infoGlobal); - - solveGroupCacheFriendlyFinish(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); - - return 0.f; -} - -void b3GpuPgsConstraintSolver::solveJoints(int numBodies, b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numConstraints, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints) -{ - b3ContactSolverInfo infoGlobal; - infoGlobal.m_splitImpulse = false; - infoGlobal.m_timeStep = 1.f / 60.f; - infoGlobal.m_numIterations = 4; //4; - // 
infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS|B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION; - //infoGlobal.m_solverMode|=B3_SOLVER_USE_2_FRICTION_DIRECTIONS|B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS; - infoGlobal.m_solverMode |= B3_SOLVER_USE_2_FRICTION_DIRECTIONS; - - //if (infoGlobal.m_solverMode & B3_SOLVER_INTERLEAVE_CONTACT_AND_FRICTION_CONSTRAINTS) - //if ((infoGlobal.m_solverMode & B3_SOLVER_USE_2_FRICTION_DIRECTIONS) && (infoGlobal.m_solverMode & B3_SOLVER_DISABLE_VELOCITY_DEPENDENT_FRICTION_DIRECTION)) - - solveGroup(gpuBodies, gpuInertias, numBodies, gpuConstraints, numConstraints, infoGlobal); -} - -//b3AlignedObjectArray<b3RigidBodyData> testBodies; - -b3Scalar b3GpuPgsConstraintSolver::solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal) -{ - B3_PROFILE("solveGroupCacheFriendlyFinish"); - // int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - // int i,j; - - { - if (gpuBreakConstraints) - { - B3_PROFILE("breakViolatedConstraintsKernel"); - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_breakViolatedConstraintsKernel, "m_breakViolatedConstraintsKernel"); - launcher.setBuffer(gpuConstraints->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintInfo1->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRowOffsets->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuConstraintRows->getBufferCL()); - launcher.setConst(numConstraints); - launcher.launch1D(numConstraints); - } - else - { - gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuBatchConstraints->copyToHost(m_gpuData->m_cpuBatchConstraints); - m_gpuData->m_gpuConstraintRows->copyToHost(m_gpuData->m_cpuConstraintRows); - 
gpuConstraints->copyToHost(m_gpuData->m_cpuConstraints); - m_gpuData->m_gpuConstraintInfo1->copyToHost(m_gpuData->m_cpuConstraintInfo1); - m_gpuData->m_gpuConstraintRowOffsets->copyToHost(m_gpuData->m_cpuConstraintRowOffsets); - - for (int cid = 0; cid < numConstraints; cid++) - { - int originalConstraintIndex = batchConstraints[cid].m_originalConstraintIndex; - int constraintRowOffset = m_gpuData->m_cpuConstraintRowOffsets[originalConstraintIndex]; - int numRows = m_gpuData->m_cpuConstraintInfo1[originalConstraintIndex]; - if (numRows) - { - // printf("cid=%d, breakingThreshold =%f\n",cid,breakingThreshold); - for (int i = 0; i < numRows; i++) - { - int rowIndex = constraintRowOffset + i; - int orgConstraintIndex = m_gpuData->m_cpuConstraintRows[rowIndex].m_originalConstraintIndex; - float breakingThreshold = m_gpuData->m_cpuConstraints[orgConstraintIndex].m_breakingImpulseThreshold; - // printf("rows[%d].m_appliedImpulse=%f\n",rowIndex,rows[rowIndex].m_appliedImpulse); - if (b3Fabs(m_gpuData->m_cpuConstraintRows[rowIndex].m_appliedImpulse) >= breakingThreshold) - { - m_gpuData->m_cpuConstraints[orgConstraintIndex].m_flags = 0; //&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } - } - - gpuConstraints->copyFromHost(m_gpuData->m_cpuConstraints); - } - } - - { - if (useGpuWriteBackVelocities) - { - B3_PROFILE("GPU write back velocities and transforms"); - - b3LauncherCL launcher(m_gpuData->m_queue, m_gpuData->m_writeBackVelocitiesKernel, "m_writeBackVelocitiesKernel"); - launcher.setBuffer(gpuBodies->getBufferCL()); - launcher.setBuffer(m_gpuData->m_gpuSolverBodies->getBufferCL()); - launcher.setConst(numBodies); - launcher.launch1D(numBodies); - clFinish(m_gpuData->m_queue); - // m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - // m_gpuData->m_gpuBodies->copyToHostPointer(bodies,numBodies); - //m_gpuData->m_gpuBodies->copyToHost(testBodies); - } - else - { - B3_PROFILE("CPU write back velocities and transforms"); - - 
m_gpuData->m_gpuSolverBodies->copyToHost(m_tmpSolverBodyPool); - gpuBodies->copyToHost(m_gpuData->m_cpuBodies); - for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - int bodyIndex = m_tmpSolverBodyPool[i].m_originalBodyIndex; - //printf("bodyIndex=%d\n",bodyIndex); - b3Assert(i == bodyIndex); - - b3RigidBodyData* body = &m_gpuData->m_cpuBodies[bodyIndex]; - if (body->m_invMass) - { - if (infoGlobal.m_splitImpulse) - m_tmpSolverBodyPool[i].writebackVelocityAndTransform(infoGlobal.m_timeStep, infoGlobal.m_splitImpulseTurnErp); - else - m_tmpSolverBodyPool[i].writebackVelocity(); - - if (m_usePgs) - { - body->m_linVel = m_tmpSolverBodyPool[i].m_linearVelocity; - body->m_angVel = m_tmpSolverBodyPool[i].m_angularVelocity; - } - else - { - b3Assert(0); - } - /* - if (infoGlobal.m_splitImpulse) - { - body->m_pos = m_tmpSolverBodyPool[i].m_worldTransform.getOrigin(); - b3Quaternion orn; - orn = m_tmpSolverBodyPool[i].m_worldTransform.getRotation(); - body->m_quat = orn; - } - */ - } - } //for - - gpuBodies->copyFromHost(m_gpuData->m_cpuBodies); - } - } - - clFinish(m_gpuData->m_queue); - - m_tmpSolverContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverNonContactConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactFrictionConstraintPool.resizeNoInitialize(0); - m_tmpSolverContactRollingFrictionConstraintPool.resizeNoInitialize(0); - - m_tmpSolverBodyPool.resizeNoInitialize(0); - return 0.f; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h deleted file mode 100644 index 00bc544f02..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h +++ /dev/null @@ -1,76 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_PGS_CONSTRAINT_SOLVER_H -#define B3_GPU_PGS_CONSTRAINT_SOLVER_H - -struct b3Contact4; -struct b3ContactPoint; - -class b3Dispatcher; - -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Dynamics/ConstraintSolver/b3ContactSolverInfo.h" -#include "b3GpuSolverBody.h" -#include "b3GpuSolverConstraint.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -struct b3RigidBodyData; -struct b3InertiaData; - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "b3GpuGenericConstraint.h" - -class b3GpuPgsConstraintSolver -{ -protected: - int m_staticIdx; - struct b3GpuPgsJacobiSolverInternalData* m_gpuData; - -protected: - b3AlignedObjectArray<b3GpuSolverBody> m_tmpSolverBodyPool; - b3GpuConstraintArray m_tmpSolverContactConstraintPool; - b3GpuConstraintArray m_tmpSolverNonContactConstraintPool; - b3GpuConstraintArray m_tmpSolverContactFrictionConstraintPool; - b3GpuConstraintArray m_tmpSolverContactRollingFrictionConstraintPool; - - b3AlignedObjectArray<unsigned int> m_tmpConstraintSizesPool; - - bool m_usePgs; - void averageVelocities(); - - int m_maxOverrideNumSolverIterations; - - int m_numSplitImpulseRecoveries; - - // int getOrInitSolverBody(int bodyIndex, b3RigidBodyData* bodies,b3InertiaData* inertias); - void initSolverBody(int 
bodyIndex, b3GpuSolverBody* solverBody, b3RigidBodyData* rb); - -public: - b3GpuPgsConstraintSolver(cl_context ctx, cl_device_id device, cl_command_queue queue, bool usePgs); - virtual ~b3GpuPgsConstraintSolver(); - - virtual b3Scalar solveGroupCacheFriendlyIterations(b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints1, int numConstraints, const b3ContactSolverInfo& infoGlobal); - virtual b3Scalar solveGroupCacheFriendlySetup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - b3Scalar solveGroupCacheFriendlyFinish(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - - b3Scalar solveGroup(b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, int numBodies, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints, int numConstraints, const b3ContactSolverInfo& infoGlobal); - void solveJoints(int numBodies, b3OpenCLArray<b3RigidBodyData>* gpuBodies, b3OpenCLArray<b3InertiaData>* gpuInertias, - int numConstraints, b3OpenCLArray<b3GpuGenericConstraint>* gpuConstraints); - - int sortConstraintByBatch3(struct b3BatchConstraint* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies); - void recomputeBatches(); -}; - -#endif //B3_GPU_PGS_CONSTRAINT_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp deleted file mode 100644 index e3d235a4fd..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.cpp +++ /dev/null @@ -1,1529 +0,0 @@ - -bool gUseLargeBatches = false; -bool gCpuBatchContacts = false; -bool gCpuSolveConstraint = false; -bool gCpuRadixSort = false; -bool 
gCpuSetSortData = false; -bool gCpuSortContactsDeterminism = false; -bool gUseCpuCopyConstraints = false; -bool gUseScanHost = false; -bool gReorderContactsOnCpu = false; - -bool optionalSortContactsDeterminism = true; - -#include "b3GpuPgsContactSolver.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include <string.h> -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "b3Solver.h" - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - -struct b3GpuBatchingPgsSolverInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - int m_pairCapacity; - int m_nIterations; - - b3OpenCLArray<b3GpuConstraint4>* m_contactCGPU; - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - - b3Solver* m_solverGPU; - - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveSingleContactKernel; - cl_kernel m_solveSingleFrictionKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel 
m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - cl_kernel m_setDeterminismSortDataBodyAKernel; - cl_kernel m_setDeterminismSortDataBodyBKernel; - cl_kernel m_setDeterminismSortDataChildShapeAKernel; - cl_kernel m_setDeterminismSortDataChildShapeBKernel; - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray<b3SortData>* m_sortDataBuffer; - b3OpenCLArray<b3Contact4>* m_contactBuffer; - - b3OpenCLArray<b3RigidBodyData>* m_bodyBufferGPU; - b3OpenCLArray<b3InertiaData>* m_inertiaBufferGPU; - b3OpenCLArray<b3Contact4>* m_pBufContactOutGPU; - - b3OpenCLArray<b3Contact4>* m_pBufContactOutGPUCopy; - b3OpenCLArray<b3SortData>* m_contactKeyValues; - - b3AlignedObjectArray<unsigned int> m_idxBuffer; - b3AlignedObjectArray<b3SortData> m_sortData; - b3AlignedObjectArray<b3Contact4> m_old; - - b3AlignedObjectArray<int> m_batchSizes; - b3OpenCLArray<int>* m_batchSizesGpu; -}; - -b3GpuPgsContactSolver::b3GpuPgsContactSolver(cl_context ctx, cl_device_id device, cl_command_queue q, int pairCapacity) -{ - m_debugOutput = 0; - m_data = new b3GpuBatchingPgsSolverInternalData; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - m_data->m_pairCapacity = pairCapacity; - m_data->m_nIterations = 4; - m_data->m_batchSizesGpu = new b3OpenCLArray<int>(ctx, q); - m_data->m_bodyBufferGPU = new b3OpenCLArray<b3RigidBodyData>(ctx, q); - m_data->m_inertiaBufferGPU = new b3OpenCLArray<b3InertiaData>(ctx, q); - m_data->m_pBufContactOutGPU = new b3OpenCLArray<b3Contact4>(ctx, q); - - m_data->m_pBufContactOutGPUCopy = new b3OpenCLArray<b3Contact4>(ctx, q); - m_data->m_contactKeyValues = new b3OpenCLArray<b3SortData>(ctx, q); - - m_data->m_solverGPU = new b3Solver(ctx, device, q, 512 * 1024); - - m_data->m_sort32 = new b3RadixSort32CL(ctx, device, m_data->m_queue); - m_data->m_scan = new b3PrefixScanCL(ctx, device, 
m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_search = new b3BoundSearchCL(ctx, device, m_data->m_queue, B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - - m_data->m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx, m_data->m_queue, sortSize); - m_data->m_contactBuffer = new b3OpenCLArray<b3Contact4>(ctx, m_data->m_queue); - - m_data->m_numConstraints = new b3OpenCLArray<unsigned int>(ctx, m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_data->m_contactCGPU = new b3OpenCLArray<b3GpuConstraint4>(ctx, q, pairCapacity); - - m_data->m_offsets = new b3OpenCLArray<unsigned int>(ctx, m_data->m_queue, B3_SOLVER_N_CELLS); - m_data->m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - //const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - { - cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - - b3Assert(solverSetup2Prog); - - cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - 
m_data->m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_data->m_solveFrictionKernel); - - m_data->m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_data->m_solveContactKernel); - - m_data->m_solveSingleContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "solveSingleContactKernel", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_data->m_solveSingleContactKernel); - - m_data->m_solveSingleFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "solveSingleFrictionKernel", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_data->m_solveSingleFrictionKernel); - - m_data->m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); - b3Assert(m_data->m_contactToConstraintKernel); - - m_data->m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setSortDataKernel); - - m_data->m_setDeterminismSortDataBodyAKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyA", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataBodyAKernel); - - m_data->m_setDeterminismSortDataBodyBKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataBodyB", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataBodyBKernel); - - m_data->m_setDeterminismSortDataChildShapeAKernel = 
b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeA", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataChildShapeAKernel); - - m_data->m_setDeterminismSortDataChildShapeBKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetDeterminismSortDataChildShapeB", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_setDeterminismSortDataChildShapeBKernel); - - m_data->m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_reorderContactKernel); - - m_data->m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_data->m_copyConstraintKernel); - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); - b3Assert(batchingProg); - - m_data->m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); - b3Assert(m_data->m_batchingKernel); - } - - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_data->m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); - b3Assert(m_data->m_batchingKernelNew); - } -} - -b3GpuPgsContactSolver::~b3GpuPgsContactSolver() -{ - delete m_data->m_batchSizesGpu; - delete m_data->m_bodyBufferGPU; - delete m_data->m_inertiaBufferGPU; - delete m_data->m_pBufContactOutGPU; - delete m_data->m_pBufContactOutGPUCopy; - 
delete m_data->m_contactKeyValues; - - delete m_data->m_contactCGPU; - delete m_data->m_numConstraints; - delete m_data->m_offsets; - delete m_data->m_sortDataBuffer; - delete m_data->m_contactBuffer; - - delete m_data->m_sort32; - delete m_data->m_scan; - delete m_data->m_search; - delete m_data->m_solverGPU; - - clReleaseKernel(m_data->m_batchingKernel); - clReleaseKernel(m_data->m_batchingKernelNew); - clReleaseKernel(m_data->m_solveSingleContactKernel); - clReleaseKernel(m_data->m_solveSingleFrictionKernel); - clReleaseKernel(m_data->m_solveContactKernel); - clReleaseKernel(m_data->m_solveFrictionKernel); - - clReleaseKernel(m_data->m_contactToConstraintKernel); - clReleaseKernel(m_data->m_setSortDataKernel); - clReleaseKernel(m_data->m_reorderContactKernel); - clReleaseKernel(m_data->m_copyConstraintKernel); - - clReleaseKernel(m_data->m_setDeterminismSortDataBodyAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataBodyBKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeAKernel); - clReleaseKernel(m_data->m_setDeterminismSortDataChildShapeBKernel); - - delete m_data; -} - -struct b3ConstraintCfg -{ - b3ConstraintCfg(float dt = 0.f) : m_positionDrift(0.005f), m_positionConstraintCoeff(0.2f), m_dt(dt), m_staticIdx(0) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; -}; - -void b3GpuPgsContactSolver::solveContactConstraintBatchSizes(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes) //const b3OpenCLArray<int>* gpuBatchSizes) -{ - B3_PROFILE("solveContactConstraintBatchSizes"); - int numBatches = batchSizes->size() / B3_MAX_NUM_BATCHES; - for (int iter = 0; iter < numIterations; iter++) - { - for (int cellId = 0; cellId < numBatches; 
cellId++) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = batchSizes->at(cellId * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveSingleContactKernel, "m_solveSingleContactKernel"); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(shapeBuf->getBufferCL()); - launcher.setBuffer(constraint->getBufferCL()); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset += numInBatch; - } - } - } - } - - for (int iter = 0; iter < numIterations; iter++) - { - for (int cellId = 0; cellId < numBatches; cellId++) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = batchSizes->at(cellId * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveSingleFrictionKernel, "m_solveSingleFrictionKernel"); - launcher.setBuffer(bodyBuf->getBufferCL()); - launcher.setBuffer(shapeBuf->getBufferCL()); - launcher.setBuffer(constraint->getBufferCL()); - launcher.setConst(cellId); - launcher.setConst(offset); - launcher.setConst(numInBatch); - launcher.launch1D(numInBatch); - offset += numInBatch; - } - } - } - } -} - -void b3GpuPgsContactSolver::solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes) //,const b3OpenCLArray<int>* gpuBatchSizes) -{ - //sort the contacts - - b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); - { - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches; //250; - - int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> 
gpuDebugInfo(data->m_device, numWorkItems); -#endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for (int iter = 0; iter < numIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { -#ifdef DEBUG_ME - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - - cdata.z = ib; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveContactKernel, "m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setBuffer(m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - launcher.launch1D(numWorkItems, 64); - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D(num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - for (int i = 0; i < 
numWorkItems; i++) - { - if (debugInfo[i].m_valInt2 > 0) - { - printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - } - } - - clFinish(m_data->m_queue); - } - - cdata.x = 1; - bool applyFriction = true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for (int iter = 0; iter < numIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setBuffer(m_data->m_solverGPU->m_batchSizes.getBufferCL()); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - - launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); - } - } - clFinish(m_data->m_queue); - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } -} - -static bool sortfnc(const b3SortData& a, const b3SortData& b) -{ - return (a.m_key < b.m_key); -} - -static bool b3ContactCmp(const b3Contact4& p, const b3Contact4& q) -{ - return ((p.m_bodyAPtrAndSignBit < q.m_bodyAPtrAndSignBit) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit < q.m_bodyBPtrAndSignBit)) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && 
(p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA < q.m_childIndexA) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA < q.m_childIndexA) || - ((p.m_bodyAPtrAndSignBit == q.m_bodyAPtrAndSignBit) && (p.m_bodyBPtrAndSignBit == q.m_bodyBPtrAndSignBit) && p.m_childIndexA == q.m_childIndexA && p.m_childIndexB < q.m_childIndexB)); -} - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -#ifndef USE_SPATIAL_BATCHING -static const int gridTable4x4[] = - { - 0, 1, 17, 16, - 1, 2, 18, 19, - 17, 18, 32, 3, - 16, 19, 3, 34}; -static const int gridTable8x8[] = - { - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131, 144, 128, 130, 147, 129, 145, 146, - 208, 195, 194, 192, 193, 211, 210, 209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151, 133, 149, 150, 135, 148, 132, 134, - 197, 27, 214, 213, 212, 199, 198, 196 - -}; - -#endif - -void SetSortDataCPU(b3Contact4* gContact, b3RigidBodyData* gBodies, b3SortData* gSortDataOut, int nContacts, float scale, const b3Int4& nSplit, int staticIdx) -{ - for (int gIdx = 0; gIdx < nContacts; gIdx++) - { - if (gIdx < nContacts) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit < 0) || (aPtrAndSignBit == staticIdx); - -#if USE_SPATIAL_BATCHING - int idx = (aStatic) ? bIdx : aIdx; - b3Vector3 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x - ((p.x < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.x - 1); - int yIdx = (int)((p.y - ((p.y < 0.f) ? 1.f : 0.f)) * scale) & (nSplit.y - 1); - int zIdx = (int)((p.z - ((p.z < 0.f) ? 
1.f : 0.f)) * scale) & (nSplit.z - 1); - - int newIndex = (xIdx + yIdx * nSplit.x + zIdx * nSplit.x * nSplit.y); - -#else //USE_SPATIAL_BATCHING - bool bStatic = (bPtrAndSignBit < 0) || (bPtrAndSignBit == staticIdx); - -#if USE_4x4_GRID - int aa = aIdx & 3; - int bb = bIdx & 3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb * 4; - int newIndex = gridTable4x4[gridIndex]; -#else //USE_4x4_GRID - int aa = aIdx & 7; - int bb = bIdx & 7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb * 8; - int newIndex = gridTable8x8[gridIndex]; -#endif //USE_4x4_GRID -#endif //USE_SPATIAL_BATCHING - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } - } -} - -void b3GpuPgsContactSolver::solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const b3Config& config, int static0Index) -{ - B3_PROFILE("solveContacts"); - m_data->m_bodyBufferGPU->setFromOpenCLBuffer(bodyBuf, numBodies); - m_data->m_inertiaBufferGPU->setFromOpenCLBuffer(inertiaBuf, numBodies); - m_data->m_pBufContactOutGPU->setFromOpenCLBuffer(contactBuf, numContacts); - - if (optionalSortContactsDeterminism) - { - if (!gCpuSortContactsDeterminism) - { - B3_PROFILE("GPU Sort contact constraints (determinism)"); - - m_data->m_pBufContactOutGPUCopy->resize(numContacts); - m_data->m_contactKeyValues->resize(numContacts); - - m_data->m_pBufContactOutGPU->copyToCL(m_data->m_pBufContactOutGPUCopy->getBufferCL(), numContacts, 0, 0); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeBKernel, "m_setDeterminismSortDataChildShapeBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - 
{ - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataChildShapeAKernel, "m_setDeterminismSortDataChildShapeAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyBKernel, "m_setDeterminismSortDataBodyBKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_setDeterminismSortDataBodyAKernel, "m_setDeterminismSortDataBodyAKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(numContacts); - launcher.launch1D(numContacts, 64); - } - - m_data->m_solverGPU->m_sort32->execute(*m_data->m_contactKeyValues); - - { - B3_PROFILE("gpu reorderContactKernel (determinism)"); - - b3Int4 cdata; - cdata.x = numContacts; - - //b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_data->m_pBufContactOutGPU->getBufferCL() ), b3BufferInfoCL( m_data->m_solverGPU->m_contactBuffer2->getBufferCL()) - // , b3BufferInfoCL( m_data->m_solverGPU->m_sortDataBuffer->getBufferCL()) }; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_reorderContactKernel, "m_reorderContactKernel"); - launcher.setBuffer(m_data->m_pBufContactOutGPUCopy->getBufferCL()); - launcher.setBuffer(m_data->m_pBufContactOutGPU->getBufferCL()); - launcher.setBuffer(m_data->m_contactKeyValues->getBufferCL()); - launcher.setConst(cdata); - launcher.launch1D(numContacts, 64); - } - } - else - 
{ - B3_PROFILE("CPU Sort contact constraints (determinism)"); - b3AlignedObjectArray<b3Contact4> cpuConstraints; - m_data->m_pBufContactOutGPU->copyToHost(cpuConstraints); - bool sort = true; - if (sort) - { - cpuConstraints.quickSort(b3ContactCmp); - - for (int i = 0; i < cpuConstraints.size(); i++) - { - cpuConstraints[i].m_batchIdx = i; - } - } - m_data->m_pBufContactOutGPU->copyFromHost(cpuConstraints); - if (m_debugOutput == 100) - { - for (int i = 0; i < cpuConstraints.size(); i++) - { - printf("c[%d].m_bodyA = %d, m_bodyB = %d, batchId = %d\n", i, cpuConstraints[i].m_bodyAPtrAndSignBit, cpuConstraints[i].m_bodyBPtrAndSignBit, cpuConstraints[i].m_batchIdx); - } - } - - m_debugOutput++; - } - } - - int nContactOut = m_data->m_pBufContactOutGPU->size(); - - bool useSolver = true; - - if (useSolver) - { - float dt = 1. / 60.; - b3ConstraintCfg csCfg(dt); - csCfg.m_enableParallelSolve = true; - csCfg.m_batchCellSize = 6; - csCfg.m_staticIdx = static0Index; - - b3OpenCLArray<b3RigidBodyData>* bodyBuf = m_data->m_bodyBufferGPU; - - void* additionalData = 0; //m_data->m_frictionCGPU; - const b3OpenCLArray<b3InertiaData>* shapeBuf = m_data->m_inertiaBufferGPU; - b3OpenCLArray<b3GpuConstraint4>* contactConstraintOut = m_data->m_contactCGPU; - int nContacts = nContactOut; - - int maxNumBatches = 0; - - if (!gUseLargeBatches) - { - if (m_data->m_solverGPU->m_contactBuffer2) - { - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - if (m_data->m_solverGPU->m_contactBuffer2 == 0) - { - m_data->m_solverGPU->m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(m_data->m_context, m_data->m_queue, nContacts); - m_data->m_solverGPU->m_contactBuffer2->resize(nContacts); - } - - //clFinish(m_data->m_queue); - - { - B3_PROFILE("batching"); - //@todo: just reserve it, without copy of original contact (unless we use warmstarting) - - //const b3OpenCLArray<b3RigidBodyData>* bodyNative = bodyBuf; - - { - //b3OpenCLArray<b3RigidBodyData>* bodyNative = 
b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, bodyBuf ); - //b3OpenCLArray<b3Contact4>* contactNative = b3OpenCLArrayUtils::map<adl::TYPE_CL, true>( data->m_device, contactsIn ); - - const int sortAlignment = 512; // todo. get this out of sort - if (csCfg.m_enableParallelSolve) - { - int sortSize = B3NEXTMULTIPLEOF(nContacts, sortAlignment); - - b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - if (!gCpuSetSortData) - { // 2. set cell idx - B3_PROFILE("GPU set cell idx"); - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - b3Int4 m_nSplit; - }; - - b3Assert(sortSize % 64 == 0); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = csCfg.m_staticIdx; - cdata.m_scale = 1.f / csCfg.m_batchCellSize; - cdata.m_nSplit.x = B3_SOLVER_N_SPLIT_X; - cdata.m_nSplit.y = B3_SOLVER_N_SPLIT_Y; - cdata.m_nSplit.z = B3_SOLVER_N_SPLIT_Z; - - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(m_data->m_solverGPU->m_sortDataBuffer->getBufferCL())}; - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_setSortDataKernel, "m_setSortDataKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_scale); - launcher.setConst(cdata.m_nSplit); - launcher.setConst(cdata.m_staticIdx); - - launcher.launch1D(sortSize, 64); - } - else - { - m_data->m_solverGPU->m_sortDataBuffer->resize(nContacts); - b3AlignedObjectArray<b3SortData> sortDataCPU; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataCPU); - - b3AlignedObjectArray<b3Contact4> contactCPU; - m_data->m_pBufContactOutGPU->copyToHost(contactCPU); - b3AlignedObjectArray<b3RigidBodyData> bodiesCPU; - 
bodyBuf->copyToHost(bodiesCPU); - float scale = 1.f / csCfg.m_batchCellSize; - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - SetSortDataCPU(&contactCPU[0], &bodiesCPU[0], &sortDataCPU[0], nContacts, scale, nSplit, csCfg.m_staticIdx); - - m_data->m_solverGPU->m_sortDataBuffer->copyFromHost(sortDataCPU); - } - - if (!gCpuRadixSort) - { // 3. sort by cell idx - B3_PROFILE("gpuRadixSort"); - //int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - //int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - //adl::RadixSort<adl::TYPE_CL>::execute( data->m_sort, *data->m_sortDataBuffer, sortSize ); - //adl::RadixSort32<adl::TYPE_CL>::execute( data->m_sort32, *data->m_sortDataBuffer, sortSize ); - b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - this->m_data->m_solverGPU->m_sort32->execute(keyValuesInOut); - } - else - { - b3OpenCLArray<b3SortData>& keyValuesInOut = *(m_data->m_solverGPU->m_sortDataBuffer); - b3AlignedObjectArray<b3SortData> hostValues; - keyValuesInOut.copyToHost(hostValues); - hostValues.quickSort(sortfnc); - keyValuesInOut.copyFromHost(hostValues); - } - - if (gUseScanHost) - { - // 4. 
find entries - B3_PROFILE("cpuBoundSearch"); - b3AlignedObjectArray<unsigned int> countsHost; - countsNative->copyToHost(countsHost); - - b3AlignedObjectArray<b3SortData> sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - - //m_data->m_solverGPU->m_search->executeHost(*m_data->m_solverGPU->m_sortDataBuffer,nContacts,*countsNative,B3_SOLVER_N_CELLS,b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_search->executeHost(sortDataHost, nContacts, countsHost, B3_SOLVER_N_CELLS, b3BoundSearchCL::COUNT); - - countsNative->copyFromHost(countsHost); - - //adl::BoundSearch<adl::TYPE_CL>::execute( data->m_search, *data->m_sortDataBuffer, nContacts, *countsNative, - // B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, adl::BoundSearchBase::COUNT ); - - //unsigned int sum; - //m_data->m_solverGPU->m_scan->execute(*countsNative,*offsetsNative, B3_SOLVER_N_CELLS);//,&sum ); - b3AlignedObjectArray<unsigned int> offsetsHost; - offsetsHost.resize(offsetsNative->size()); - - m_data->m_solverGPU->m_scan->executeHost(countsHost, offsetsHost, B3_SOLVER_N_CELLS); //,&sum ); - offsetsNative->copyFromHost(offsetsHost); - - //printf("sum = %d\n",sum); - } - else - { - // 4. find entries - B3_PROFILE("gpuBoundSearch"); - m_data->m_solverGPU->m_search->execute(*m_data->m_solverGPU->m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_CELLS, b3BoundSearchCL::COUNT); - m_data->m_solverGPU->m_scan->execute(*countsNative, *offsetsNative, B3_SOLVER_N_CELLS); //,&sum ); - } - - if (nContacts) - { // 5. 
sort constraints by cellIdx - if (gReorderContactsOnCpu) - { - B3_PROFILE("cpu m_reorderContactKernel"); - b3AlignedObjectArray<b3SortData> sortDataHost; - m_data->m_solverGPU->m_sortDataBuffer->copyToHost(sortDataHost); - b3AlignedObjectArray<b3Contact4> inContacts; - b3AlignedObjectArray<b3Contact4> outContacts; - m_data->m_pBufContactOutGPU->copyToHost(inContacts); - outContacts.resize(inContacts.size()); - for (int i = 0; i < nContacts; i++) - { - int srcIdx = sortDataHost[i].y; - outContacts[i] = inContacts[srcIdx]; - } - m_data->m_solverGPU->m_contactBuffer2->copyFromHost(outContacts); - - /* "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" - "{\n" - " int nContacts = cb.x;\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int srcIdx = sortData[gIdx].y;\n" - " out[gIdx] = in[srcIdx];\n" - " }\n" - "}\n" - */ - } - else - { - B3_PROFILE("gpu m_reorderContactKernel"); - - b3Int4 cdata; - cdata.x = nContacts; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL()), - b3BufferInfoCL(m_data->m_solverGPU->m_contactBuffer2->getBufferCL()), b3BufferInfoCL(m_data->m_solverGPU->m_sortDataBuffer->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_reorderContactKernel, "m_reorderContactKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nContacts, 64); - } - } - } - } - - //clFinish(m_data->m_queue); - - // { - // b3AlignedObjectArray<unsigned int> histogram; - // m_data->m_solverGPU->m_numConstraints->copyToHost(histogram); - // printf(",,,\n"); - // } - - if (nContacts) - { - if (gUseCpuCopyConstraints) - { - for (int i = 0; i < nContacts; i++) - { - m_data->m_pBufContactOutGPU->copyFromOpenCLArray(*m_data->m_solverGPU->m_contactBuffer2); - // m_data->m_solverGPU->m_contactBuffer2->getBufferCL(); - // 
m_data->m_pBufContactOutGPU->getBufferCL() - } - } - else - { - B3_PROFILE("gpu m_copyConstraintKernel"); - b3Int4 cdata; - cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(m_data->m_solverGPU->m_contactBuffer2->getBufferCL()), - b3BufferInfoCL(m_data->m_pBufContactOutGPU->getBufferCL())}; - - b3LauncherCL launcher(m_data->m_queue, m_data->m_solverGPU->m_copyConstraintKernel, "m_copyConstraintKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - launcher.setConst(cdata); - launcher.launch1D(nContacts, 64); - //we use the clFinish for proper benchmark/profile - clFinish(m_data->m_queue); - } - } - - // bool compareGPU = false; - if (nContacts) - { - if (!gCpuBatchContacts) - { - B3_PROFILE("gpu batchContacts"); - maxNumBatches = 250; //250; - m_data->m_solverGPU->batchContacts(m_data->m_pBufContactOutGPU, nContacts, m_data->m_solverGPU->m_numConstraints, m_data->m_solverGPU->m_offsets, csCfg.m_staticIdx); - clFinish(m_data->m_queue); - } - else - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray<b3Contact4> cpuContacts; - b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - contactsIn->copyToHost(cpuContacts); - } - b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; - b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - b3AlignedObjectArray<unsigned int> nNativeHost; - b3AlignedObjectArray<unsigned int> offsetsNativeHost; - - { - B3_PROFILE("countsNative/offsetsNative copyToHost"); - countsNative->copyToHost(nNativeHost); - offsetsNative->copyToHost(offsetsNativeHost); - } - - int numNonzeroGrid = 0; - - if (gUseLargeBatches) - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - //int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3(&cpuContacts[0], totalNumConstraints, totalNumConstraints + 1, 
csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - } - else - { - m_data->m_batchSizes.resize(B3_SOLVER_N_CELLS * B3_MAX_NUM_BATCHES); - B3_PROFILE("cpu batch grid"); - for (int i = 0; i < B3_SOLVER_N_CELLS; i++) - { - int n = (nNativeHost)[i]; - int offset = (offsetsNativeHost)[i]; - if (n) - { - numNonzeroGrid++; - int simdWidth = numBodies + 1; //-1;//64;//-1;//32; - int numBatches = sortConstraintByBatch3(&cpuContacts[0] + offset, n, simdWidth, csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[i * B3_MAX_NUM_BATCHES]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - //we use the clFinish for proper benchmark/profile - } - } - //clFinish(m_data->m_queue); - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); - } - } - } - } - } - - //printf("maxNumBatches = %d\n", maxNumBatches); - - if (gUseLargeBatches) - { - if (nContacts) - { - B3_PROFILE("cpu batchContacts"); - static b3AlignedObjectArray<b3Contact4> cpuContacts; - // b3OpenCLArray<b3Contact4>* contactsIn = m_data->m_solverGPU->m_contactBuffer2; - { - B3_PROFILE("copyToHost"); - m_data->m_pBufContactOutGPU->copyToHost(cpuContacts); - } - // b3OpenCLArray<unsigned int>* countsNative = m_data->m_solverGPU->m_numConstraints; - // b3OpenCLArray<unsigned int>* offsetsNative = m_data->m_solverGPU->m_offsets; - - // int numNonzeroGrid=0; - - { - m_data->m_batchSizes.resize(B3_MAX_NUM_BATCHES); - int totalNumConstraints = cpuContacts.size(); - // int simdWidth =numBodies+1;//-1;//64;//-1;//32; - int 
numBatches = sortConstraintByBatch3(&cpuContacts[0], totalNumConstraints, totalNumConstraints + 1, csCfg.m_staticIdx, numBodies, &m_data->m_batchSizes[0]); // on GPU - maxNumBatches = b3Max(numBatches, maxNumBatches); - static int globalMaxBatch = 0; - if (maxNumBatches > globalMaxBatch) - { - globalMaxBatch = maxNumBatches; - b3Printf("maxNumBatches = %d\n", maxNumBatches); - } - } - { - B3_PROFILE("m_contactBuffer->copyFromHost"); - m_data->m_solverGPU->m_contactBuffer2->copyFromHost((b3AlignedObjectArray<b3Contact4>&)cpuContacts); - } - } - } - - if (nContacts) - { - B3_PROFILE("gpu convertToConstraints"); - m_data->m_solverGPU->convertToConstraints(bodyBuf, - shapeBuf, m_data->m_solverGPU->m_contactBuffer2, - contactConstraintOut, - additionalData, nContacts, - (b3SolverBase::ConstraintCfg&)csCfg); - clFinish(m_data->m_queue); - } - - if (1) - { - int numIter = 4; - - m_data->m_solverGPU->m_nIterations = numIter; //10 - if (!gCpuSolveConstraint) - { - B3_PROFILE("GPU solveContactConstraint"); - - /*m_data->m_solverGPU->solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU,0, - nContactOut , - maxNumBatches); - */ - - //m_data->m_batchSizesGpu->copyFromHost(m_data->m_batchSizes); - - if (gUseLargeBatches) - { - solveContactConstraintBatchSizes(m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU, 0, - nContactOut, - maxNumBatches, numIter, &m_data->m_batchSizes); - } - else - { - solveContactConstraint( - m_data->m_bodyBufferGPU, - m_data->m_inertiaBufferGPU, - m_data->m_contactCGPU, 0, - nContactOut, - maxNumBatches, numIter, &m_data->m_batchSizes); //m_data->m_batchSizesGpu); - } - } - else - { - B3_PROFILE("Host solveContactConstraint"); - - m_data->m_solverGPU->solveContactConstraintHost(m_data->m_bodyBufferGPU, m_data->m_inertiaBufferGPU, m_data->m_contactCGPU, 0, nContactOut, maxNumBatches, &m_data->m_batchSizes); - } - } - -#if 0 - if (0) - { - B3_PROFILE("read body velocities 
back to CPU"); - //read body updated linear/angular velocities back to CPU - m_data->m_bodyBufferGPU->read( - m_data->m_bodyBufferCPU->m_ptr,numOfConvexRBodies); - adl::DeviceUtils::waitForCompletion( m_data->m_deviceCL ); - } -#endif - } -} - -void b3GpuPgsContactSolver::batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx) -{ -} - -b3AlignedObjectArray<unsigned int> idxBuffer; -b3AlignedObjectArray<b3SortData> sortData; -b3AlignedObjectArray<b3Contact4> old; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies) -{ - B3_PROFILE("sortConstraintByBatch"); - int numIter = 0; - - sortData.resize(n); - idxBuffer.resize(n); - old.resize(n); - - unsigned int* idxSrc = &idxBuffer[0]; - unsigned int* idxDst = &idxBuffer[0]; - int nIdxSrc, nIdxDst; - - const int N_FLG = 256; - const int FLG_MASK = N_FLG - 1; - unsigned int flg[N_FLG / 32]; -#if defined(_DEBUG) - for (int i = 0; i < n; i++) - cs[i].getBatchIdx() = -1; -#endif - for (int i = 0; i < n; i++) - idxSrc[i] = i; - nIdxSrc = n; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - while (nIdxSrc) - { - numIter++; - nIdxDst = 0; - int nCurrentBatch = 0; - - // clear flag - for (int i = 0; i < N_FLG / 32; i++) flg[i] = 0; - - for (int i = 0; i < nIdxSrc; i++) - { - int idx = idxSrc[i]; - - b3Assert(idx < n); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - - int aIdx = bodyA & FLG_MASK; - int bIdx = bodyB & FLG_MASK; - - unsigned int aUnavailable = flg[aIdx / 32] & (1 << (aIdx & 31)); - unsigned int bUnavailable = flg[bIdx / 32] & (1 << (bIdx & 31)); - - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - - //use inv_mass! - aUnavailable = !aIsStatic ? 
aUnavailable : 0; // - bUnavailable = !bIsStatic ? bUnavailable : 0; - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - flg[aIdx / 32] |= (1 << (aIdx & 31)); - if (!bIsStatic) - flg[bIdx / 32] |= (1 << (bIdx & 31)); - - cs[idx].getBatchIdx() = batchIdx; - sortData[idx].m_key = batchIdx; - sortData[idx].m_value = idx; - - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < N_FLG / 32; i++) flg[i] = 0; - } - } - } - else - { - idxDst[nIdxDst++] = idx; - } - } - b3Swap(idxSrc, idxDst); - b3Swap(nIdxSrc, nIdxDst); - batchIdx++; - } - } - { - B3_PROFILE("quickSort"); - sortData.quickSort(sortfnc); - } - - { - B3_PROFILE("reorder"); - // reorder - - memcpy(&old[0], cs, sizeof(b3Contact4) * n); - for (int i = 0; i < n; i++) - { - int idx = sortData[i].m_value; - cs[i] = old[idx]; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < n; i++) - { - b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - return batchIdx; -} - -b3AlignedObjectArray<int> bodyUsed2; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch2(b3Contact4* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies) -{ - B3_PROFILE("sortConstraintByBatch2"); - - bodyUsed2.resize(2 * simdWidth); - - for (int q = 0; q < 2 * simdWidth; q++) - bodyUsed2[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(numConstraints); - m_data->m_idxBuffer.resize(numConstraints); - m_data->m_old.resize(numConstraints); - - unsigned int* idxSrc = &m_data->m_idxBuffer[0]; - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - for (int i = 0; i < numConstraints; i++) - idxSrc[i] = i; - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - // 
clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed2[i] = 0; - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = idxSrc[i]; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - for (int j = 0; j < curBodyUsed; j++) - { - if (bodyA == bodyUsed2[j]) - { - aUnavailable = 1; - break; - } - } - } - if (!aUnavailable) - if (!bIsStatic) - { - for (int j = 0; j < curBodyUsed; j++) - { - if (bodyB == bodyUsed2[j]) - { - bUnavailable = 1; - break; - } - } - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed2[curBodyUsed++] = bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - m_data->m_sortData[idx].m_key = batchIdx; - m_data->m_sortData[idx].m_value = idx; - - if (i != numValidConstraints) - { - b3Swap(idxSrc[i], idxSrc[numValidConstraints]); - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed2[i] = 0; - - curBodyUsed = 0; - } - } - } - } - - batchIdx++; - } - } - { - B3_PROFILE("quickSort"); - //m_data->m_sortData.quickSort(sortfnc); - } - - { - B3_PROFILE("reorder"); - // reorder - - memcpy(&m_data->m_old[0], cs, sizeof(b3Contact4) * numConstraints); - - for (int i = 0; i < numConstraints; i++) - { - b3Assert(m_data->m_sortData[idxSrc[i]].m_value == idxSrc[i]); - int idx = m_data->m_sortData[idxSrc[i]].m_value; - cs[i] = m_data->m_old[idx]; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - 
b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - - return batchIdx; -} - -b3AlignedObjectArray<int> bodyUsed; -b3AlignedObjectArray<int> curUsed; - -inline int b3GpuPgsContactSolver::sortConstraintByBatch3(b3Contact4* cs, int numConstraints, int simdWidth, int staticIdx, int numBodies, int* batchSizes) -{ - B3_PROFILE("sortConstraintByBatch3"); - - static int maxSwaps = 0; - int numSwaps = 0; - - curUsed.resize(2 * simdWidth); - - static int maxNumConstraints = 0; - if (maxNumConstraints < numConstraints) - { - maxNumConstraints = numConstraints; - //printf("maxNumConstraints = %d\n",maxNumConstraints ); - } - - int numUsedArray = numBodies / 32 + 1; - bodyUsed.resize(numUsedArray); - - for (int q = 0; q < numUsedArray; q++) - bodyUsed[q] = 0; - - int curBodyUsed = 0; - - int numIter = 0; - - m_data->m_sortData.resize(0); - m_data->m_idxBuffer.resize(0); - m_data->m_old.resize(0); - -#if defined(_DEBUG) - for (int i = 0; i < numConstraints; i++) - cs[i].getBatchIdx() = -1; -#endif - - int numValidConstraints = 0; - // int unprocessedConstraintIndex = 0; - - int batchIdx = 0; - - { - B3_PROFILE("cpu batch innerloop"); - - while (numValidConstraints < numConstraints) - { - numIter++; - int nCurrentBatch = 0; - batchSizes[batchIdx] = 0; - - // clear flag - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - - curBodyUsed = 0; - - for (int i = numValidConstraints; i < numConstraints; i++) - { - int idx = i; - b3Assert(idx < numConstraints); - // check if it can go - int bodyAS = cs[idx].m_bodyAPtrAndSignBit; - int bodyBS = cs[idx].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS < 0) || bodyAS == staticIdx; - bool bIsStatic = (bodyBS < 0) || bodyBS == staticIdx; - int aUnavailable = 0; - int bUnavailable = 0; - if (!aIsStatic) - { - aUnavailable = bodyUsed[bodyA / 32] & (1 << (bodyA & 31)); - } - if (!aUnavailable) - if (!bIsStatic) - { - bUnavailable = bodyUsed[bodyB / 32] & (1 << (bodyB & 
31)); - } - - if (aUnavailable == 0 && bUnavailable == 0) // ok - { - if (!aIsStatic) - { - bodyUsed[bodyA / 32] |= (1 << (bodyA & 31)); - curUsed[curBodyUsed++] = bodyA; - } - if (!bIsStatic) - { - bodyUsed[bodyB / 32] |= (1 << (bodyB & 31)); - curUsed[curBodyUsed++] = bodyB; - } - - cs[idx].getBatchIdx() = batchIdx; - - if (i != numValidConstraints) - { - b3Swap(cs[i], cs[numValidConstraints]); - numSwaps++; - } - - numValidConstraints++; - { - nCurrentBatch++; - if (nCurrentBatch == simdWidth) - { - batchSizes[batchIdx] += simdWidth; - nCurrentBatch = 0; - for (int i = 0; i < curBodyUsed; i++) - bodyUsed[curUsed[i] / 32] = 0; - curBodyUsed = 0; - } - } - } - } - - if (batchIdx >= B3_MAX_NUM_BATCHES) - { - b3Error("batchIdx>=B3_MAX_NUM_BATCHES"); - b3Assert(0); - break; - } - - batchSizes[batchIdx] += nCurrentBatch; - - batchIdx++; - } - } - -#if defined(_DEBUG) - // debugPrintf( "nBatches: %d\n", batchIdx ); - for (int i = 0; i < numConstraints; i++) - { - b3Assert(cs[i].getBatchIdx() != -1); - } -#endif - - batchSizes[batchIdx] = 0; - - if (maxSwaps < numSwaps) - { - maxSwaps = numSwaps; - //printf("maxSwaps = %d\n", maxSwaps); - } - - return batchIdx; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h deleted file mode 100644 index 6ab7502af3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuPgsContactSolver.h +++ /dev/null @@ -1,37 +0,0 @@ - -#ifndef B3_GPU_BATCHING_PGS_SOLVER_H -#define B3_GPU_BATCHING_PGS_SOLVER_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "b3GpuConstraint4.h" - -class b3GpuPgsContactSolver -{ -protected: - int m_debugOutput; - - struct b3GpuBatchingPgsSolverInternalData* m_data; - - void 
batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx); - - inline int sortConstraintByBatch(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies); - inline int sortConstraintByBatch2(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies); - inline int sortConstraintByBatch3(b3Contact4* cs, int n, int simdWidth, int staticIdx, int numBodies, int* batchSizes); - - void solveContactConstraintBatchSizes(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes); //const b3OpenCLArray<int>* gpuBatchSizes); - - void solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, int numIterations, const b3AlignedObjectArray<int>* batchSizes); //const b3OpenCLArray<int>* gpuBatchSizes); - -public: - b3GpuPgsContactSolver(cl_context ctx, cl_device_id device, cl_command_queue q, int pairCapacity); - virtual ~b3GpuPgsContactSolver(); - - void solveContacts(int numBodies, cl_mem bodyBuf, cl_mem inertiaBuf, int numContacts, cl_mem contactBuf, const struct b3Config& config, int static0Index); -}; - -#endif //B3_GPU_BATCHING_PGS_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp deleted file mode 100644 index fef33ad1cd..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.cpp +++ /dev/null @@ -1,677 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "b3GpuRigidBodyPipeline.h" -#include "b3GpuRigidBodyPipelineInternalData.h" -#include "kernels/integrateKernel.h" -#include "kernels/updateAabbsKernel.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" -#include "b3GpuNarrowPhase.h" -#include "Bullet3Geometry/b3AabbUtil.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Dynamics/ConstraintSolver/b3PgsJacobiSolver.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" -#include "Bullet3Collision/BroadPhaseCollision/b3DynamicBvhBroadphase.h" - -//#define TEST_OTHER_GPU_SOLVER - -#define B3_RIGIDBODY_INTEGRATE_PATH "src/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl" -#define B3_RIGIDBODY_UPDATEAABB_PATH "src/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl" - -bool useBullet2CpuSolver = true; - -//choice of contact solver -bool gUseJacobi = false; -bool gUseDbvt = false; -bool gDumpContactStats = false; -bool gCalcWorldSpaceAabbOnCpu = false; -bool gUseCalculateOverlappingPairsHost = false; -bool gIntegrateOnCpu = false; -bool gClearPairsOnGpu = 
true; - -#define TEST_OTHER_GPU_SOLVER 1 -#ifdef TEST_OTHER_GPU_SOLVER -#include "b3GpuJacobiContactSolver.h" -#endif //TEST_OTHER_GPU_SOLVER - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" -#include "Bullet3OpenCL/RigidBody/b3GpuPgsConstraintSolver.h" - -#include "b3GpuPgsContactSolver.h" -#include "b3Solver.h" - -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" -#include "Bullet3OpenCL/Raycast/b3GpuRaycast.h" - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" -#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" - -b3GpuRigidBodyPipeline::b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config) -{ - m_data = new b3GpuRigidBodyPipelineInternalData; - m_data->m_constraintUid = 0; - m_data->m_config = config; - m_data->m_context = ctx; - m_data->m_device = device; - m_data->m_queue = q; - - m_data->m_solver = new b3PgsJacobiSolver(true); //new b3PgsJacobiSolver(true); - m_data->m_gpuSolver = new b3GpuPgsConstraintSolver(ctx, device, q, true); //new b3PgsJacobiSolver(true); - - m_data->m_allAabbsGPU = new b3OpenCLArray<b3SapAabb>(ctx, q, config.m_maxConvexBodies); - m_data->m_overlappingPairsGPU = new b3OpenCLArray<b3BroadphasePair>(ctx, q, config.m_maxBroadphasePairs); - - m_data->m_gpuConstraints = new b3OpenCLArray<b3GpuGenericConstraint>(ctx, q); -#ifdef TEST_OTHER_GPU_SOLVER - m_data->m_solver3 = new b3GpuJacobiContactSolver(ctx, device, q, config.m_maxBroadphasePairs); -#endif // TEST_OTHER_GPU_SOLVER - - m_data->m_solver2 = new b3GpuPgsContactSolver(ctx, device, q, config.m_maxBroadphasePairs); - - m_data->m_raycaster = new b3GpuRaycast(ctx, device, q); - - m_data->m_broadphaseDbvt = broadphaseDbvt; - m_data->m_broadphaseSap = broadphaseSap; - 
m_data->m_narrowphase = narrowphase; - m_data->m_gravity.setValue(0.f, -9.8f, 0.f); - - cl_int errNum = 0; - - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, integrateKernelCL, &errNum, "", B3_RIGIDBODY_INTEGRATE_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_integrateTransformsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, integrateKernelCL, "integrateTransformsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - clReleaseProgram(prog); - } - { - cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, &errNum, "", B3_RIGIDBODY_UPDATEAABB_PATH); - b3Assert(errNum == CL_SUCCESS); - m_data->m_updateAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "initializeGpuAabbsFull", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - m_data->m_clearOverlappingPairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, updateAabbsKernelCL, "clearOverlappingPairsKernel", &errNum, prog); - b3Assert(errNum == CL_SUCCESS); - - clReleaseProgram(prog); - } -} - -b3GpuRigidBodyPipeline::~b3GpuRigidBodyPipeline() -{ - if (m_data->m_integrateTransformsKernel) - clReleaseKernel(m_data->m_integrateTransformsKernel); - - if (m_data->m_updateAabbsKernel) - clReleaseKernel(m_data->m_updateAabbsKernel); - - if (m_data->m_clearOverlappingPairsKernel) - clReleaseKernel(m_data->m_clearOverlappingPairsKernel); - delete m_data->m_raycaster; - delete m_data->m_solver; - delete m_data->m_allAabbsGPU; - delete m_data->m_gpuConstraints; - delete m_data->m_overlappingPairsGPU; - -#ifdef TEST_OTHER_GPU_SOLVER - delete m_data->m_solver3; -#endif //TEST_OTHER_GPU_SOLVER - - delete m_data->m_solver2; - - delete m_data; -} - -void b3GpuRigidBodyPipeline::reset() -{ - m_data->m_gpuConstraints->resize(0); - m_data->m_cpuConstraints.resize(0); - 
m_data->m_allAabbsGPU->resize(0); - m_data->m_allAabbsCPU.resize(0); -} - -void b3GpuRigidBodyPipeline::addConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.push_back(constraint); -} - -void b3GpuRigidBodyPipeline::removeConstraint(b3TypedConstraint* constraint) -{ - m_data->m_joints.remove(constraint); -} - -void b3GpuRigidBodyPipeline::removeConstraintByUid(int uid) -{ - m_data->m_gpuSolver->recomputeBatches(); - //slow linear search - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); - //remove - for (int i = 0; i < m_data->m_cpuConstraints.size(); i++) - { - if (m_data->m_cpuConstraints[i].m_uid == uid) - { - //m_data->m_cpuConstraints.remove(m_data->m_cpuConstraints[i]); - m_data->m_cpuConstraints.swap(i, m_data->m_cpuConstraints.size() - 1); - m_data->m_cpuConstraints.pop_back(); - - break; - } - } - - if (m_data->m_cpuConstraints.size()) - { - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); - } - else - { - m_data->m_gpuConstraints->resize(0); - } -} -int b3GpuRigidBodyPipeline::createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; - c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_POINT2POINT_CONSTRAINT_TYPE; - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} -int b3GpuRigidBodyPipeline::createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold) -{ - m_data->m_gpuSolver->recomputeBatches(); - b3GpuGenericConstraint c; - c.m_uid = m_data->m_constraintUid; - m_data->m_constraintUid++; 
- c.m_flags = B3_CONSTRAINT_FLAG_ENABLED; - c.m_rbA = bodyA; - c.m_rbB = bodyB; - c.m_pivotInA.setValue(pivotInA[0], pivotInA[1], pivotInA[2]); - c.m_pivotInB.setValue(pivotInB[0], pivotInB[1], pivotInB[2]); - c.m_relTargetAB.setValue(relTargetAB[0], relTargetAB[1], relTargetAB[2], relTargetAB[3]); - c.m_breakingImpulseThreshold = breakingThreshold; - c.m_constraintType = B3_GPU_FIXED_CONSTRAINT_TYPE; - - m_data->m_cpuConstraints.push_back(c); - return c.m_uid; -} - -void b3GpuRigidBodyPipeline::stepSimulation(float deltaTime) -{ - //update worldspace AABBs from local AABB/worldtransform - { - B3_PROFILE("setupGpuAabbs"); - setupGpuAabbsFull(); - } - - int numPairs = 0; - - //compute overlapping pairs - { - if (gUseDbvt) - { - { - B3_PROFILE("setAabb"); - m_data->m_allAabbsGPU->copyToHost(m_data->m_allAabbsCPU); - for (int i = 0; i < m_data->m_allAabbsCPU.size(); i++) - { - b3Vector3 aabbMin = b3MakeVector3(m_data->m_allAabbsCPU[i].m_min[0], m_data->m_allAabbsCPU[i].m_min[1], m_data->m_allAabbsCPU[i].m_min[2]); - b3Vector3 aabbMax = b3MakeVector3(m_data->m_allAabbsCPU[i].m_max[0], m_data->m_allAabbsCPU[i].m_max[1], m_data->m_allAabbsCPU[i].m_max[2]); - m_data->m_broadphaseDbvt->setAabb(i, aabbMin, aabbMax, 0); - } - } - - { - B3_PROFILE("calculateOverlappingPairs"); - m_data->m_broadphaseDbvt->calculateOverlappingPairs(); - } - numPairs = m_data->m_broadphaseDbvt->getOverlappingPairCache()->getNumOverlappingPairs(); - } - else - { - if (gUseCalculateOverlappingPairsHost) - { - m_data->m_broadphaseSap->calculateOverlappingPairsHost(m_data->m_config.m_maxBroadphasePairs); - } - else - { - m_data->m_broadphaseSap->calculateOverlappingPairs(m_data->m_config.m_maxBroadphasePairs); - } - numPairs = m_data->m_broadphaseSap->getNumOverlap(); - } - } - - //compute contact points - // printf("numPairs=%d\n",numPairs); - - int numContacts = 0; - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - - if (numPairs) - { - cl_mem pairs = 0; - cl_mem aabbsWS = 0; - if 
(gUseDbvt) - { - B3_PROFILE("m_overlappingPairsGPU->copyFromHost"); - m_data->m_overlappingPairsGPU->copyFromHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - pairs = m_data->m_overlappingPairsGPU->getBufferCL(); - aabbsWS = m_data->m_allAabbsGPU->getBufferCL(); - } - else - { - pairs = m_data->m_broadphaseSap->getOverlappingPairBuffer(); - aabbsWS = m_data->m_broadphaseSap->getAabbBufferWS(); - } - - m_data->m_overlappingPairsGPU->resize(numPairs); - - //mark the contacts for each pair as 'unused' - if (numPairs) - { - b3OpenCLArray<b3BroadphasePair> gpuPairs(this->m_data->m_context, m_data->m_queue); - gpuPairs.setFromOpenCLBuffer(pairs, numPairs); - - if (gClearPairsOnGpu) - { - //b3AlignedObjectArray<b3BroadphasePair> hostPairs;//just for debugging - //gpuPairs.copyToHost(hostPairs); - - b3LauncherCL launcher(m_data->m_queue, m_data->m_clearOverlappingPairsKernel, "clearOverlappingPairsKernel"); - launcher.setBuffer(pairs); - launcher.setConst(numPairs); - launcher.launch1D(numPairs); - - //gpuPairs.copyToHost(hostPairs); - } - else - { - b3AlignedObjectArray<b3BroadphasePair> hostPairs; - gpuPairs.copyToHost(hostPairs); - - for (int i = 0; i < hostPairs.size(); i++) - { - hostPairs[i].z = 0xffffffff; - } - - gpuPairs.copyFromHost(hostPairs); - } - } - - m_data->m_narrowphase->computeContacts(pairs, numPairs, aabbsWS, numBodies); - numContacts = m_data->m_narrowphase->getNumContactsGpu(); - - if (gUseDbvt) - { - ///store the cached information (contact locations in the 'z' component) - B3_PROFILE("m_overlappingPairsGPU->copyToHost"); - m_data->m_overlappingPairsGPU->copyToHost(m_data->m_broadphaseDbvt->getOverlappingPairCache()->getOverlappingPairArray()); - } - if (gDumpContactStats && numContacts) - { - m_data->m_narrowphase->getContactsGpu(); - - printf("numContacts = %d\n", numContacts); - - int totalPoints = 0; - const b3Contact4* contacts = m_data->m_narrowphase->getContactsCPU(); - - for (int i = 0; i < numContacts; 
i++) - { - totalPoints += contacts->getNPoints(); - } - printf("totalPoints=%d\n", totalPoints); - } - } - - //convert contact points to contact constraints - - //solve constraints - - b3OpenCLArray<b3RigidBodyData> gpuBodies(m_data->m_context, m_data->m_queue, 0, true); - gpuBodies.setFromOpenCLBuffer(m_data->m_narrowphase->getBodiesGpu(), m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray<b3InertiaData> gpuInertias(m_data->m_context, m_data->m_queue, 0, true); - gpuInertias.setFromOpenCLBuffer(m_data->m_narrowphase->getBodyInertiasGpu(), m_data->m_narrowphase->getNumRigidBodies()); - b3OpenCLArray<b3Contact4> gpuContacts(m_data->m_context, m_data->m_queue, 0, true); - gpuContacts.setFromOpenCLBuffer(m_data->m_narrowphase->getContactsGpu(), m_data->m_narrowphase->getNumContactsGpu()); - - int numJoints = m_data->m_joints.size() ? m_data->m_joints.size() : m_data->m_cpuConstraints.size(); - if (useBullet2CpuSolver && numJoints) - { - // b3AlignedObjectArray<b3Contact4> hostContacts; - //gpuContacts.copyToHost(hostContacts); - { - bool useGpu = m_data->m_joints.size() == 0; - - // b3Contact4* contacts = numContacts? &hostContacts[0]: 0; - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,contacts,numJoints, joints); - if (useGpu) - { - m_data->m_gpuSolver->solveJoints(m_data->m_narrowphase->getNumRigidBodies(), &gpuBodies, &gpuInertias, numJoints, m_data->m_gpuConstraints); - } - else - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray<b3InertiaData> hostInertias; - gpuInertias.copyToHost(hostInertias); - - b3TypedConstraint** joints = numJoints ? 
&m_data->m_joints[0] : 0; - m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumRigidBodies(), &hostBodies[0], &hostInertias[0], 0, 0, numJoints, joints); - gpuBodies.copyFromHost(hostBodies); - } - } - } - - if (numContacts) - { -#ifdef TEST_OTHER_GPU_SOLVER - - if (gUseJacobi) - { - bool useGpu = true; - if (useGpu) - { - bool forceHost = false; - if (forceHost) - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - b3AlignedObjectArray<b3InertiaData> hostInertias; - b3AlignedObjectArray<b3Contact4> hostContacts; - - { - B3_PROFILE("copyToHost"); - gpuBodies.copyToHost(hostBodies); - gpuInertias.copyToHost(hostInertias); - gpuContacts.copyToHost(hostContacts); - } - - { - b3JacobiSolverInfo solverInfo; - m_data->m_solver3->solveGroupHost(&hostBodies[0], &hostInertias[0], hostBodies.size(), &hostContacts[0], hostContacts.size(), solverInfo); - } - { - B3_PROFILE("copyFromHost"); - gpuBodies.copyFromHost(hostBodies); - } - } - else - - { - int static0Index = m_data->m_narrowphase->getStatic0Index(); - b3JacobiSolverInfo solverInfo; - //m_data->m_solver3->solveContacts( >solveGroup(&gpuBodies, &gpuInertias, &gpuContacts,solverInfo); - //m_data->m_solver3->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - m_data->m_solver3->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index); - } - } - else - { - b3AlignedObjectArray<b3RigidBodyData> hostBodies; - gpuBodies.copyToHost(hostBodies); - b3AlignedObjectArray<b3InertiaData> hostInertias; - gpuInertias.copyToHost(hostInertias); - b3AlignedObjectArray<b3Contact4> hostContacts; - gpuContacts.copyToHost(hostContacts); - { - //m_data->m_solver->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(),&hostBodies[0],&hostInertias[0],numContacts,&hostContacts[0]); - } - gpuBodies.copyFromHost(hostBodies); - } - } - else -#endif //TEST_OTHER_GPU_SOLVER - { 
- int static0Index = m_data->m_narrowphase->getStatic0Index(); - m_data->m_solver2->solveContacts(numBodies, gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL(), m_data->m_config, static0Index); - - //m_data->m_solver4->solveContacts(m_data->m_narrowphase->getNumBodiesGpu(), gpuBodies.getBufferCL(), gpuInertias.getBufferCL(), numContacts, gpuContacts.getBufferCL()); - - /*m_data->m_solver3->solveContactConstraintHost( - (b3OpenCLArray<RigidBodyBase::Body>*)&gpuBodies, - (b3OpenCLArray<RigidBodyBase::Inertia>*)&gpuInertias, - (b3OpenCLArray<Constraint4>*) &gpuContacts, - 0,numContacts,256); - */ - } - } - - integrate(deltaTime); -} - -void b3GpuRigidBodyPipeline::integrate(float timeStep) -{ - //integrate - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - float angularDamp = 0.99f; - - if (gIntegrateOnCpu) - { - if (numBodies) - { - b3GpuNarrowPhaseInternalData* npData = m_data->m_narrowphase->getInternalData(); - npData->m_bodyBufferGPU->copyToHost(*npData->m_bodyBufferCPU); - - b3RigidBodyData_t* bodies = &npData->m_bodyBufferCPU->at(0); - - for (int nodeID = 0; nodeID < numBodies; nodeID++) - { - integrateSingleTransform(bodies, nodeID, timeStep, angularDamp, m_data->m_gravity); - } - npData->m_bodyBufferGPU->copyFromHost(*npData->m_bodyBufferCPU); - } - } - else - { - b3LauncherCL launcher(m_data->m_queue, m_data->m_integrateTransformsKernel, "m_integrateTransformsKernel"); - launcher.setBuffer(m_data->m_narrowphase->getBodiesGpu()); - - launcher.setConst(numBodies); - launcher.setConst(timeStep); - launcher.setConst(angularDamp); - launcher.setConst(m_data->m_gravity); - launcher.launch1D(numBodies); - } -} - -void b3GpuRigidBodyPipeline::setupGpuAabbsFull() -{ - cl_int ciErrNum = 0; - - int numBodies = m_data->m_narrowphase->getNumRigidBodies(); - if (!numBodies) - return; - - if (gCalcWorldSpaceAabbOnCpu) - { - if (numBodies) - { - if (gUseDbvt) - { - m_data->m_allAabbsCPU.resize(numBodies); - 
m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i = 0; i < numBodies; i++) - { - b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_allAabbsCPU[0]); - } - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } - else - { - m_data->m_broadphaseSap->getAllAabbsCPU().resize(numBodies); - m_data->m_narrowphase->readbackAllBodiesToCpu(); - for (int i = 0; i < numBodies; i++) - { - b3ComputeWorldAabb(i, m_data->m_narrowphase->getBodiesCpu(), m_data->m_narrowphase->getCollidablesCpu(), m_data->m_narrowphase->getLocalSpaceAabbsCpu(), &m_data->m_broadphaseSap->getAllAabbsCPU()[0]); - } - m_data->m_broadphaseSap->getAllAabbsGPU().copyFromHost(m_data->m_broadphaseSap->getAllAabbsCPU()); - //m_data->m_broadphaseSap->writeAabbsToGpu(); - } - } - } - else - { - //__kernel void initializeGpuAabbsFull( const int numNodes, __global Body* gBodies,__global Collidable* collidables, __global b3AABBCL* plocalShapeAABB, __global b3AABBCL* pAABB) - b3LauncherCL launcher(m_data->m_queue, m_data->m_updateAabbsKernel, "m_updateAabbsKernel"); - launcher.setConst(numBodies); - cl_mem bodies = m_data->m_narrowphase->getBodiesGpu(); - launcher.setBuffer(bodies); - cl_mem collidables = m_data->m_narrowphase->getCollidablesGpu(); - launcher.setBuffer(collidables); - cl_mem localAabbs = m_data->m_narrowphase->getAabbLocalSpaceBufferGpu(); - launcher.setBuffer(localAabbs); - - cl_mem worldAabbs = 0; - if (gUseDbvt) - { - worldAabbs = m_data->m_allAabbsGPU->getBufferCL(); - } - else - { - worldAabbs = m_data->m_broadphaseSap->getAabbBufferWS(); - } - launcher.setBuffer(worldAabbs); - launcher.launch1D(numBodies); - - oclCHECKERROR(ciErrNum, CL_SUCCESS); - } - - /* - b3AlignedObjectArray<b3SapAabb> aabbs; - m_data->m_broadphaseSap->m_allAabbsGPU.copyToHost(aabbs); - - printf("numAabbs = %d\n", aabbs.size()); - - for (int i=0;i<aabbs.size();i++) - { - 
printf("aabb[%d].m_min=%f,%f,%f,%d\n",i,aabbs[i].m_minVec[0],aabbs[i].m_minVec[1],aabbs[i].m_minVec[2],aabbs[i].m_minIndices[3]); - printf("aabb[%d].m_max=%f,%f,%f,%d\n",i,aabbs[i].m_maxVec[0],aabbs[i].m_maxVec[1],aabbs[i].m_maxVec[2],aabbs[i].m_signedMaxIndices[3]); - - }; - */ -} - -cl_mem b3GpuRigidBodyPipeline::getBodyBuffer() -{ - return m_data->m_narrowphase->getBodiesGpu(); -} - -int b3GpuRigidBodyPipeline::getNumBodies() const -{ - return m_data->m_narrowphase->getNumRigidBodies(); -} - -void b3GpuRigidBodyPipeline::setGravity(const float* grav) -{ - m_data->m_gravity.setValue(grav[0], grav[1], grav[2]); -} - -void b3GpuRigidBodyPipeline::copyConstraintsToHost() -{ - m_data->m_gpuConstraints->copyToHost(m_data->m_cpuConstraints); -} - -void b3GpuRigidBodyPipeline::writeAllInstancesToGpu() -{ - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - m_data->m_gpuConstraints->copyFromHost(m_data->m_cpuConstraints); -} - -int b3GpuRigidBodyPipeline::registerPhysicsInstance(float mass, const float* position, const float* orientation, int collidableIndex, int userIndex, bool writeInstanceToGpu) -{ - b3Vector3 aabbMin = b3MakeVector3(0, 0, 0), aabbMax = b3MakeVector3(0, 0, 0); - - if (collidableIndex >= 0) - { - b3SapAabb localAabb = m_data->m_narrowphase->getLocalSpaceAabb(collidableIndex); - b3Vector3 localAabbMin = b3MakeVector3(localAabb.m_min[0], localAabb.m_min[1], localAabb.m_min[2]); - b3Vector3 localAabbMax = b3MakeVector3(localAabb.m_max[0], localAabb.m_max[1], localAabb.m_max[2]); - - b3Scalar margin = 0.01f; - b3Transform t; - t.setIdentity(); - t.setOrigin(b3MakeVector3(position[0], position[1], position[2])); - t.setRotation(b3Quaternion(orientation[0], orientation[1], orientation[2], orientation[3])); - b3TransformAabb(localAabbMin, localAabbMax, margin, t, aabbMin, aabbMax); - } - else - { - b3Error("registerPhysicsInstance using invalid collidableIndex\n"); - return -1; - } - - bool writeToGpu = false; - int bodyIndex = 
m_data->m_narrowphase->getNumRigidBodies(); - bodyIndex = m_data->m_narrowphase->registerRigidBody(collidableIndex, mass, position, orientation, &aabbMin.getX(), &aabbMax.getX(), writeToGpu); - - if (bodyIndex >= 0) - { - if (gUseDbvt) - { - m_data->m_broadphaseDbvt->createProxy(aabbMin, aabbMax, bodyIndex, 0, 1, 1); - b3SapAabb aabb; - for (int i = 0; i < 3; i++) - { - aabb.m_min[i] = aabbMin[i]; - aabb.m_max[i] = aabbMax[i]; - aabb.m_minIndices[3] = bodyIndex; - } - m_data->m_allAabbsCPU.push_back(aabb); - if (writeInstanceToGpu) - { - m_data->m_allAabbsGPU->copyFromHost(m_data->m_allAabbsCPU); - } - } - else - { - if (mass) - { - m_data->m_broadphaseSap->createProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher); - } - else - { - m_data->m_broadphaseSap->createLargeProxy(aabbMin, aabbMax, bodyIndex, 1, 1); //m_dispatcher); - } - } - } - - /* - if (mass>0.f) - m_numDynamicPhysicsInstances++; - - m_numPhysicsInstances++; - */ - - return bodyIndex; -} - -void b3GpuRigidBodyPipeline::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults) -{ - this->m_data->m_raycaster->castRays(rays, hitResults, - getNumBodies(), this->m_data->m_narrowphase->getBodiesCpu(), - m_data->m_narrowphase->getNumCollidablesGpu(), m_data->m_narrowphase->getCollidablesCpu(), - m_data->m_narrowphase->getInternalData(), m_data->m_broadphaseSap); -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h deleted file mode 100644 index 0e5c6fec12..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipeline.h +++ /dev/null @@ -1,70 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_H -#define B3_GPU_RIGIDBODY_PIPELINE_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Common/b3AlignedObjectArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" - -class b3GpuRigidBodyPipeline -{ -protected: - struct b3GpuRigidBodyPipelineInternalData* m_data; - - int allocateCollidable(); - -public: - b3GpuRigidBodyPipeline(cl_context ctx, cl_device_id device, cl_command_queue q, class b3GpuNarrowPhase* narrowphase, class b3GpuBroadphaseInterface* broadphaseSap, struct b3DynamicBvhBroadphase* broadphaseDbvt, const b3Config& config); - virtual ~b3GpuRigidBodyPipeline(); - - void stepSimulation(float deltaTime); - void integrate(float timeStep); - void setupGpuAabbsFull(); - - int registerConvexPolyhedron(class b3ConvexUtility* convex); - - //int registerConvexPolyhedron(const float* vertices, int strideInBytes, int numVertices, const float* scaling); - //int registerSphereShape(float radius); - //int registerPlaneShape(const b3Vector3& planeNormal, float planeConstant); - - //int registerConcaveMesh(b3AlignedObjectArray<b3Vector3>* vertices, b3AlignedObjectArray<int>* indices, const float* scaling); - //int 
registerCompoundShape(b3AlignedObjectArray<b3GpuChildShape>* childShapes); - - int registerPhysicsInstance(float mass, const float* position, const float* orientation, int collisionShapeIndex, int userData, bool writeInstanceToGpu); - //if you passed "writeInstanceToGpu" false in the registerPhysicsInstance method (for performance) you need to call writeAllInstancesToGpu after all instances are registered - void writeAllInstancesToGpu(); - void copyConstraintsToHost(); - void setGravity(const float* grav); - void reset(); - - int createPoint2PointConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, float breakingThreshold); - int createFixedConstraint(int bodyA, int bodyB, const float* pivotInA, const float* pivotInB, const float* relTargetAB, float breakingThreshold); - void removeConstraintByUid(int uid); - - void addConstraint(class b3TypedConstraint* constraint); - void removeConstraint(b3TypedConstraint* constraint); - - void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults); - - cl_mem getBodyBuffer(); - - int getNumBodies() const; -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_H
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h deleted file mode 100644 index e0a26fda17..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuRigidBodyPipelineInternalData.h +++ /dev/null @@ -1,68 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H -#define B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H - -#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" - -#include "Bullet3OpenCL/BroadphaseCollision/b3SapAabb.h" -#include "Bullet3Dynamics/ConstraintSolver/b3TypedConstraint.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Config.h" - -#include "Bullet3Collision/BroadPhaseCollision/b3OverlappingPair.h" -#include "Bullet3OpenCL/RigidBody/b3GpuGenericConstraint.h" - -struct b3GpuRigidBodyPipelineInternalData -{ - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - cl_kernel m_integrateTransformsKernel; - cl_kernel m_updateAabbsKernel; - cl_kernel m_clearOverlappingPairsKernel; - - class b3PgsJacobiSolver* m_solver; - - class b3GpuPgsConstraintSolver* m_gpuSolver; - - class b3GpuPgsContactSolver* m_solver2; - class b3GpuJacobiContactSolver* m_solver3; - class b3GpuRaycast* m_raycaster; - - class b3GpuBroadphaseInterface* m_broadphaseSap; - - struct b3DynamicBvhBroadphase* m_broadphaseDbvt; - b3OpenCLArray<b3SapAabb>* m_allAabbsGPU; - b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; - b3OpenCLArray<b3BroadphasePair>* m_overlappingPairsGPU; - - b3OpenCLArray<b3GpuGenericConstraint>* m_gpuConstraints; - b3AlignedObjectArray<b3GpuGenericConstraint> m_cpuConstraints; - - b3AlignedObjectArray<b3TypedConstraint*> m_joints; - int m_constraintUid; - class b3GpuNarrowPhase* m_narrowphase; - b3Vector3 m_gravity; - - b3Config m_config; -}; - -#endif //B3_GPU_RIGIDBODY_PIPELINE_INTERNAL_DATA_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h deleted file mode 100644 index db815d9b31..0000000000 --- 
a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverBody.h +++ /dev/null @@ -1,210 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#ifndef B3_GPU_SOLVER_BODY_H -#define B3_GPU_SOLVER_BODY_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" - -#include "Bullet3Common/b3AlignedAllocator.h" -#include "Bullet3Common/b3TransformUtil.h" - -///Until we get other contributions, only use SIMD on Windows, when using Visual Studio 2008 or later, and not double precision -#ifdef B3_USE_SSE -#define USE_SIMD 1 -#endif // - -///The b3SolverBody is an internal datastructure for the constraint solver. Only necessary data is packed to increase cache coherence/performance. 
-B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuSolverBody -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - // b3Transform m_worldTransformUnused; - b3Vector3 m_deltaLinearVelocity; - b3Vector3 m_deltaAngularVelocity; - b3Vector3 m_angularFactor; - b3Vector3 m_linearFactor; - b3Vector3 m_invMass; - b3Vector3 m_pushVelocity; - b3Vector3 m_turnVelocity; - b3Vector3 m_linearVelocity; - b3Vector3 m_angularVelocity; - - union { - void* m_originalBody; - int m_originalBodyIndex; - }; - - int padding[3]; - - /* - void setWorldTransform(const b3Transform& worldTransform) - { - m_worldTransform = worldTransform; - } - - const b3Transform& getWorldTransform() const - { - return m_worldTransform; - } - */ - B3_FORCE_INLINE void getVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - if (m_originalBody) - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - else - velocity.setValue(0, 0, 0); - } - - B3_FORCE_INLINE void getAngularVelocity(b3Vector3 & angVel) const - { - if (m_originalBody) - angVel = m_angularVelocity + m_deltaAngularVelocity; - else - angVel.setValue(0, 0, 0); - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void applyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - B3_FORCE_INLINE void internalApplyPushImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, b3Scalar impulseMagnitude) - { - if (m_originalBody) - { - m_pushVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_turnVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - const b3Vector3& 
getDeltaLinearVelocity() const - { - return m_deltaLinearVelocity; - } - - const b3Vector3& getDeltaAngularVelocity() const - { - return m_deltaAngularVelocity; - } - - const b3Vector3& getPushVelocity() const - { - return m_pushVelocity; - } - - const b3Vector3& getTurnVelocity() const - { - return m_turnVelocity; - } - - //////////////////////////////////////////////// - ///some internal methods, don't use them - - b3Vector3& internalGetDeltaLinearVelocity() - { - return m_deltaLinearVelocity; - } - - b3Vector3& internalGetDeltaAngularVelocity() - { - return m_deltaAngularVelocity; - } - - const b3Vector3& internalGetAngularFactor() const - { - return m_angularFactor; - } - - const b3Vector3& internalGetInvMass() const - { - return m_invMass; - } - - void internalSetInvMass(const b3Vector3& invMass) - { - m_invMass = invMass; - } - - b3Vector3& internalGetPushVelocity() - { - return m_pushVelocity; - } - - b3Vector3& internalGetTurnVelocity() - { - return m_turnVelocity; - } - - B3_FORCE_INLINE void internalGetVelocityInLocalPointObsolete(const b3Vector3& rel_pos, b3Vector3& velocity) const - { - velocity = m_linearVelocity + m_deltaLinearVelocity + (m_angularVelocity + m_deltaAngularVelocity).cross(rel_pos); - } - - B3_FORCE_INLINE void internalGetAngularVelocity(b3Vector3 & angVel) const - { - angVel = m_angularVelocity + m_deltaAngularVelocity; - } - - //Optimization for the iterative solver: avoid calculating constant terms involving inertia, normal, relative position - B3_FORCE_INLINE void internalApplyImpulse(const b3Vector3& linearComponent, const b3Vector3& angularComponent, const b3Scalar impulseMagnitude) - { - //if (m_originalBody) - { - m_deltaLinearVelocity += linearComponent * impulseMagnitude * m_linearFactor; - m_deltaAngularVelocity += angularComponent * (impulseMagnitude * m_angularFactor); - } - } - - void writebackVelocity() - { - //if (m_originalBody>=0) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += 
m_deltaAngularVelocity; - - //m_originalBody->setCompanionId(-1); - } - } - - void writebackVelocityAndTransform(b3Scalar timeStep, b3Scalar splitImpulseTurnErp) - { - (void)timeStep; - if (m_originalBody) - { - m_linearVelocity += m_deltaLinearVelocity; - m_angularVelocity += m_deltaAngularVelocity; - - //correct the position/orientation based on push/turn recovery - b3Transform newTransform; - if (m_pushVelocity[0] != 0.f || m_pushVelocity[1] != 0 || m_pushVelocity[2] != 0 || m_turnVelocity[0] != 0.f || m_turnVelocity[1] != 0 || m_turnVelocity[2] != 0) - { - // b3Quaternion orn = m_worldTransform.getRotation(); - // b3TransformUtil::integrateTransform(m_worldTransform,m_pushVelocity,m_turnVelocity*splitImpulseTurnErp,timeStep,newTransform); - // m_worldTransform = newTransform; - } - //m_worldTransform.setRotation(orn); - //m_originalBody->setCompanionId(-1); - } - } -}; - -#endif //B3_SOLVER_BODY_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h deleted file mode 100644 index 7d9eea243a..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3GpuSolverConstraint.h +++ /dev/null @@ -1,73 +0,0 @@ -/* -Bullet Continuous Collision Detection and Physics Library -Copyright (c) 2013 Erwin Coumans http://github.com/erwincoumans/bullet3 - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ - -#ifndef B3_GPU_SOLVER_CONSTRAINT_H -#define B3_GPU_SOLVER_CONSTRAINT_H - -#include "Bullet3Common/b3Vector3.h" -#include "Bullet3Common/b3Matrix3x3.h" -//#include "b3JacobianEntry.h" -#include "Bullet3Common/b3AlignedObjectArray.h" - -//#define NO_FRICTION_TANGENTIALS 1 - -///1D constraint along a normal axis between bodyA and bodyB. It can be combined to solve contact and friction constraints. -B3_ATTRIBUTE_ALIGNED16(struct) -b3GpuSolverConstraint -{ - B3_DECLARE_ALIGNED_ALLOCATOR(); - - b3Vector3 m_relpos1CrossNormal; - b3Vector3 m_contactNormal; - - b3Vector3 m_relpos2CrossNormal; - //b3Vector3 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - b3Vector3 m_angularComponentA; - b3Vector3 m_angularComponentB; - - mutable b3Scalar m_appliedPushImpulse; - mutable b3Scalar m_appliedImpulse; - int m_padding1; - int m_padding2; - b3Scalar m_friction; - b3Scalar m_jacDiagABInv; - b3Scalar m_rhs; - b3Scalar m_cfm; - - b3Scalar m_lowerLimit; - b3Scalar m_upperLimit; - b3Scalar m_rhsPenetration; - union { - void* m_originalContactPoint; - int m_originalConstraintIndex; - b3Scalar m_unusedPadding4; - }; - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - - enum b3SolverConstraintType - { - B3_SOLVER_CONTACT_1D = 0, - B3_SOLVER_FRICTION_1D - }; -}; - -typedef b3AlignedObjectArray<b3GpuSolverConstraint> b3GpuConstraintArray; - -#endif //B3_GPU_SOLVER_CONSTRAINT_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp deleted file mode 100644 index ccf67da1a8..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ /dev/null @@ -1,1128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. 
- -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "b3Solver.h" - -///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments -bool useNewBatchingKernel = true; -bool gConvertConstraintOnCpu = false; - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" - -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" - -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Common/b3Vector3.h" - 
-struct SolverDebugInfo -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - int m_valInt4; - int m_valInt5; - int m_valInt6; - int m_valInt7; - - int m_valInt8; - int m_valInt9; - int m_valInt10; - int m_valInt11; - - int m_valInt12; - int m_valInt13; - int m_valInt14; - int m_valInt15; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -}; - -class SolverDeviceInl -{ -public: - struct ParallelSolveData - { - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - }; -}; - -b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : m_context(ctx), - m_device(device), - m_queue(queue), - m_batchSizes(ctx, queue), - m_nIterations(4) -{ - m_sort32 = new b3RadixSort32CL(ctx, device, queue); - m_scan = new b3PrefixScanCL(ctx, device, queue, B3_SOLVER_N_CELLS); - m_search = new b3BoundSearchCL(ctx, device, queue, B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - - m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx, queue, sortSize); - m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx, queue); - - m_numConstraints = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); - m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_offsets = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); - m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - // const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - { - cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, 
B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - b3Assert(solverSetup2Prog); - - cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); - b3Assert(m_solveFrictionKernel); - - m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_solveContactKernel); - - m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); - b3Assert(m_contactToConstraintKernel); - - m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_setSortDataKernel); - - m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_reorderContactKernel); - - m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_copyConstraintKernel); - } - - { - cl_program batchingProg = 
b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); - //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); - b3Assert(batchingProg); - - m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); - b3Assert(m_batchingKernel); - } - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); - //m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros ); - b3Assert(m_batchingKernelNew); - } -} - -b3Solver::~b3Solver() -{ - delete m_offsets; - delete m_numConstraints; - delete m_sortDataBuffer; - delete m_contactBuffer2; - - delete m_sort32; - delete m_scan; - delete m_search; - - clReleaseKernel(m_batchingKernel); - clReleaseKernel(m_batchingKernelNew); - - clReleaseKernel(m_solveContactKernel); - clReleaseKernel(m_solveFrictionKernel); - - clReleaseKernel(m_contactToConstraintKernel); - clReleaseKernel(m_setSortDataKernel); - clReleaseKernel(m_reorderContactKernel); - clReleaseKernel(m_copyConstraintKernel); -} - -template <bool JACOBI> -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - b3Vector3 dLinVelA; - dLinVelA.setZero(); - b3Vector3 
dAngVelA; - dAngVelA.setZero(); - b3Vector3 dLinVelB; - dLinVelB.setZero(); - b3Vector3 dAngVelB; - dAngVelB.setZero(); - - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, &linear, &angular0, &angular1); - - float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (JACOBI) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - else - { - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - } - } - - if (JACOBI) - { - linVelA += dLinVelA; - angVelA += dAngVelA; - linVelB += dLinVelB; - angVelB += dAngVelB; - } -} - -static __inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float 
minRambdaDt[4]) -{ - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1(n, tangent[0], tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0] - center; - tangent[0] = cross3(n, r); - tangent[1] = cross3(tangent[0], n); - tangent[0] = normalize3(tangent[0]); - tangent[1] = normalize3(tangent[1]); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - setLinearAndAngular(tangent[i], r0, r1, &linear, &angular0, &angular1); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - - { // angular damping for point constraint - b3Vector3 ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - angVelA -= (angNA * 0.1f) * n; - angVelB -= (angNB * 0.1f) * n; - } - } -} -/* - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - 
b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; - */ - -struct SolveTask // : public ThreadPool::Task -{ - SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints, - int start, int nConstraints, int maxNumBatches, b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex) - : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_batchSizes(batchSizes), m_cellIndex(cellIndex), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches) - { - } - - unsigned short int getType() { return 0; } - - void run(int tIdx) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = m_batchSizes->at(m_cellIndex * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - for (int jj = 0; jj < numInBatch; jj++) - { - int i = m_start + offset + jj; - int batchId = m_constraints[i].m_batchIdx; - b3Assert(batchId == ii); - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - // int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if (!m_solveFriction) - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - solveContact<false>(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const 
b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - else - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - float sum = 0; - for (int j = 0; j < 4; j++) - { - sum += m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for (int j = 0; j < 4; j++) - { - maxRambdaDt[j] = frictionCoeff * sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - } - offset += numInBatch; - } - /* for (int bb=0;bb<m_maxNumBatches;bb++) - { - //for(int ic=m_nConstraints-1; ic>=0; ic--) - for(int ic=0; ic<m_nConstraints; ic++) - { - - int i = m_start + ic; - if (m_constraints[i].m_batchIdx != bb) - continue; - - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if( !m_solveFriction ) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - } - else - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 
0; - for(int j=0; j<4; j++) - { - sum +=m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - - } - } - } - */ - } - - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; -}; - -void b3Solver::solveContactConstraintHost(b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray<int>* batchSizes) -{ -#if 0 - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - for (int z=0;z<4;z++) - { - for (int y=0;y<4;y++) - { - for (int x=0;x<4;x++) - { - int newIndex = (x+y*nSplitX+z*nSplitX*nSplitY); - // printf("newIndex=%d\n",newIndex); - - int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - // printf("newIndex=%d\n",newIndex); - } - } - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) - { - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= 
(wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - /*int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - */ - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - // printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - } - } - } -#endif - - b3AlignedObjectArray<b3RigidBodyData> bodyNative; - bodyBuf->copyToHost(bodyNative); - b3AlignedObjectArray<b3InertiaData> shapeNative; - shapeBuf->copyToHost(shapeNative); - b3AlignedObjectArray<b3GpuConstraint4> constraintNative; - constraint->copyToHost(constraintNative); - - b3AlignedObjectArray<unsigned int> numConstraintsHost; - m_numConstraints->copyToHost(numConstraintsHost); - - //printf("------------------------\n"); - b3AlignedObjectArray<unsigned int> offsetsHost; - m_offsets->copyToHost(offsetsHost); - static int frame = 0; - bool useBatches = true; - if (useBatches) - { - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - //printf("cell Batch %d\n",cellBatch); - b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS]; - for (int i = 0; i < B3_SOLVER_N_CELLS; i++) - { - usedBodies[i].resize(0); - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if 
(numConstraintsHost[cellIdx] == 0) - continue; - - //printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - //printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]); - if (zIdx) - { - //printf("?\n"); - } - - if (iter == 0) - { - //printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx); - //printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]); - } - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, usedBodies, wgIdx, batchSizes, cellIdx); - task.m_solveFriction = false; - task.run(0); - } - } - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if (numConstraintsHost[cellIdx] == 0) - continue; - - //printf("yIdx=%d\n",yIdx); - - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, 0, 0, batchSizes, cellIdx); - task.m_solveFriction = true; - task.run(0); - } - } - } - } - else - { - for (int iter = 0; iter < m_nIterations; 
iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = false; - task.run(0); - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = true; - task.run(0); - } - } - - bodyBuf->copyFromHost(bodyNative); - shapeBuf->copyFromHost(shapeNative); - constraint->copyFromHost(constraintNative); - frame++; -} - -void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, - b3OpenCLArray<unsigned int>* m_numConstraints, - b3OpenCLArray<unsigned int>* m_offsets, - int batchId) -{ - // b3BufferInfoCL( m_numConstraints->getBufferCL() ), - // b3BufferInfoCL( m_offsets->getBufferCL() ) - - int cellBatch = batchId; - const int nn = B3_SOLVER_N_CELLS; - // int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; - - b3AlignedObjectArray<unsigned int> gN; - m_numConstraints->copyToHost(gN); - b3AlignedObjectArray<unsigned int> gOffsets; - m_offsets->copyToHost(gOffsets); - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - // int bIdx = batchId; - - b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints; - constraint->copyToHost(cpuConstraints); - - printf("batch = %d\n", batchId); - - int numWorkgroups = nn / B3_SOLVER_N_BATCHES; - b3AlignedObjectArray<int> usedBodies; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - printf("wgIdx = %d ", wgIdx); - - int zIdx = (wgIdx / ((nSplitX * nSplitY)) / 2) * 2 + ((cellBatch & 4) >> 2); - int remain = wgIdx % ((nSplitX * nSplitY)); - int yIdx = (remain % (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain / (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - printf("cellIdx=%d\n", cellIdx); - if (gN[cellIdx] == 0) - continue; - - const int start 
= gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - //printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB); - if (usedBodies.findLinearSearch(constraint.m_bodyA) < usedBodies.size()) - { - printf("error?\n"); - } - if (usedBodies.findLinearSearch(constraint.m_bodyB) < usedBodies.size()) - { - printf("error?\n"); - } - } - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - usedBodies.push_back(constraint.m_bodyA); - usedBodies.push_back(constraint.m_bodyB); - } - } -} - -static bool verify = false; - -void b3Solver::solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches) -{ - b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); - { - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches; //250; - - int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems); -#endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - if (verify) - { - checkConstraintBatch(bodyBuf, shapeBuf, constraint, m_numConstraints, m_offsets, ib); - } - -#ifdef DEBUG_ME - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - - cdata.z = ib; - - b3LauncherCL launcher(m_queue, m_solveContactKernel, "m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - 
b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - launcher.launch1D(numWorkItems, 64); - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D(num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt2 > 0) - { - printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - } - } - - clFinish(m_queue); - } - - cdata.x = 1; - bool applyFriction = true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - 
b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher(m_queue, m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - - launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); - } - } - clFinish(m_queue); - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } -} - -void b3Solver::convertToConstraints(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg) -{ - // b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; - contactCOut->resize(nContacts); - struct CB - { - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; - }; - - { - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_dt = cfg.m_dt; - cdata.m_positionDrift = cfg.m_positionDrift; - cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - - if (gConvertConstraintOnCpu) - { - b3AlignedObjectArray<b3RigidBodyData> gBodies; - bodyBuf->copyToHost(gBodies); - - b3AlignedObjectArray<b3Contact4> gContact; - contactsIn->copyToHost(gContact); - - b3AlignedObjectArray<b3InertiaData> gShapes; - shapeBuf->copyToHost(gShapes); - - b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut; - gConstraintOut.resize(nContacts); - - B3_PROFILE("cpu contactToConstraintKernel"); - for (int gIdx = 0; gIdx < nContacts; gIdx++) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Float4 posA = gBodies[aIdx].m_pos; - b3Float4 
linVelA = gBodies[aIdx].m_linVel; - b3Float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - b3Float4 posB = gBodies[bIdx].m_pos; - b3Float4 linVelB = gBodies[bIdx].m_linVel; - b3Float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, - &cs); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = (b3GpuConstraint4&)cs; - } - - contactCOut->copyFromHost(gConstraintOut); - } - else - { - B3_PROFILE("gpu m_contactToConstraintKernel"); - - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(contactsIn->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(contactCOut->getBufferCL())}; - b3LauncherCL launcher(m_queue, m_contactToConstraintKernel, "m_contactToConstraintKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata ); - - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_dt); - launcher.setConst(cdata.m_positionDrift); - launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D(nContacts, 64); - clFinish(m_queue); - } - } -} - -/* -void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactsIn, void* additionalData, - int nContacts, const b3Solver::ConstraintCfg& cfg ) -{ - - - - const int sortAlignment = 512; // todo. 
get this out of sort - if( cfg.m_enableParallelSolve ) - { - - - int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); - - b3OpenCLArray<unsigned int>* countsNative = m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost ); - b3OpenCLArray<unsigned int>* offsetsNative = m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost ); - - { // 2. set cell idx - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; - }; - - b3Assert( sortSize%64 == 0 ); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = cfg.m_staticIdx; - cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent); - cdata.m_nSplit = B3_SOLVER_N_SPLIT; - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_setSortDataKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( sortSize, 64 ); - } - - { // 3. sort by cell idx - int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - m_sort32->execute(*m_sortDataBuffer,sortSize); - } - { // 4. find entries - m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, b3BoundSearchCL::COUNT); - - m_scan->execute( *countsNative, *offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT ); - } - - { // 5. sort constraints by cellIdx - // todo. 
preallocate this -// b3Assert( contactsIn->getType() == TYPE_HOST ); -// b3OpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer - - { - - - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_reorderContactKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - } -// BufferUtils::unmap<true>( out, contactsIn, nContacts ); - } - } - - -} - -*/ -void b3Solver::batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx) -{ - int numWorkItems = 64 * B3_SOLVER_N_CELLS; - { - B3_PROFILE("batch generation"); - - b3Int4 cdata; - cdata.x = nContacts; - cdata.y = 0; - cdata.z = staticIdx; - -#ifdef BATCH_DEBUG - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems); - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - -#if 0 - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( contacts->getBufferCL() ), - b3BufferInfoCL( m_contactBuffer2->getBufferCL()), - b3BufferInfoCL( nNative->getBufferCL() ), - b3BufferInfoCL( offsetsNative->getBufferCL() ), -#ifdef BATCH_DEBUG - , b3BufferInfoCL(&gpuDebugInfo) -#endif - }; -#endif - - { - m_batchSizes.resize(nNative->size()); - B3_PROFILE("batchingKernel"); - //b3LauncherCL launcher( m_queue, m_batchingKernel); - cl_kernel k = useNewBatchingKernel ? 
m_batchingKernelNew : m_batchingKernel; - - b3LauncherCL launcher(m_queue, k, "*batchingKernel"); - if (!useNewBatchingKernel) - { - launcher.setBuffer(contacts->getBufferCL()); - } - launcher.setBuffer(m_contactBuffer2->getBufferCL()); - launcher.setBuffer(nNative->getBufferCL()); - launcher.setBuffer(offsetsNative->getBufferCL()); - - launcher.setBuffer(m_batchSizes.getBufferCL()); - - //launcher.setConst( cdata ); - launcher.setConst(staticIdx); - - launcher.launch1D(numWorkItems, 64); - //clFinish(m_queue); - //b3AlignedObjectArray<int> batchSizesCPU; - //m_batchSizes.copyToHost(batchSizesCPU); - //printf(".\n"); - } - -#ifdef BATCH_DEBUG - aaaa - b3Contact4* hostContacts = new b3Contact4[nContacts]; - m_contactBuffer->read(hostContacts, nContacts); - clFinish(m_queue); - - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt1 > 0) - { - printf("catch\n"); - } - if (debugInfo[i].m_valInt2 > 0) - { - printf("catch22\n"); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("catch666\n"); - } - - if (debugInfo[i].m_valInt4 > 0) - { - printf("catch777\n"); - } - } - delete[] debugInfo; -#endif //BATCH_DEBUG - } - - // copy buffer to buffer - //b3Assert(m_contactBuffer->size()==nContacts); - //contacts->copyFromOpenCLArray( *m_contactBuffer); - //clFinish(m_queue);//needed? -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h deleted file mode 100644 index ee63531d78..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.h +++ /dev/null @@ -1,110 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. 
-Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#ifndef __ADL_SOLVER_H -#define __ADL_SOLVER_H - -#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" -#include "b3GpuConstraint4.h" - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" -#include "Bullet3Collision/NarrowPhaseCollision/b3Contact4.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanCL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" -#include "Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.h" - -#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" - -#define B3NEXTMULTIPLEOF(num, alignment) (((num) / (alignment) + (((num) % (alignment) == 0) ? 
0 : 1)) * (alignment)) - -enum -{ - B3_SOLVER_N_SPLIT_X = 8, //16,//4, - B3_SOLVER_N_SPLIT_Y = 4, //16,//4, - B3_SOLVER_N_SPLIT_Z = 8, //, - B3_SOLVER_N_CELLS = B3_SOLVER_N_SPLIT_X * B3_SOLVER_N_SPLIT_Y * B3_SOLVER_N_SPLIT_Z, - B3_SOLVER_N_BATCHES = 8, //4,//8,//4, - B3_MAX_NUM_BATCHES = 128, -}; - -class b3SolverBase -{ -public: - struct ConstraintCfg - { - ConstraintCfg(float dt = 0.f) : m_positionDrift(0.005f), m_positionConstraintCoeff(0.2f), m_dt(dt), m_staticIdx(-1) {} - - float m_positionDrift; - float m_positionConstraintCoeff; - float m_dt; - bool m_enableParallelSolve; - float m_batchCellSize; - int m_staticIdx; - }; -}; - -class b3Solver : public b3SolverBase -{ -public: - cl_context m_context; - cl_device_id m_device; - cl_command_queue m_queue; - - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - b3OpenCLArray<int> m_batchSizes; - - int m_nIterations; - cl_kernel m_batchingKernel; - cl_kernel m_batchingKernelNew; - cl_kernel m_solveContactKernel; - cl_kernel m_solveFrictionKernel; - cl_kernel m_contactToConstraintKernel; - cl_kernel m_setSortDataKernel; - cl_kernel m_reorderContactKernel; - cl_kernel m_copyConstraintKernel; - - class b3RadixSort32CL* m_sort32; - class b3BoundSearchCL* m_search; - class b3PrefixScanCL* m_scan; - - b3OpenCLArray<b3SortData>* m_sortDataBuffer; - b3OpenCLArray<b3Contact4>* m_contactBuffer2; - - enum - { - DYNAMIC_CONTACT_ALLOCATION_THRESHOLD = 2000000, - }; - - b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity); - - virtual ~b3Solver(); - - void solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* inertiaBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches); - - void solveContactConstraintHost(b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, 
int maxNumBatches, b3AlignedObjectArray<int>* batchSizes); - - void convertToConstraints(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg); - - void batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* n, b3OpenCLArray<unsigned int>* offsets, int staticIdx); -}; - -#endif //__ADL_SOLVER_H diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl deleted file mode 100644 index 3b891b863d..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl +++ /dev/null @@ -1,353 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -#define WG_SIZE 64 - - - - - -typedef struct -{ - int m_n; - int m_start; - int m_staticIdx; - int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_a; - int m_b; - u32 m_idx; -}Elem; - -#define STACK_SIZE (WG_SIZE*10) -//#define STACK_SIZE (WG_SIZE) -#define RING_SIZE 1024 -#define RING_SIZE_MASK (RING_SIZE-1) -#define CHECK_SIZE (WG_SIZE) - - -#define GET_RING_CAPACITY (RING_SIZE - 
ldsRingEnd) -#define RING_END ldsTmp - -u32 readBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - return buff[bufIdx] & (1<<bitIdx); -} - -void writeBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; -// buff[bufIdx] |= (1<<bitIdx); - atom_or( &buff[bufIdx], (1<<bitIdx) ); -} - -u32 tryWrite(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) ); - return ((ans >> bitIdx)&1) == 0; -} - -// batching on the GPU -__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut, - __global const u32* gN, __global const u32* gStart, __global int* batchSizes, - int m_staticIdx ) -{ - __local u32 ldsStackIdx[STACK_SIZE]; - __local u32 ldsStackEnd; - __local Elem ldsRingElem[RING_SIZE]; - __local u32 ldsRingEnd; - __local u32 ldsTmp; - __local u32 ldsCheckBuffer[CHECK_SIZE]; - __local u32 ldsFixedBuffer[CHECK_SIZE]; - __local u32 ldsGEnd; - __local u32 ldsDstEnd; - - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - - const int m_n = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - - if( lIdx == 0 ) - { - ldsRingEnd = 0; - ldsGEnd = 0; - ldsStackEnd = 0; - ldsDstEnd = m_start; - } - - - -// while(1) -//was 250 - int ie=0; - int maxBatch = 0; - for(ie=0; ie<50; ie++) - { - ldsFixedBuffer[lIdx] = 0; - - for(int giter=0; giter<4; giter++) - { - int ringCap = GET_RING_CAPACITY; - - // 1. 
fill ring - if( ldsGEnd < m_n ) - { - while( ringCap > WG_SIZE ) - { - if( ldsGEnd >= m_n ) break; - if( lIdx < ringCap - WG_SIZE ) - { - int srcIdx; - AtomInc1( ldsGEnd, srcIdx ); - if( srcIdx < m_n ) - { - int dstIdx; - AtomInc1( ldsRingEnd, dstIdx ); - - int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit; - int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit; - ldsRingElem[dstIdx].m_a = (a>b)? b:a; - ldsRingElem[dstIdx].m_b = (a>b)? a:b; - ldsRingElem[dstIdx].m_idx = srcIdx; - } - } - ringCap = GET_RING_CAPACITY; - } - } - - GROUP_LDS_BARRIER; - - // 2. fill stack - __local Elem* dst = ldsRingElem; - if( lIdx == 0 ) RING_END = 0; - - int srcIdx=lIdx; - int end = ldsRingEnd; - - { - for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE) - { - Elem e; - if(srcIdx<end) e = ldsRingElem[srcIdx]; - bool done = (srcIdx<end)?false:true; - - for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0; - - if( !done ) - { - int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a)); - int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b)); - - if( aUsed==0 && bUsed==0 ) - { - int aAvailable=1; - int bAvailable=1; - int ea = abs(e.m_a); - int eb = abs(e.m_b); - - bool aStatic = (e.m_a<0) ||(ea==m_staticIdx); - bool bStatic = (e.m_b<0) ||(eb==m_staticIdx); - - if (!aStatic) - aAvailable = tryWrite( ldsCheckBuffer, ea ); - if (!bStatic) - bAvailable = tryWrite( ldsCheckBuffer, eb ); - - //aAvailable = aStatic? 1: aAvailable; - //bAvailable = bStatic? 
1: bAvailable; - - bool success = (aAvailable && bAvailable); - if(success) - { - - if (!aStatic) - writeBuf( ldsFixedBuffer, ea ); - if (!bStatic) - writeBuf( ldsFixedBuffer, eb ); - } - done = success; - } - } - - // put it aside - if(srcIdx<end) - { - if( done ) - { - int dstIdx; AtomInc1( ldsStackEnd, dstIdx ); - if( dstIdx < STACK_SIZE ) - ldsStackIdx[dstIdx] = e.m_idx; - else{ - done = false; - AtomAdd( ldsStackEnd, -1 ); - } - } - if( !done ) - { - int dstIdx; AtomInc1( RING_END, dstIdx ); - dst[dstIdx] = e; - } - } - - // if filled, flush - if( ldsStackEnd == STACK_SIZE ) - { - for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE) - { - int idx = m_start + ldsStackIdx[i]; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - gConstraintsOut[ dstIdx ].m_batchIdx = ie; - } - if( lIdx == 0 ) ldsStackEnd = 0; - - //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) - ldsFixedBuffer[lIdx] = 0; - } - } - } - - if( lIdx == 0 ) ldsRingEnd = RING_END; - } - - GROUP_LDS_BARRIER; - - for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE) - { - int idx = m_start + ldsStackIdx[i]; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - gConstraintsOut[ dstIdx ].m_batchIdx = ie; - } - - // in case it couldn't consume any pair. Flush them - // todo. Serial batch worth while? 
- if( ldsStackEnd == 0 ) - { - for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE) - { - int idx = m_start + ldsRingElem[i].m_idx; - int dstIdx; AtomInc1( ldsDstEnd, dstIdx ); - gConstraintsOut[ dstIdx ] = gConstraints[ idx ]; - int curBatch = 100+i; - if (maxBatch < curBatch) - maxBatch = curBatch; - - gConstraintsOut[ dstIdx ].m_batchIdx = curBatch; - - } - GROUP_LDS_BARRIER; - if( lIdx == 0 ) ldsRingEnd = 0; - } - - if( lIdx == 0 ) ldsStackEnd = 0; - - GROUP_LDS_BARRIER; - - // termination - if( ldsGEnd == m_n && ldsRingEnd == 0 ) - break; - } - - if( lIdx == 0 ) - { - if (maxBatch < ie) - maxBatch=ie; - batchSizes[wgIdx]=maxBatch; - } - -} - - - - - - - - - - - - - - - - - - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h deleted file mode 100644 index 7c73c96baa..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ /dev/null @@ -1,387 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = 
b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE 
get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "#define WG_SIZE 64\n" - "typedef struct \n" - "{\n" - " int m_n;\n" - " int m_start;\n" - " int m_staticIdx;\n" - " int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct \n" - "{\n" - " int m_a;\n" - " int m_b;\n" - " u32 m_idx;\n" - "}Elem;\n" - "#define STACK_SIZE (WG_SIZE*10)\n" - "//#define STACK_SIZE (WG_SIZE)\n" - "#define RING_SIZE 1024\n" - "#define RING_SIZE_MASK (RING_SIZE-1)\n" - "#define CHECK_SIZE (WG_SIZE)\n" - "#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" - "#define RING_END ldsTmp\n" - "u32 readBuf(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " return buff[bufIdx] & (1<<bitIdx);\n" - "}\n" - "void writeBuf(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - "// buff[bufIdx] |= (1<<bitIdx);\n" - " atom_or( &buff[bufIdx], (1<<bitIdx) );\n" - "}\n" - "u32 tryWrite(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) 
);\n" - " return ((ans >> bitIdx)&1) == 0;\n" - "}\n" - "// batching on the GPU\n" - "__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n" - " __global const u32* gN, __global const u32* gStart, __global int* batchSizes, \n" - " int m_staticIdx )\n" - "{\n" - " __local u32 ldsStackIdx[STACK_SIZE];\n" - " __local u32 ldsStackEnd;\n" - " __local Elem ldsRingElem[RING_SIZE];\n" - " __local u32 ldsRingEnd;\n" - " __local u32 ldsTmp;\n" - " __local u32 ldsCheckBuffer[CHECK_SIZE];\n" - " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" - " __local u32 ldsGEnd;\n" - " __local u32 ldsDstEnd;\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " \n" - " const int m_n = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsRingEnd = 0;\n" - " ldsGEnd = 0;\n" - " ldsStackEnd = 0;\n" - " ldsDstEnd = m_start;\n" - " }\n" - " \n" - " \n" - " \n" - "// while(1)\n" - "//was 250\n" - " int ie=0;\n" - " int maxBatch = 0;\n" - " for(ie=0; ie<50; ie++)\n" - " {\n" - " ldsFixedBuffer[lIdx] = 0;\n" - " for(int giter=0; giter<4; giter++)\n" - " {\n" - " int ringCap = GET_RING_CAPACITY;\n" - " \n" - " // 1. fill ring\n" - " if( ldsGEnd < m_n )\n" - " {\n" - " while( ringCap > WG_SIZE )\n" - " {\n" - " if( ldsGEnd >= m_n ) break;\n" - " if( lIdx < ringCap - WG_SIZE )\n" - " {\n" - " int srcIdx;\n" - " AtomInc1( ldsGEnd, srcIdx );\n" - " if( srcIdx < m_n )\n" - " {\n" - " int dstIdx;\n" - " AtomInc1( ldsRingEnd, dstIdx );\n" - " \n" - " int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" - " int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" - " ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" - " ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" - " ldsRingElem[dstIdx].m_idx = srcIdx;\n" - " }\n" - " }\n" - " ringCap = GET_RING_CAPACITY;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " // 2. 
fill stack\n" - " __local Elem* dst = ldsRingElem;\n" - " if( lIdx == 0 ) RING_END = 0;\n" - " int srcIdx=lIdx;\n" - " int end = ldsRingEnd;\n" - " {\n" - " for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n" - " {\n" - " Elem e;\n" - " if(srcIdx<end) e = ldsRingElem[srcIdx];\n" - " bool done = (srcIdx<end)?false:true;\n" - " for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n" - " \n" - " if( !done )\n" - " {\n" - " int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a));\n" - " int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b));\n" - " if( aUsed==0 && bUsed==0 )\n" - " {\n" - " int aAvailable=1;\n" - " int bAvailable=1;\n" - " int ea = abs(e.m_a);\n" - " int eb = abs(e.m_b);\n" - " bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n" - " bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n" - " \n" - " if (!aStatic)\n" - " aAvailable = tryWrite( ldsCheckBuffer, ea );\n" - " if (!bStatic)\n" - " bAvailable = tryWrite( ldsCheckBuffer, eb );\n" - " \n" - " //aAvailable = aStatic? 1: aAvailable;\n" - " //bAvailable = bStatic? 
1: bAvailable;\n" - " bool success = (aAvailable && bAvailable);\n" - " if(success)\n" - " {\n" - " \n" - " if (!aStatic)\n" - " writeBuf( ldsFixedBuffer, ea );\n" - " if (!bStatic)\n" - " writeBuf( ldsFixedBuffer, eb );\n" - " }\n" - " done = success;\n" - " }\n" - " }\n" - " // put it aside\n" - " if(srcIdx<end)\n" - " {\n" - " if( done )\n" - " {\n" - " int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n" - " if( dstIdx < STACK_SIZE )\n" - " ldsStackIdx[dstIdx] = e.m_idx;\n" - " else{\n" - " done = false;\n" - " AtomAdd( ldsStackEnd, -1 );\n" - " }\n" - " }\n" - " if( !done )\n" - " {\n" - " int dstIdx; AtomInc1( RING_END, dstIdx );\n" - " dst[dstIdx] = e;\n" - " }\n" - " }\n" - " // if filled, flush\n" - " if( ldsStackEnd == STACK_SIZE )\n" - " {\n" - " for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n" - " {\n" - " int idx = m_start + ldsStackIdx[i];\n" - " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" - " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" - " gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" - " }\n" - " if( lIdx == 0 ) ldsStackEnd = 0;\n" - " //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n" - " ldsFixedBuffer[lIdx] = 0;\n" - " }\n" - " }\n" - " }\n" - " if( lIdx == 0 ) ldsRingEnd = RING_END;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n" - " {\n" - " int idx = m_start + ldsStackIdx[i];\n" - " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" - " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" - " gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" - " }\n" - " // in case it couldn't consume any pair. Flush them\n" - " // todo. 
Serial batch worth while?\n" - " if( ldsStackEnd == 0 )\n" - " {\n" - " for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n" - " {\n" - " int idx = m_start + ldsRingElem[i].m_idx;\n" - " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" - " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" - " int curBatch = 100+i;\n" - " if (maxBatch < curBatch)\n" - " maxBatch = curBatch;\n" - " \n" - " gConstraintsOut[ dstIdx ].m_batchIdx = curBatch;\n" - " \n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx == 0 ) ldsRingEnd = 0;\n" - " }\n" - " if( lIdx == 0 ) ldsStackEnd = 0;\n" - " GROUP_LDS_BARRIER;\n" - " // termination\n" - " if( ldsGEnd == m_n && ldsRingEnd == 0 )\n" - " break;\n" - " }\n" - " if( lIdx == 0 )\n" - " {\n" - " if (maxBatch < ie)\n" - " maxBatch=ie;\n" - " batchSizes[wgIdx]=maxBatch;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl deleted file mode 100644 index ba1b66d2c3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl +++ /dev/null @@ -1,231 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. 
This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile __global int* -#endif - -#define SIMD_WIDTH 64 - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -#define WG_SIZE 64 - - - - - -typedef struct -{ - int m_n; - int m_start; - int m_staticIdx; - int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_a; - int m_b; - u32 m_idx; -}Elem; - - - - - -// batching on the GPU -__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, 
__global const u32* gN, __global const u32* gStart, int m_staticIdx ) -{ - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - - const int m_n = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - - if( lIdx == 0 ) - { - for (int i=0;i<m_n;i++) - { - int srcIdx = i+m_start; - int batchIndex = i; - gConstraints[ srcIdx ].m_batchIdx = batchIndex; - } - } -} - - -#define CHECK_SIZE (WG_SIZE) - - - - -u32 readBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - return buff[bufIdx] & (1<<bitIdx); -} - -void writeBuf(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - buff[bufIdx] |= (1<<bitIdx); - //atom_or( &buff[bufIdx], (1<<bitIdx) ); -} - -u32 tryWrite(__local u32* buff, int idx) -{ - idx = idx % (32*CHECK_SIZE); - int bitIdx = idx%32; - int bufIdx = idx/32; - u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) ); - return ((ans >> bitIdx)&1) == 0; -} - - -// batching on the GPU -__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx ) -{ - int wgIdx = GET_GROUP_IDX; - int lIdx = GET_LOCAL_IDX; - const int numConstraints = gN[wgIdx]; - const int m_start = gStart[wgIdx]; - b3Contact4Data_t tmp; - - __local u32 ldsFixedBuffer[CHECK_SIZE]; - - - - - - if( lIdx == 0 ) - { - - - __global struct b3Contact4Data* cs = &gConstraints[m_start]; - - - int numValidConstraints = 0; - int batchIdx = 0; - - while( numValidConstraints < numConstraints) - { - int nCurrentBatch = 0; - // clear flag - - for(int i=0; i<CHECK_SIZE; i++) - ldsFixedBuffer[i] = 0; - - for(int i=numValidConstraints; i<numConstraints; i++) - { - - int bodyAS = cs[i].m_bodyAPtrAndSignBit; - int bodyBS = cs[i].m_bodyBPtrAndSignBit; - int bodyA = abs(bodyAS); - int bodyB = abs(bodyBS); - bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx; - bool bIsStatic = (bodyBS<0) || 
bodyBS==staticIdx; - int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA); - int bUnavailable = bIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyB); - - if( aUnavailable==0 && bUnavailable==0 ) // ok - { - if (!aIsStatic) - { - writeBuf( ldsFixedBuffer, bodyA ); - } - if (!bIsStatic) - { - writeBuf( ldsFixedBuffer, bodyB ); - } - - cs[i].m_batchIdx = batchIdx; - - if (i!=numValidConstraints) - { - - tmp = cs[i]; - cs[i] = cs[numValidConstraints]; - cs[numValidConstraints] = tmp; - - - } - - numValidConstraints++; - - nCurrentBatch++; - if( nCurrentBatch == SIMD_WIDTH) - { - nCurrentBatch = 0; - for(int i=0; i<CHECK_SIZE; i++) - ldsFixedBuffer[i] = 0; - - } - } - }//for - batchIdx ++; - }//while - - batchSizes[wgIdx] = batchIdx; - - }//if( lIdx == 0 ) - - //return batchIdx; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h deleted file mode 100644 index 05800656cb..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ /dev/null @@ -1,290 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsNewCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - 
" int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile __global int*\n" - "#endif\n" - "#define SIMD_WIDTH 64\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX 
get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "#define WG_SIZE 64\n" - "typedef struct \n" - "{\n" - " int m_n;\n" - " int m_start;\n" - " int m_staticIdx;\n" - " int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct \n" - "{\n" - " int m_a;\n" - " int m_b;\n" - " u32 m_idx;\n" - "}Elem;\n" - "// batching on the GPU\n" - "__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" - "{\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " \n" - " const int m_n = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " for (int i=0;i<m_n;i++)\n" - " {\n" - " int srcIdx = i+m_start;\n" - " int batchIndex = i;\n" - " gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n" - " }\n" - " }\n" - "}\n" - "#define CHECK_SIZE (WG_SIZE)\n" - "u32 readBuf(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " return buff[bufIdx] & 
(1<<bitIdx);\n" - "}\n" - "void writeBuf(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " buff[bufIdx] |= (1<<bitIdx);\n" - " //atom_or( &buff[bufIdx], (1<<bitIdx) );\n" - "}\n" - "u32 tryWrite(__local u32* buff, int idx)\n" - "{\n" - " idx = idx % (32*CHECK_SIZE);\n" - " int bitIdx = idx%32;\n" - " int bufIdx = idx/32;\n" - " u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" - " return ((ans >> bitIdx)&1) == 0;\n" - "}\n" - "// batching on the GPU\n" - "__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx )\n" - "{\n" - " int wgIdx = GET_GROUP_IDX;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " const int numConstraints = gN[wgIdx];\n" - " const int m_start = gStart[wgIdx];\n" - " b3Contact4Data_t tmp;\n" - " \n" - " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" - " \n" - " \n" - " \n" - " \n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " \n" - " \n" - " __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" - " \n" - " \n" - " int numValidConstraints = 0;\n" - " int batchIdx = 0;\n" - " while( numValidConstraints < numConstraints)\n" - " {\n" - " int nCurrentBatch = 0;\n" - " // clear flag\n" - " \n" - " for(int i=0; i<CHECK_SIZE; i++) \n" - " ldsFixedBuffer[i] = 0; \n" - " for(int i=numValidConstraints; i<numConstraints; i++)\n" - " {\n" - " int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n" - " int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n" - " int bodyA = abs(bodyAS);\n" - " int bodyB = abs(bodyBS);\n" - " bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n" - " bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n" - " int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n" - " int bUnavailable = bIsStatic ? 
0 : readBuf( ldsFixedBuffer, bodyB);\n" - " \n" - " if( aUnavailable==0 && bUnavailable==0 ) // ok\n" - " {\n" - " if (!aIsStatic)\n" - " {\n" - " writeBuf( ldsFixedBuffer, bodyA );\n" - " }\n" - " if (!bIsStatic)\n" - " {\n" - " writeBuf( ldsFixedBuffer, bodyB );\n" - " }\n" - " cs[i].m_batchIdx = batchIdx;\n" - " if (i!=numValidConstraints)\n" - " {\n" - " tmp = cs[i];\n" - " cs[i] = cs[numValidConstraints];\n" - " cs[numValidConstraints] = tmp;\n" - " }\n" - " numValidConstraints++;\n" - " \n" - " nCurrentBatch++;\n" - " if( nCurrentBatch == SIMD_WIDTH)\n" - " {\n" - " nCurrentBatch = 0;\n" - " for(int i=0; i<CHECK_SIZE; i++) \n" - " ldsFixedBuffer[i] = 0;\n" - " \n" - " }\n" - " }\n" - " }//for\n" - " batchIdx ++;\n" - " }//while\n" - " \n" - " batchSizes[wgIdx] = batchIdx;\n" - " }//if( lIdx == 0 )\n" - " \n" - " //return batchIdx;\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl deleted file mode 100644 index e22bc9bc33..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.cl +++ /dev/null @@ -1,32 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. 
This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" - -#include "Bullet3Dynamics/shared/b3IntegrateTransforms.h" - - - -__kernel void - integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration) -{ - int nodeID = get_global_id(0); - - if( nodeID < numNodes) - { - integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h deleted file mode 100644 index 6e9c53e161..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ /dev/null @@ -1,432 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* integrateKernelCL = - "/*\n" - "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = 
b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat 
b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - 
"b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = 
b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" - "{\n" - " \n" - " if (bodies[nodeID].m_invMass != 0.f)\n" - " {\n" - " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" - " //angular velocity\n" - " {\n" - " b3Float4 axis;\n" - " //add some hardcoded angular damping\n" - " bodies[nodeID].m_angVel.x *= angularDamping;\n" - " bodies[nodeID].m_angVel.y *= angularDamping;\n" - " bodies[nodeID].m_angVel.z *= angularDamping;\n" - " \n" - " b3Float4 angvel = bodies[nodeID].m_angVel;\n" - " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" - " \n" - " //limit the angular motion\n" - " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" - " {\n" - " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" - " }\n" - " if(fAngle < 0.001f)\n" - " {\n" - " // use Taylor's expansions of sync function\n" - " axis = angvel * 
(0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" - " }\n" - " else\n" - " {\n" - " // sync(fAngle) = sin(c*fAngle)/t\n" - " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" - " }\n" - " \n" - " b3Quat dorn;\n" - " dorn.x = axis.x;\n" - " dorn.y = axis.y;\n" - " dorn.z = axis.z;\n" - " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" - " b3Quat orn0 = bodies[nodeID].m_quat;\n" - " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" - " predictedOrn = b3QuatNormalized(predictedOrn);\n" - " bodies[nodeID].m_quat=predictedOrn;\n" - " }\n" - " //linear velocity \n" - " bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" - " \n" - " //apply gravity\n" - " bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" - " \n" - " }\n" - " \n" - "}\n" - "inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" - "{\n" - " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" - " \n" - " if( (body->m_invMass != 0.f))\n" - " {\n" - " //angular velocity\n" - " {\n" - " b3Float4 axis;\n" - " //add some hardcoded angular damping\n" - " body->m_angVel.x *= angularDamping;\n" - " body->m_angVel.y *= angularDamping;\n" - " body->m_angVel.z *= angularDamping;\n" - " \n" - " b3Float4 angvel = body->m_angVel;\n" - " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" - " //limit the angular motion\n" - " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" - " {\n" - " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" - " }\n" - " if(fAngle < 0.001f)\n" - " {\n" - " // use Taylor's expansions of sync function\n" - " axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" - " }\n" - " else\n" - " {\n" - " // sync(fAngle) = sin(c*fAngle)/t\n" - " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" - " }\n" - " b3Quat dorn;\n" - " dorn.x = axis.x;\n" - " dorn.y = 
axis.y;\n" - " dorn.z = axis.z;\n" - " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" - " b3Quat orn0 = body->m_quat;\n" - " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" - " predictedOrn = b3QuatNormalized(predictedOrn);\n" - " body->m_quat=predictedOrn;\n" - " }\n" - " //apply gravity\n" - " body->m_linVel += gravityAcceleration * timeStep;\n" - " //linear velocity \n" - " body->m_pos += body->m_linVel * timeStep;\n" - " \n" - " }\n" - " \n" - "}\n" - "__kernel void \n" - " integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" - "{\n" - " int nodeID = get_global_id(0);\n" - " \n" - " if( nodeID < numNodes)\n" - " {\n" - " integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl deleted file mode 100644 index 7f5dabe274..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.cl +++ /dev/null @@ -1,877 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. 
This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define B3_CONSTRAINT_FLAG_ENABLED 1 - -#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3 -#define B3_GPU_FIXED_CONSTRAINT_TYPE 4 - -#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails -#define B3_INFINITY 1e30f - -#define mymake_float4 (float4) - - -__inline float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - -typedef float4 Quaternion; - - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - - - -typedef struct -{ - Matrix3x3 m_invInertiaWorld; - Matrix3x3 m_initInvInertia; -} BodyInertia; - - -typedef struct -{ - Matrix3x3 m_basis;//orientation - float4 m_origin;//transform -}b3Transform; - -typedef struct -{ -// b3Transform m_worldTransformUnused; - float4 m_deltaLinearVelocity; - float4 m_deltaAngularVelocity; - float4 m_angularFactor; - float4 m_linearFactor; - float4 m_invMass; - float4 m_pushVelocity; - float4 m_turnVelocity; - float4 m_linearVelocity; - float4 m_angularVelocity; - - union - { - void* m_originalBody; - int m_originalBodyIndex; - }; - int padding[3]; - -} b3GpuSolverBody; - -typedef struct -{ - float4 
m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - unsigned int m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} b3RigidBodyCL; - -typedef struct -{ - - float4 m_relpos1CrossNormal; - float4 m_contactNormal; - - float4 m_relpos2CrossNormal; - //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal - - float4 m_angularComponentA; - float4 m_angularComponentB; - - float m_appliedPushImpulse; - float m_appliedImpulse; - int m_padding1; - int m_padding2; - float m_friction; - float m_jacDiagABInv; - float m_rhs; - float m_cfm; - - float m_lowerLimit; - float m_upperLimit; - float m_rhsPenetration; - int m_originalConstraint; - - - int m_overrideNumSolverIterations; - int m_frictionIndex; - int m_solverBodyIdA; - int m_solverBodyIdB; - -} b3SolverConstraint; - -typedef struct -{ - int m_bodyAPtrAndSignBit; - int m_bodyBPtrAndSignBit; - int m_originalConstraintIndex; - int m_batchId; -} b3BatchConstraint; - - - - - - -typedef struct -{ - int m_constraintType; - int m_rbA; - int m_rbB; - float m_breakingImpulseThreshold; - - float4 m_pivotInA; - float4 m_pivotInB; - Quaternion m_relTargetAB; - - int m_flags; - int m_padding[3]; -} b3GpuGenericConstraint; - - -/*b3Transform getWorldTransform(b3RigidBodyCL* rb) -{ - b3Transform newTrans; - newTrans.setOrigin(rb->m_pos); - newTrans.setRotation(rb->m_quat); - return newTrans; -}*/ - - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float4 fastNormalize4(float4 v) -{ - v = mymake_float4(v.xyz,0.f); - return fast_normalize(v); -} - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - 
(a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - - -__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude) -{ - body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor; - body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor); -} - - -void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c) -{ - float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm; - float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity); - float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity); - - deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv; - deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv; - - float sum = c->m_appliedImpulse + deltaImpulse; - if (sum < c->m_lowerLimit) - { - deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_lowerLimit; - } - else if (sum > c->m_upperLimit) - { - deltaImpulse = c->m_upperLimit-c->m_appliedImpulse; - c->m_appliedImpulse = c->m_upperLimit; - } - else - { - c->m_appliedImpulse = sum; - } - - internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse); - internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse); - -} - 
-__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies, - __global b3BatchConstraint* batchConstraints, - __global b3SolverConstraint* rows, - __global unsigned int* numConstraintRowsInfo1, - __global unsigned int* rowOffsets, - __global b3GpuGenericConstraint* constraints, - int batchOffset, - int numConstraintsInBatch - ) -{ - int b = get_global_id(0); - if (b>=numConstraintsInBatch) - return; - - __global b3BatchConstraint* c = &batchConstraints[b+batchOffset]; - int originalConstraintIndex = c->m_originalConstraintIndex; - if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED) - { - int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex]; - int rowOffset = rowOffsets[originalConstraintIndex]; - for (int jj=0;jj<numConstraintRows;jj++) - { - __global b3SolverConstraint* constraint = &rows[rowOffset+jj]; - resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint); - } - } -}; - -__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - __global b3GpuSolverBody* solverBody = &solverBodies[i]; - __global b3RigidBodyCL* bodyCL = &bodiesCL[i]; - - solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f); - solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f); - solverBody->m_originalBodyIndex = i; - solverBody->m_angularFactor = (float4)(1,1,1,0); - solverBody->m_linearFactor = (float4) (1,1,1,0); - solverBody->m_linearVelocity = bodyCL->m_linVel; - solverBody->m_angularVelocity = bodyCL->m_angVel; -} - -__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* 
constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints) -{ - int cid = get_global_id(0); - if (cid>=numConstraints) - return; - int numRows = numConstraintRows[cid]; - if (numRows) - { - for (int i=0;i<numRows;i++) - { - int rowIndex = rowOffsets[cid]+i; - float breakingThreshold = constraints[cid].m_breakingImpulseThreshold; - if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold) - { - constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED; - } - } - } -} - - - -__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - infos[i] = 3; - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - infos[i] = 6; - break; - } - default: - { - } - } -} - -__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, - __global b3BatchConstraint* batchConstraints, - __global b3GpuGenericConstraint* constraints, - __global b3RigidBodyCL* bodies, - int numConstraints) -{ - int i = get_global_id(0); - if (i>=numConstraints) - return; - - int rbA = constraints[i].m_rbA; - int rbB = constraints[i].m_rbB; - - batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA; - batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB; - batchConstraints[i].m_batchId = -1; - batchConstraints[i].m_originalConstraintIndex = i; - -} - - - - -typedef struct -{ - // integrator parameters: frames per second (1/stepsize), default error - // reduction parameter (0..1). 
- float fps,erp; - - // for the first and second body, pointers to two (linear and angular) - // n*3 jacobian sub matrices, stored by rows. these matrices will have - // been initialized to 0 on entry. if the second body is zero then the - // J2xx pointers may be 0. - union - { - __global float4* m_J1linearAxisFloat4; - __global float* m_J1linearAxis; - }; - union - { - __global float4* m_J1angularAxisFloat4; - __global float* m_J1angularAxis; - - }; - union - { - __global float4* m_J2linearAxisFloat4; - __global float* m_J2linearAxis; - }; - union - { - __global float4* m_J2angularAxisFloat4; - __global float* m_J2angularAxis; - }; - // elements to jump from one row to the next in J's - int rowskip; - - // right hand sides of the equation J*v = c + cfm * lambda. cfm is the - // "constraint force mixing" vector. c is set to zero on entry, cfm is - // set to a constant value (typically very small or zero) value on entry. - __global float* m_constraintError; - __global float* cfm; - - // lo and hi limits for variables (set to -/+ infinity on entry). - __global float* m_lowerLimit; - __global float* m_upperLimit; - - // findex vector for variables. see the LCP solver interface for a - // description of what this does. this is set to -1 on entry. - // note that the returned indexes are relative to the first index of - // the constraint. - __global int *findex; - // number of solver iterations - int m_numIterations; - - //damping of the velocity - float m_damping; -} b3GpuConstraintInfo2; - - -void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2) -{ - *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f); - *v1 = (float4)(vecIn.z ,0. 
,-vecIn.x,0.f); - *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f); -} - - -void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies) -{ - float4 posA = bodies[constraint->m_rbA].m_pos; - Quaternion rotA = bodies[constraint->m_rbA].m_quat; - - float4 posB = bodies[constraint->m_rbB].m_pos; - Quaternion rotB = bodies[constraint->m_rbB].m_quat; - - - - // anchor points in global coordinates with respect to body PORs. - - // set jacobian - info->m_J1linearAxis[0] = 1; - info->m_J1linearAxis[info->rowskip+1] = 1; - info->m_J1linearAxis[2*info->rowskip+2] = 1; - - float4 a1 = qtRotate(rotA,constraint->m_pivotInA); - - { - __global float4* angular0 = (__global float4*)(info->m_J1angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip); - float4 a1neg = -a1; - getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2); - } - if (info->m_J2linearAxis) - { - info->m_J2linearAxis[0] = -1; - info->m_J2linearAxis[info->rowskip+1] = -1; - info->m_J2linearAxis[2*info->rowskip+2] = -1; - } - - float4 a2 = qtRotate(rotB,constraint->m_pivotInB); - - { - // float4 a2n = -a2; - __global float4* angular0 = (__global float4*)(info->m_J2angularAxis); - __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip); - __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip); - getSkewSymmetricMatrix(a2,angular0,angular1,angular2); - } - - // set right hand side -// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? 
m_erp : info->erp; - float currERP = info->erp; - - float k = info->fps * currERP; - int j; - float4 result = a2 + posB - a1 - posA; - float* resultPtr = &result; - - for (j=0; j<3; j++) - { - info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]); - } -} - -Quaternion nearest( Quaternion first, Quaternion qd) -{ - Quaternion diff,sum; - diff = first- qd; - sum = first + qd; - - if( dot(diff,diff) < dot(sum,sum) ) - return qd; - return (-qd); -} - -float b3Acos(float x) -{ - if (x<-1) - x=-1; - if (x>1) - x=1; - return acos(x); -} - -float getAngle(Quaternion orn) -{ - if (orn.w>=1.f) - orn.w=1.f; - float s = 2.f * b3Acos(orn.w); - return s; -} - -void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle) -{ - Quaternion orn1 = nearest(orn0,orn1a); - - Quaternion dorn = qtMul(orn1,qtInvert(orn0)); - *angle = getAngle(dorn); - *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f); - - //check for axis length - float len = dot3F4(*axis,*axis); - if (len < FLT_EPSILON*FLT_EPSILON) - *axis = (float4)(1,0,0,0); - else - *axis /= sqrt(len); -} - - - -void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row) -{ - Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat; - Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat; - - int s = info->rowskip; - int start_index = start_row * s; - - // 3 rows to make body rotations equal - info->m_J1angularAxis[start_index] = 1; - info->m_J1angularAxis[start_index + s + 1] = 1; - info->m_J1angularAxis[start_index + s*2+2] = 1; - if ( info->m_J2angularAxis) - { - info->m_J2angularAxis[start_index] = -1; - info->m_J2angularAxis[start_index + s+1] = -1; - info->m_J2angularAxis[start_index + s*2+2] = -1; - } - - float currERP = info->erp; - float k = info->fps * currERP; - float4 diff; - float angle; - float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB)); - - 
calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle); - diff*=-angle; - - float* resultPtr = &diff; - - for (int j=0; j<3; j++) - { - info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j]; - } - - -} - - -__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies) -{ - int i = get_global_id(0); - if (i>=numBodies) - return; - - if (bodies[i].m_invMass) - { -// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP) - { - bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity; - } -// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP) - { - bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity; - } - } -} - - -__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, - __global unsigned int* infos, - __global unsigned int* constraintRowOffsets, - __global b3GpuGenericConstraint* constraints, - __global b3BatchConstraint* batchConstraints, - __global b3RigidBodyCL* bodies, - __global BodyInertia* inertias, - __global b3GpuSolverBody* solverBodies, - float timeStep, - float globalErp, - float globalCfm, - float globalDamping, - int globalNumIterations, - int numConstraints) -{ - - int i = get_global_id(0); - if (i>=numConstraints) - return; - - //for now, always initialize the batch info - int info1 = infos[i]; - - __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]]; - __global b3GpuGenericConstraint* constraint = &constraints[i]; - - __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA]; - __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB]; - - int solverBodyIdA = constraint->m_rbA; - int solverBodyIdB = constraint->m_rbB; - - __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA]; - __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB]; - - - if (rbA->m_invMass) - { - batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA; - } 
else - { -// if (!solverBodyIdA) -// m_staticIdx = 0; - batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA; - } - - if (rbB->m_invMass) - { - batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB; - } else - { -// if (!solverBodyIdB) -// m_staticIdx = 0; - batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB; - } - - if (info1) - { - int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations; -// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations) - // m_maxOverrideNumSolverIterations = overrideNumSolverIterations; - - - int j; - for ( j=0;j<info1;j++) - { -// memset(¤tConstraintRow[j],0,sizeof(b3SolverConstraint)); - currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0); - currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0); - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - currentConstraintRow[j].m_cfm = 0.f; - currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_friction = 0.f; - currentConstraintRow[j].m_frictionIndex = 0; - currentConstraintRow[j].m_jacDiagABInv = 0.f; - currentConstraintRow[j].m_lowerLimit = 0.f; - currentConstraintRow[j].m_upperLimit = 0.f; - - currentConstraintRow[j].m_originalConstraint = i; - currentConstraintRow[j].m_overrideNumSolverIterations = 0; - currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0); - currentConstraintRow[j].m_rhs = 0.f; - currentConstraintRow[j].m_rhsPenetration = 0.f; - currentConstraintRow[j].m_solverBodyIdA = 0; - currentConstraintRow[j].m_solverBodyIdB = 0; - - currentConstraintRow[j].m_lowerLimit = -B3_INFINITY; - currentConstraintRow[j].m_upperLimit = B3_INFINITY; - currentConstraintRow[j].m_appliedImpulse = 0.f; - currentConstraintRow[j].m_appliedPushImpulse = 0.f; - 
currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA; - currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB; - currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; - } - - bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0); - bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyAPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyAPtr->m_turnVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0); - bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0); - bodyBPtr->m_pushVelocity = (float4)(0,0,0,0); - bodyBPtr->m_turnVelocity = (float4)(0,0,0,0); - - int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - - - - b3GpuConstraintInfo2 info2; - info2.fps = 1.f/timeStep; - info2.erp = globalErp; - info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal; - info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal; - info2.m_J2linearAxisFloat4 = 0; - info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal; - info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this - - ///the size of b3SolverConstraint needs be a multiple of float -// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint)); - info2.m_constraintError = ¤tConstraintRow->m_rhs; - currentConstraintRow->m_cfm = globalCfm; - info2.m_damping = globalDamping; - info2.cfm = ¤tConstraintRow->m_cfm; - info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit; - info2.m_upperLimit = ¤tConstraintRow->m_upperLimit; - info2.m_numIterations = globalNumIterations; - - switch (constraint->m_constraintType) - { - case B3_GPU_POINT2POINT_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - break; - } - case B3_GPU_FIXED_CONSTRAINT_TYPE: - { - getInfo2Point2Point(constraint,&info2,bodies); - - getInfo2FixedOrientation(constraint,&info2,bodies,3); - - break; - } - - default: - { - } - } - - ///finalize the constraint setup - for ( j=0;j<info1;j++) - { - __global 
b3SolverConstraint* solverConstraint = ¤tConstraintRow[j]; - - if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold; - } - - if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold) - { - solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold; - } - -// solverConstraint->m_originalContactPoint = constraint; - - Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld; - { - - //float4 angularFactorA(1,1,1); - float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal; - solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA; - } - - Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld; - { - - float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal; - solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor(); - } - - { - //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal - //because it gets multiplied iMJlB - float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass; - float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA); - float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal? 
- float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB); - - float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal); - sum += dot3F4(iMJlB,solverConstraint->m_contactNormal); - sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal); - float fsum = fabs(sum); - if (fsum>FLT_EPSILON) - { - solverConstraint->m_jacDiagABInv = 1.f/sum; - } else - { - solverConstraint->m_jacDiagABInv = 0.f; - } - } - - - ///fix rhs - ///todo: add force/torque accelerators - { - float rel_vel; - float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel); - float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel); - - rel_vel = vel1Dotn+vel2Dotn; - - float restitution = 0.f; - float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2 - float velocityError = restitution - rel_vel * info2.m_damping; - float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv; - float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv; - solverConstraint->m_rhs = penetrationImpulse+velocityImpulse; - solverConstraint->m_appliedImpulse = 0.f; - - } - } - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h deleted file mode 100644 index c94b55851e..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h +++ /dev/null @@ -1,720 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveConstraintRowsCL = - "/*\n" - "Copyright (c) 2013 Advanced Micro Devices, Inc. 
\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#define B3_CONSTRAINT_FLAG_ENABLED 1\n" - "#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" - "#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" - "#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" - "#define B3_INFINITY 1e30f\n" - "#define mymake_float4 (float4)\n" - "__inline float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = mymake_float4(a.xyz,0.f);\n" - " float4 b1 = mymake_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "typedef float4 Quaternion;\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, 
b.m_row[2].x, 0);\n" - " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertiaWorld;\n" - " Matrix3x3 m_initInvInertia;\n" - "} BodyInertia;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_basis;//orientation\n" - " float4 m_origin;//transform\n" - "}b3Transform;\n" - "typedef struct\n" - "{\n" - "// b3Transform m_worldTransformUnused;\n" - " float4 m_deltaLinearVelocity;\n" - " float4 m_deltaAngularVelocity;\n" - " float4 m_angularFactor;\n" - " float4 m_linearFactor;\n" - " float4 m_invMass;\n" - " float4 m_pushVelocity;\n" - " float4 m_turnVelocity;\n" - " float4 m_linearVelocity;\n" - " float4 m_angularVelocity;\n" - " union \n" - " {\n" - " void* m_originalBody;\n" - " int m_originalBodyIndex;\n" - " };\n" - " int padding[3];\n" - "} b3GpuSolverBody;\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " unsigned int m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} b3RigidBodyCL;\n" - "typedef struct\n" - "{\n" - " float4 m_relpos1CrossNormal;\n" - " float4 m_contactNormal;\n" - " float4 m_relpos2CrossNormal;\n" - " //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n" - " float4 m_angularComponentA;\n" - " float4 m_angularComponentB;\n" - " \n" - " float m_appliedPushImpulse;\n" - " float m_appliedImpulse;\n" - " int m_padding1;\n" - " int m_padding2;\n" - " float m_friction;\n" - " float m_jacDiagABInv;\n" - " float m_rhs;\n" - " float m_cfm;\n" - " \n" - " float m_lowerLimit;\n" - " float m_upperLimit;\n" - " float m_rhsPenetration;\n" - " int m_originalConstraint;\n" - " int 
m_overrideNumSolverIterations;\n" - " int m_frictionIndex;\n" - " int m_solverBodyIdA;\n" - " int m_solverBodyIdB;\n" - "} b3SolverConstraint;\n" - "typedef struct \n" - "{\n" - " int m_bodyAPtrAndSignBit;\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_originalConstraintIndex;\n" - " int m_batchId;\n" - "} b3BatchConstraint;\n" - "typedef struct \n" - "{\n" - " int m_constraintType;\n" - " int m_rbA;\n" - " int m_rbB;\n" - " float m_breakingImpulseThreshold;\n" - " float4 m_pivotInA;\n" - " float4 m_pivotInB;\n" - " Quaternion m_relTargetAB;\n" - " int m_flags;\n" - " int m_padding[3];\n" - "} b3GpuGenericConstraint;\n" - "/*b3Transform getWorldTransform(b3RigidBodyCL* rb)\n" - "{\n" - " b3Transform newTrans;\n" - " newTrans.setOrigin(rb->m_pos);\n" - " newTrans.setRotation(rb->m_quat);\n" - " return newTrans;\n" - "}*/\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " v = mymake_float4(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - 
"Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" - "{\n" - " body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" - " body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" - "}\n" - "void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" - "{\n" - " float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" - " float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" - " float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" - " deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" - " deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" - " float sum = c->m_appliedImpulse + deltaImpulse;\n" - " if (sum < c->m_lowerLimit)\n" - " {\n" - " deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;\n" - " c->m_appliedImpulse = c->m_lowerLimit;\n" - " }\n" - " else if (sum > c->m_upperLimit) \n" - " {\n" - " deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;\n" - " c->m_appliedImpulse = c->m_upperLimit;\n" - " }\n" - " else\n" - " {\n" - " c->m_appliedImpulse = sum;\n" - " }\n" - " internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" - " internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" - "}\n" - "__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" - " __global b3BatchConstraint* batchConstraints,\n" - " __global b3SolverConstraint* rows,\n" - " __global unsigned int* numConstraintRowsInfo1, \n" - " __global unsigned 
int* rowOffsets,\n" - " __global b3GpuGenericConstraint* constraints,\n" - " int batchOffset,\n" - " int numConstraintsInBatch\n" - " )\n" - "{\n" - " int b = get_global_id(0);\n" - " if (b>=numConstraintsInBatch)\n" - " return;\n" - " __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" - " int originalConstraintIndex = c->m_originalConstraintIndex;\n" - " if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" - " {\n" - " int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" - " int rowOffset = rowOffsets[originalConstraintIndex];\n" - " for (int jj=0;jj<numConstraintRows;jj++)\n" - " {\n" - " __global b3SolverConstraint* constraint = &rows[rowOffset+jj];\n" - " resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" - " }\n" - " }\n" - "};\n" - "__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numBodies)\n" - " return;\n" - " __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" - " __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" - " solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" - " solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);\n" - " solverBody->m_originalBodyIndex = i;\n" - " solverBody->m_angularFactor = (float4)(1,1,1,0);\n" - " solverBody->m_linearFactor = (float4) (1,1,1,0);\n" - " solverBody->m_linearVelocity = bodyCL->m_linVel;\n" - " solverBody->m_angularVelocity = bodyCL->m_angVel;\n" - "}\n" - "__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* 
numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" - "{\n" - " int cid = get_global_id(0);\n" - " if (cid>=numConstraints)\n" - " return;\n" - " int numRows = numConstraintRows[cid];\n" - " if (numRows)\n" - " {\n" - " for (int i=0;i<numRows;i++)\n" - " {\n" - " int rowIndex = rowOffsets[cid]+i;\n" - " float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;\n" - " if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)\n" - " {\n" - " constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConstraints)\n" - " return;\n" - " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" - " switch (constraint->m_constraintType)\n" - " {\n" - " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" - " {\n" - " infos[i] = 3;\n" - " break;\n" - " }\n" - " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" - " {\n" - " infos[i] = 6;\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " }\n" - " }\n" - "}\n" - "__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" - " __global b3BatchConstraint* batchConstraints, \n" - " __global b3GpuGenericConstraint* constraints,\n" - " __global b3RigidBodyCL* bodies,\n" - " int numConstraints)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConstraints)\n" - " return;\n" - " int rbA = constraints[i].m_rbA;\n" - " int rbB = constraints[i].m_rbB;\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? 
rbB : -rbB;\n" - " batchConstraints[i].m_batchId = -1;\n" - " batchConstraints[i].m_originalConstraintIndex = i;\n" - "}\n" - "typedef struct\n" - "{\n" - " // integrator parameters: frames per second (1/stepsize), default error\n" - " // reduction parameter (0..1).\n" - " float fps,erp;\n" - " // for the first and second body, pointers to two (linear and angular)\n" - " // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" - " // been initialized to 0 on entry. if the second body is zero then the\n" - " // J2xx pointers may be 0.\n" - " union \n" - " {\n" - " __global float4* m_J1linearAxisFloat4;\n" - " __global float* m_J1linearAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J1angularAxisFloat4;\n" - " __global float* m_J1angularAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J2linearAxisFloat4;\n" - " __global float* m_J2linearAxis;\n" - " };\n" - " union\n" - " {\n" - " __global float4* m_J2angularAxisFloat4;\n" - " __global float* m_J2angularAxis;\n" - " };\n" - " // elements to jump from one row to the next in J's\n" - " int rowskip;\n" - " // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" - " // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" - " // set to a constant value (typically very small or zero) value on entry.\n" - " __global float* m_constraintError;\n" - " __global float* cfm;\n" - " // lo and hi limits for variables (set to -/+ infinity on entry).\n" - " __global float* m_lowerLimit;\n" - " __global float* m_upperLimit;\n" - " // findex vector for variables. see the LCP solver interface for a\n" - " // description of what this does. 
this is set to -1 on entry.\n" - " // note that the returned indexes are relative to the first index of\n" - " // the constraint.\n" - " __global int *findex;\n" - " // number of solver iterations\n" - " int m_numIterations;\n" - " //damping of the velocity\n" - " float m_damping;\n" - "} b3GpuConstraintInfo2;\n" - "void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" - "{\n" - " *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" - " *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n" - " *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" - "}\n" - "void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" - "{\n" - " float4 posA = bodies[constraint->m_rbA].m_pos;\n" - " Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" - " float4 posB = bodies[constraint->m_rbB].m_pos;\n" - " Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" - " // anchor points in global coordinates with respect to body PORs.\n" - " \n" - " // set jacobian\n" - " info->m_J1linearAxis[0] = 1;\n" - " info->m_J1linearAxis[info->rowskip+1] = 1;\n" - " info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" - " float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" - " {\n" - " __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" - " __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" - " __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);\n" - " float4 a1neg = -a1;\n" - " getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);\n" - " }\n" - " if (info->m_J2linearAxis)\n" - " {\n" - " info->m_J2linearAxis[0] = -1;\n" - " info->m_J2linearAxis[info->rowskip+1] = -1;\n" - " info->m_J2linearAxis[2*info->rowskip+2] = -1;\n" - " }\n" - " \n" - " float4 a2 = qtRotate(rotB,constraint->m_pivotInB);\n" - " \n" - " {\n" - " // float4 a2n = -a2;\n" - " __global float4* angular0 = (__global 
float4*)(info->m_J2angularAxis);\n" - " __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);\n" - " __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);\n" - " getSkewSymmetricMatrix(a2,angular0,angular1,angular2);\n" - " }\n" - " \n" - " // set right hand side\n" - "// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n" - " float currERP = info->erp;\n" - " float k = info->fps * currERP;\n" - " int j;\n" - " float4 result = a2 + posB - a1 - posA;\n" - " float* resultPtr = &result;\n" - " for (j=0; j<3; j++)\n" - " {\n" - " info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" - " }\n" - "}\n" - "Quaternion nearest( Quaternion first, Quaternion qd)\n" - "{\n" - " Quaternion diff,sum;\n" - " diff = first- qd;\n" - " sum = first + qd;\n" - " \n" - " if( dot(diff,diff) < dot(sum,sum) )\n" - " return qd;\n" - " return (-qd);\n" - "}\n" - "float b3Acos(float x) \n" - "{ \n" - " if (x<-1) \n" - " x=-1; \n" - " if (x>1) \n" - " x=1;\n" - " return acos(x); \n" - "}\n" - "float getAngle(Quaternion orn)\n" - "{\n" - " if (orn.w>=1.f)\n" - " orn.w=1.f;\n" - " float s = 2.f * b3Acos(orn.w);\n" - " return s;\n" - "}\n" - "void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" - "{\n" - " Quaternion orn1 = nearest(orn0,orn1a);\n" - " \n" - " Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" - " *angle = getAngle(dorn);\n" - " *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" - " \n" - " //check for axis length\n" - " float len = dot3F4(*axis,*axis);\n" - " if (len < FLT_EPSILON*FLT_EPSILON)\n" - " *axis = (float4)(1,0,0,0);\n" - " else\n" - " *axis /= sqrt(len);\n" - "}\n" - "void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" - "{\n" - " Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" - " Quaternion worldOrnB = 
bodies[constraint->m_rbB].m_quat;\n" - " int s = info->rowskip;\n" - " int start_index = start_row * s;\n" - " // 3 rows to make body rotations equal\n" - " info->m_J1angularAxis[start_index] = 1;\n" - " info->m_J1angularAxis[start_index + s + 1] = 1;\n" - " info->m_J1angularAxis[start_index + s*2+2] = 1;\n" - " if ( info->m_J2angularAxis)\n" - " {\n" - " info->m_J2angularAxis[start_index] = -1;\n" - " info->m_J2angularAxis[start_index + s+1] = -1;\n" - " info->m_J2angularAxis[start_index + s*2+2] = -1;\n" - " }\n" - " \n" - " float currERP = info->erp;\n" - " float k = info->fps * currERP;\n" - " float4 diff;\n" - " float angle;\n" - " float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" - " \n" - " calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" - " diff*=-angle;\n" - " \n" - " float* resultPtr = &diff;\n" - " \n" - " for (int j=0; j<3; j++)\n" - " {\n" - " info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" - " }\n" - " \n" - "}\n" - "__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numBodies)\n" - " return;\n" - " if (bodies[i].m_invMass)\n" - " {\n" - "// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)\n" - " {\n" - " bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;\n" - " }\n" - "// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP)\n" - " {\n" - " bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;\n" - " } \n" - " }\n" - "}\n" - "__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, \n" - " __global unsigned int* infos, \n" - " __global unsigned int* constraintRowOffsets, \n" - " __global b3GpuGenericConstraint* constraints, \n" - " __global b3BatchConstraint* batchConstraints, \n" - " __global b3RigidBodyCL* bodies,\n" - " __global BodyInertia* inertias,\n" - " __global b3GpuSolverBody* 
solverBodies,\n" - " float timeStep,\n" - " float globalErp,\n" - " float globalCfm,\n" - " float globalDamping,\n" - " int globalNumIterations,\n" - " int numConstraints)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numConstraints)\n" - " return;\n" - " \n" - " //for now, always initialize the batch info\n" - " int info1 = infos[i];\n" - " \n" - " __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" - " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" - " __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" - " __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" - " int solverBodyIdA = constraint->m_rbA;\n" - " int solverBodyIdB = constraint->m_rbB;\n" - " __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" - " __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" - " if (rbA->m_invMass)\n" - " {\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" - " } else\n" - " {\n" - "// if (!solverBodyIdA)\n" - "// m_staticIdx = 0;\n" - " batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" - " }\n" - " if (rbB->m_invMass)\n" - " {\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" - " } else\n" - " {\n" - "// if (!solverBodyIdB)\n" - "// m_staticIdx = 0;\n" - " batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" - " }\n" - " if (info1)\n" - " {\n" - " int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? 
constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" - "// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" - " // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" - " int j;\n" - " for ( j=0;j<info1;j++)\n" - " {\n" - "// memset(¤tConstraintRow[j],0,sizeof(b3SolverConstraint));\n" - " currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0);\n" - " currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0);\n" - " currentConstraintRow[j].m_appliedImpulse = 0.f;\n" - " currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" - " currentConstraintRow[j].m_cfm = 0.f;\n" - " currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0);\n" - " currentConstraintRow[j].m_friction = 0.f;\n" - " currentConstraintRow[j].m_frictionIndex = 0;\n" - " currentConstraintRow[j].m_jacDiagABInv = 0.f;\n" - " currentConstraintRow[j].m_lowerLimit = 0.f;\n" - " currentConstraintRow[j].m_upperLimit = 0.f;\n" - " currentConstraintRow[j].m_originalConstraint = i;\n" - " currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n" - " currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n" - " currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);\n" - " currentConstraintRow[j].m_rhs = 0.f;\n" - " currentConstraintRow[j].m_rhsPenetration = 0.f;\n" - " currentConstraintRow[j].m_solverBodyIdA = 0;\n" - " currentConstraintRow[j].m_solverBodyIdB = 0;\n" - " \n" - " currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;\n" - " currentConstraintRow[j].m_upperLimit = B3_INFINITY;\n" - " currentConstraintRow[j].m_appliedImpulse = 0.f;\n" - " currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" - " currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;\n" - " currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;\n" - " currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; \n" - " }\n" - " bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" - " 
bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" - " bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" - " bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" - " bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" - " int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" - " \n" - " b3GpuConstraintInfo2 info2;\n" - " info2.fps = 1.f/timeStep;\n" - " info2.erp = globalErp;\n" - " info2.m_J1linearAxisFloat4 = ¤tConstraintRow->m_contactNormal;\n" - " info2.m_J1angularAxisFloat4 = ¤tConstraintRow->m_relpos1CrossNormal;\n" - " info2.m_J2linearAxisFloat4 = 0;\n" - " info2.m_J2angularAxisFloat4 = ¤tConstraintRow->m_relpos2CrossNormal;\n" - " info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" - " ///the size of b3SolverConstraint needs be a multiple of float\n" - "// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" - " info2.m_constraintError = ¤tConstraintRow->m_rhs;\n" - " currentConstraintRow->m_cfm = globalCfm;\n" - " info2.m_damping = globalDamping;\n" - " info2.cfm = ¤tConstraintRow->m_cfm;\n" - " info2.m_lowerLimit = ¤tConstraintRow->m_lowerLimit;\n" - " info2.m_upperLimit = ¤tConstraintRow->m_upperLimit;\n" - " info2.m_numIterations = globalNumIterations;\n" - " switch (constraint->m_constraintType)\n" - " {\n" - " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" - " {\n" - " getInfo2Point2Point(constraint,&info2,bodies);\n" - " break;\n" - " }\n" - " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" - " {\n" - " getInfo2Point2Point(constraint,&info2,bodies);\n" - " getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" - " break;\n" - " }\n" - " default:\n" - " {\n" - " }\n" - " }\n" - " ///finalize the constraint setup\n" - " for ( j=0;j<info1;j++)\n" - " {\n" - " __global b3SolverConstraint* solverConstraint = ¤tConstraintRow[j];\n" - " if 
(solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)\n" - " {\n" - " solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" - " }\n" - " if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" - " {\n" - " solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" - " }\n" - "// solverConstraint->m_originalContactPoint = constraint;\n" - " \n" - " Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" - " {\n" - " //float4 angularFactorA(1,1,1);\n" - " float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" - " solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" - " }\n" - " \n" - " Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" - " {\n" - " float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" - " solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" - " }\n" - " {\n" - " //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n" - " //because it gets multiplied iMJlB\n" - " float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;\n" - " float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" - " float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" - " float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" - " float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" - " sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" - " sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" - " sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);\n" - " float fsum = fabs(sum);\n" - " if (fsum>FLT_EPSILON)\n" - " {\n" - " solverConstraint->m_jacDiagABInv = 1.f/sum;\n" - " } else\n" - " {\n" - " solverConstraint->m_jacDiagABInv = 0.f;\n" - " 
}\n" - " }\n" - " ///fix rhs\n" - " ///todo: add force/torque accelerators\n" - " {\n" - " float rel_vel;\n" - " float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" - " float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" - " rel_vel = vel1Dotn+vel2Dotn;\n" - " float restitution = 0.f;\n" - " float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" - " float velocityError = restitution - rel_vel * info2.m_damping;\n" - " float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;\n" - " float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" - " solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" - " solverConstraint->m_appliedImpulse = 0.f;\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl deleted file mode 100644 index 5c4d62e4ec..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.cl +++ /dev/null @@ -1,501 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define mymake_float4 (float4) -//#define make_float2 (float2) -//#define make_uint4 (uint4) -//#define make_int4 (int4) -//#define make_uint2 (uint2) -//#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// - - - - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float 
dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 normalize3(const float4 a) -{ - float4 n = mymake_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - - - - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - - - - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - 
int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1); - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = mymake_float4(-n.xyz,0.f); - *angular0 = -cross3(r0, n); - *angular1 = cross3(r1, n); -} - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ); - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1); - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB); - -void solveContact(__global Constraint4* cs, 
- float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - *linVelA += linImp0; - *angVelA += angImp0; - *linVelB += linImp1; - *angVelB += angImp1; - } -} - -void btPlaneSpace1 (const float4* n, float4* p, float4* q); - void btPlaneSpace1 (const float4* n, float4* p, float4* q) -{ - if (fabs(n[0].z) > 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - -void solveContactConstraint(__global Body* gBodies, __global 
Shape* gShapes, __global Constraint4* ldsCs); -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ); - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - - } - -} - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelContact(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = 
GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1); - //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1); - //int cellIdx = xIdx+yIdx*nSplit; - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idx<end; ) - { - if (gConstraints[idx].m_batchIdx == ldsCurBatch) - { - solveContactConstraint( gBodies, gShapes, &gConstraints[idx] ); - - idx+=64; - } else - { - break; - } - } - GROUP_LDS_BARRIER; - - if( lIdx == 0 ) - { - ldsCurBatch++; - } - GROUP_LDS_BARRIER; - } - - -} - - - -__kernel void solveSingleContactKernel(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - int cellIdx, - int batchOffset, - int numConstraintsInBatch - ) -{ - - int index = get_global_id(0); - if (index < numConstraintsInBatch) - { - int idx=batchOffset+index; - solveContactConstraint( gBodies, gShapes, &gConstraints[idx] ); - } -} diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h deleted file mode 100644 index 6e14ad51fc..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h +++ /dev/null @@ -1,392 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveContactCL = - "/*\n" - 
"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define 
AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define mymake_float4 (float4)\n" - "//#define make_float2 (float2)\n" - "//#define make_uint4 (uint4)\n" - "//#define make_int4 (int4)\n" - "//#define make_uint2 (uint2)\n" - "//#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = mymake_float4(a.xyz,0.f);\n" - " float4 b1 = mymake_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = mymake_float4(b.m_row[0].x, 
b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings[1];\n" - "} Constraint4;\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" - "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" - "{\n" - " *linear = mymake_float4(-n.xyz,0.f);\n" - " *angular0 = -cross3(r0, n);\n" - " *angular1 = cross3(r1, n);\n" - 
"}\n" - "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" - "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" - "{\n" - " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" - "}\n" - "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" - " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" - "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" - " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" - "{\n" - " // linear0,1 are normlized\n" - " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" - " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" - " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" - " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" - " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" - "}\n" - "void solveContact(__global Constraint4* cs,\n" - " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" - " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n" - "void solveContact(__global Constraint4* cs,\n" - " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" - " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" - "{\n" - " float minRambdaDt = 0;\n" - " float maxRambdaDt = FLT_MAX;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = cs->m_worldPos[ic] - posA;\n" - " float4 r1 = cs->m_worldPos[ic] - posB;\n" - " setLinearAndAngular( -cs->m_linear, r0, 
r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" - " *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" - " rambdaDt *= cs->m_jacCoeffInv[ic];\n" - " {\n" - " float prevSum = cs->m_appliedRambdaDt[ic];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt );\n" - " updated = min2( updated, maxRambdaDt );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_appliedRambdaDt[ic] = updated;\n" - " }\n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " *linVelA += linImp0;\n" - " *angVelA += angImp0;\n" - " *linVelB += linImp1;\n" - " *angVelB += angImp1;\n" - " }\n" - "}\n" - "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" - " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" - "{\n" - " if (fabs(n[0].z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n[0].z*k;\n" - " p[0].z = n[0].y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n[0].x*p[0].z;\n" - " q[0].z = n[0].x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n[0].y*k;\n" - " p[0].y = n[0].x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n[0].z*p[0].y;\n" - " q[0].y = n[0].z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" - "{\n" - " //float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = 
ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - " posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" - " if (gBodies[aIdx].m_invMass)\n" - " {\n" - " gBodies[aIdx].m_linVel = linVelA;\n" - " gBodies[aIdx].m_angVel = angVelA;\n" - " } else\n" - " {\n" - " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " \n" - " }\n" - " if (gBodies[bIdx].m_invMass)\n" - " {\n" - " gBodies[bIdx].m_linVel = linVelB;\n" - " gBodies[bIdx].m_angVel = angVelB;\n" - " } else\n" - " {\n" - " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " \n" - " }\n" - "}\n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void BatchSolveKernelContact(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " __global int* gN,\n" - " __global int* gOffsets,\n" - " __global int* batchSizes,\n" - " int maxBatch1,\n" - " int cellBatch,\n" - " int4 nSplit\n" - " )\n" - "{\n" - " //__local int ldsBatchIdx[WG_SIZE+1];\n" - " __local int ldsCurBatch;\n" - " __local int ldsNextBatch;\n" - " __local int ldsStart;\n" - " int 
lIdx = GET_LOCAL_IDX;\n" - " int wgIdx = GET_GROUP_IDX;\n" - "// int gIdx = GET_GLOBAL_IDX;\n" - "// debugInfo[gIdx].m_valInt0 = gIdx;\n" - " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" - " \n" - " \n" - " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" - " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" - " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" - " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" - " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" - " //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" - " //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" - " //int cellIdx = xIdx+yIdx*nSplit;\n" - " \n" - " if( gN[cellIdx] == 0 ) \n" - " return;\n" - " int maxBatch = batchSizes[cellIdx];\n" - " \n" - " \n" - " const int start = gOffsets[cellIdx];\n" - " const int end = start + gN[cellIdx];\n" - " \n" - " \n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch = 0;\n" - " ldsNextBatch = 0;\n" - " ldsStart = start;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " int idx=ldsStart+lIdx;\n" - " while (ldsCurBatch < maxBatch)\n" - " {\n" - " for(; idx<end; )\n" - " {\n" - " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" - " {\n" - " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" - " idx+=64;\n" - " } else\n" - " {\n" - " break;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch++;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " \n" - " \n" - "}\n" - "__kernel void solveSingleContactKernel(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " int cellIdx,\n" - " int batchOffset,\n" - " int numConstraintsInBatch\n" - " )\n" - "{\n" - " int index = get_global_id(0);\n" - " if (index < numConstraintsInBatch)\n" - " {\n" - " int idx=batchOffset+index;\n" - " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" - " } \n" - "}\n"; diff --git 
a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl deleted file mode 100644 index 1d70fbbae3..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl +++ /dev/null @@ -1,527 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - - -//#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define mymake_float4 (float4) -//#define make_float2 (float2) -//#define make_uint4 (uint4) -//#define make_int4 (int4) -//#define make_uint2 (uint2) -//#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// - - - - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = mymake_float4(a.xyz,0.f); - float4 b1 = mymake_float4(b.xyz,0.f); - return dot(a1, b1); -} - - - - -__inline -float4 normalize3(const float4 a) -{ - 
float4 n = mymake_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - - - - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - - - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - - - - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - - - - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really 
kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1); - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = mymake_float4(-n.xyz,0.f); - *angular0 = -cross3(r0, n); - *angular1 = cross3(r1, n); -} - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ); - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1); - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1) -{ - // linear0,1 are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/(jmj0+jmj1+jmj2+jmj3); -} -void btPlaneSpace1 (const float4* n, float4* p, float4* q); - void btPlaneSpace1 (const float4* n, float4* p, float4* q) -{ - if (fabs(n[0].z) > 0.70710678f) { - // choose p in y-z plane - float a = n[0].y*n[0].y + n[0].z*n[0].z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n[0].z*k; - p[0].z = n[0].y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n[0].x*p[0].z; - q[0].z = n[0].x*p[0].y; - } - else { - // choose p in x-y plane - float a = 
n[0].x*n[0].x + n[0].y*n[0].y; - float k = 1.f/sqrt(a); - p[0].x = -n[0].y*k; - p[0].y = n[0].x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n[0].z*p[0].y; - q[0].y = n[0].z*p[0].x; - q[0].z = a*k; - } -} - - -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs); -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs) -{ - float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=ldsCs[0].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - -// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, -// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); - - - { - - __global Constraint4* cs = ldsCs; - - if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(&n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, 
angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - angVelA -= (angNA*0.1f)*n; - angVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (gBodies[aIdx].m_invMass) - { - gBodies[aIdx].m_linVel = linVelA; - gBodies[aIdx].m_angVel = angVelA; - } else - { - gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0); - } - if (gBodies[bIdx].m_invMass) - { - gBodies[bIdx].m_linVel = linVelB; - gBodies[bIdx].m_angVel = angVelB; - } else - { - gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0); - gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0); - } - - -} - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void BatchSolveKernelFriction(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - __global int* gN, - __global int* gOffsets, - __global int* batchSizes, - int maxBatch1, - int cellBatch, - int4 nSplit - ) -{ - //__local int 
ldsBatchIdx[WG_SIZE+1]; - __local int ldsCurBatch; - __local int ldsNextBatch; - __local int ldsStart; - - int lIdx = GET_LOCAL_IDX; - int wgIdx = GET_GROUP_IDX; - -// int gIdx = GET_GLOBAL_IDX; -// debugInfo[gIdx].m_valInt0 = gIdx; - //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE; - - - int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplit.x*nSplit.y)/4)); - int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y); - - - if( gN[cellIdx] == 0 ) - return; - - int maxBatch = batchSizes[cellIdx]; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - - if( lIdx == 0 ) - { - ldsCurBatch = 0; - ldsNextBatch = 0; - ldsStart = start; - } - - - GROUP_LDS_BARRIER; - - int idx=ldsStart+lIdx; - while (ldsCurBatch < maxBatch) - { - for(; idx<end; ) - { - if (gConstraints[idx].m_batchIdx == ldsCurBatch) - { - - solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] ); - - idx+=64; - } else - { - break; - } - } - GROUP_LDS_BARRIER; - if( lIdx == 0 ) - { - ldsCurBatch++; - } - GROUP_LDS_BARRIER; - } - - -} - - - - - - -__kernel void solveSingleFrictionKernel(__global Body* gBodies, - __global Shape* gShapes, - __global Constraint4* gConstraints, - int cellIdx, - int batchOffset, - int numConstraintsInBatch - ) -{ - - int index = get_global_id(0); - if (index < numConstraintsInBatch) - { - - int idx=batchOffset+index; - - solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] ); - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h deleted file mode 100644 index 9707cdb25d..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h +++ /dev/null @@ -1,420 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveFrictionCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define mymake_float4 (float4)\n" - "//#define make_float2 (float2)\n" - "//#define make_uint4 (uint4)\n" - "//#define make_int4 (int4)\n" - "//#define make_uint2 (uint2)\n" - "//#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float4 
cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = mymake_float4(a.xyz,0.f);\n" - " float4 b1 = mymake_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 
m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings[1];\n" - "} Constraint4;\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" - "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" - "{\n" - " *linear = mymake_float4(-n.xyz,0.f);\n" - " *angular0 = -cross3(r0, n);\n" - " *angular1 = cross3(r1, n);\n" - "}\n" - "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" - "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" - "{\n" - " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" - "}\n" - "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" - " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" - "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" - " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" - "{\n" - " // 
linear0,1 are normlized\n" - " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" - " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" - " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" - " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" - " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" - "}\n" - "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" - " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" - "{\n" - " if (fabs(n[0].z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n[0].z*k;\n" - " p[0].z = n[0].y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n[0].x*p[0].z;\n" - " q[0].z = n[0].x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n[0].y*k;\n" - " p[0].y = n[0].x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n[0].z*p[0].y;\n" - " q[0].y = n[0].z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" - "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" - "{\n" - " float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - 
" \n" - " {\n" - " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" - " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" - " float sum = 0;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " sum +=ldsCs[0].m_appliedRambdaDt[j];\n" - " }\n" - " frictionCoeff = 0.7f;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " maxRambdaDt[j] = frictionCoeff*sum;\n" - " minRambdaDt[j] = -maxRambdaDt[j];\n" - " }\n" - " \n" - "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" - " \n" - " \n" - " {\n" - " \n" - " __global Constraint4* cs = ldsCs;\n" - " \n" - " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" - " const float4 center = cs->m_center;\n" - " \n" - " float4 n = -cs->m_linear;\n" - " \n" - " float4 tangent[2];\n" - " btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = center - posA;\n" - " float4 r1 = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB );\n" - " rambdaDt *= cs->m_fJacCoeffInv[i];\n" - " \n" - " {\n" - " float prevSum = cs->m_fAppliedRambdaDt[i];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt[i] );\n" - " updated = min2( updated, maxRambdaDt[i] );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_fAppliedRambdaDt[i] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " linVelA += linImp0;\n" - " angVelA += angImp0;\n" - " linVelB += linImp1;\n" - " angVelB += angImp1;\n" - " }\n" - " { // angular 
damping for point constraint\n" - " float4 ab = normalize3( posB - posA );\n" - " float4 ac = normalize3( center - posA );\n" - " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" - " {\n" - " float angNA = dot3F4( n, angVelA );\n" - " float angNB = dot3F4( n, angVelB );\n" - " \n" - " angVelA -= (angNA*0.1f)*n;\n" - " angVelB -= (angNB*0.1f)*n;\n" - " }\n" - " }\n" - " }\n" - " \n" - " \n" - " }\n" - " if (gBodies[aIdx].m_invMass)\n" - " {\n" - " gBodies[aIdx].m_linVel = linVelA;\n" - " gBodies[aIdx].m_angVel = angVelA;\n" - " } else\n" - " {\n" - " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " }\n" - " if (gBodies[bIdx].m_invMass)\n" - " {\n" - " gBodies[bIdx].m_linVel = linVelB;\n" - " gBodies[bIdx].m_angVel = angVelB;\n" - " } else\n" - " {\n" - " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" - " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" - " }\n" - " \n" - "}\n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void BatchSolveKernelFriction(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " __global int* gN,\n" - " __global int* gOffsets,\n" - " __global int* batchSizes,\n" - " int maxBatch1,\n" - " int cellBatch,\n" - " int4 nSplit\n" - " )\n" - "{\n" - " //__local int ldsBatchIdx[WG_SIZE+1];\n" - " __local int ldsCurBatch;\n" - " __local int ldsNextBatch;\n" - " __local int ldsStart;\n" - " int lIdx = GET_LOCAL_IDX;\n" - " int wgIdx = GET_GROUP_IDX;\n" - "// int gIdx = GET_GLOBAL_IDX;\n" - "// debugInfo[gIdx].m_valInt0 = gIdx;\n" - " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" - " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" - 
" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" - " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" - " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" - " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" - " \n" - " if( gN[cellIdx] == 0 ) \n" - " return;\n" - " int maxBatch = batchSizes[cellIdx];\n" - " const int start = gOffsets[cellIdx];\n" - " const int end = start + gN[cellIdx];\n" - " \n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch = 0;\n" - " ldsNextBatch = 0;\n" - " ldsStart = start;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " int idx=ldsStart+lIdx;\n" - " while (ldsCurBatch < maxBatch)\n" - " {\n" - " for(; idx<end; )\n" - " {\n" - " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" - " {\n" - " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" - " idx+=64;\n" - " } else\n" - " {\n" - " break;\n" - " }\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " if( lIdx == 0 )\n" - " {\n" - " ldsCurBatch++;\n" - " }\n" - " GROUP_LDS_BARRIER;\n" - " }\n" - " \n" - " \n" - "}\n" - "__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n" - " __global Shape* gShapes,\n" - " __global Constraint4* gConstraints,\n" - " int cellIdx,\n" - " int batchOffset,\n" - " int numConstraintsInBatch\n" - " )\n" - "{\n" - " int index = get_global_id(0);\n" - " if (index < numConstraintsInBatch)\n" - " {\n" - " \n" - " int idx=batchOffset+index;\n" - " \n" - " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" - " } \n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl deleted file mode 100644 index 8e2de7b5a6..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl +++ /dev/null @@ -1,277 +0,0 @@ - -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. 
-In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Takahiro Harada - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, 
value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - 
float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - - - -#define WG_SIZE 64 - - - - - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - - - - - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - - - -typedef struct -{ - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; -} ConstBufferCTC; - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, -int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff -) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff, - &cs ); - - 
cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -} - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h deleted file mode 100644 index d53db03181..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ /dev/null @@ -1,702 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetupCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = 
b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#ifndef B3_CONTACT_CONSTRAINT5_H\n" - "#define B3_CONTACT_CONSTRAINT5_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" - "struct b3ContactConstraint4\n" - "{\n" - " b3Float4 m_linear;//normal?\n" - " b3Float4 m_worldPos[4];\n" - " b3Float4 m_center; // friction\n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; // friction\n" - " float m_fAppliedRambdaDt[2]; // friction\n" - " unsigned int m_bodyA;\n" - " unsigned int m_bodyB;\n" - " int m_batchIdx;\n" - " unsigned int m_paddings;\n" - "};\n" - "//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" - "inline float 
b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" - "{\n" - " return constraint->m_linear.w; \n" - "}\n" - "#endif //B3_CONTACT_CONSTRAINT5_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" - " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline 
b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - 
"__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " 
b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" - " void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" - "{\n" - " if (b3Fabs(n.z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n.y*n.y + n.z*n.z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n.z*k;\n" - " p[0].z = n.y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n.x*p[0].z;\n" - " q[0].z = n.x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n.x*n.x + n.y*n.y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n.y*k;\n" - " p[0].y = n.x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n.z*p[0].y;\n" - " q[0].y = n.z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - " \n" - "void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" - "{\n" - " *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" - " *angular0 = b3Cross3(r0, n);\n" - " *angular1 = 
-b3Cross3(r1, n);\n" - "}\n" - "float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" - " b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" - "{\n" - " return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" - "}\n" - "float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" - " float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" - "{\n" - " // linear0,1 are normlized\n" - " float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" - " float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" - " float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" - " float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" - " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" - "}\n" - "void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" - " b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" - " __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" - " b3ContactConstraint4_t* dstC )\n" - "{\n" - " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" - " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" - " float dtInv = 1.f/dt;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" - " dstC->m_linear = src->m_worldNormalOnB;\n" - " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" - " b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" - " if( ic >= src->m_worldNormalOnB.w )//npoints\n" 
- " {\n" - " dstC->m_jacCoeffInv[ic] = 0.f;\n" - " continue;\n" - " }\n" - " float relVelN;\n" - " {\n" - " b3Float4 linear, angular0, angular1;\n" - " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" - " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" - " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB);\n" - " float e = 0.f;//src->getRestituitionCoeff();\n" - " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" - " dstC->m_b[ic] = e*relVelN;\n" - " //float penetration = src->m_worldPosB[ic].w;\n" - " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " }\n" - " if( src->m_worldNormalOnB.w > 0 )//npoints\n" - " { // prepare friction\n" - " b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" - " for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" - " center += src->m_worldPosB[i];\n" - " center /= (float)src->m_worldNormalOnB.w;\n" - " b3Float4 tangent[2];\n" - " b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" - " \n" - " b3Float4 r[2];\n" - " r[0] = center - posA;\n" - " r[1] = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " b3Float4 linear, angular0, angular1;\n" - " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" - " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" - " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" - " }\n" - " dstC->m_center = center;\n" - " }\n" - " for(int i=0; i<4; i++)\n" - " {\n" - " if( i<src->m_worldNormalOnB.w )\n" - " {\n" - " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" - " }\n" - " else\n" - " {\n" - " dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" - " }\n" - " }\n" - "}\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - 
"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - 
"__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - 
"typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - " \n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "typedef struct\n" - "{\n" - " int m_nContacts;\n" - " float m_dt;\n" - " float m_positionDrift;\n" - " float m_positionConstraintCoeff;\n" - "} ConstBufferCTC;\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" - "int nContacts,\n" - "float dt,\n" - "float positionDrift,\n" - "float positionConstraintCoeff\n" - ")\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nContacts )\n" - " {\n" - " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" - " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" - " b3ContactConstraint4_t cs;\n" - " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" - " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" - " &cs );\n" - " \n" - " cs.m_batchIdx = 
gContact[gIdx].m_batchIdx;\n" - " gConstraintOut[gIdx] = cs;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl deleted file mode 100644 index 3dc48d4350..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl +++ /dev/null @@ -1,613 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - 
return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a, float4 b) -{ - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 
mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = 
cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - - int m_batchIdx; - u32 m_paddings[1]; -} Constraint4; - - - -typedef struct -{ - int m_nConstraints; - int m_start; - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBuffer; - -typedef struct -{ - int m_solveFriction; - int m_maxBatch; // long batch really kills the performance - int m_batchIdx; - int m_nSplit; -// int m_paddings[1]; -} ConstBufferBatchSolve; - - - - - -typedef struct -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -} SolverDebugInfo; - - - - -// others -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb ) -{ - int 
nContacts = cb.x; - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int srcIdx = sortData[gIdx].y; - out[gIdx] = in[srcIdx]; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sd; - sd.x = contactsIn[gIdx].m_childIndexB; - sd.y = gIdx; - sortDataOut[gIdx] = sd; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_childIndexA; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - -__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int2 sdIn; - sdIn = sortDataInOut[gIdx]; - int2 sdOut; - sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit; - sdOut.y = sdIn.y; - sortDataInOut[gIdx] = sdOut; - } -} - - - - -typedef struct -{ - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; -} ConstBufferSSD; - - -__constant const int gridTable4x4[] = -{ - 0,1,17,16, - 1,2,18,19, - 17,18,32,3, - 16,19,3,34 -}; - -__constant const int 
gridTable8x8[] = -{ - 0, 2, 3, 16, 17, 18, 19, 1, - 66, 64, 80, 67, 82, 81, 65, 83, - 131,144,128,130,147,129,145,146, - 208,195,194,192,193,211,210,209, - 21, 22, 23, 5, 4, 6, 7, 20, - 86, 85, 69, 87, 70, 68, 84, 71, - 151,133,149,150,135,148,132,134, - 197,27,214,213,212,199,198,196 - -}; - - - - -#define USE_SPATIAL_BATCHING 1 -#define USE_4x4_GRID 1 - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, -int nContacts,float scale,int4 nSplit,int staticIdx) - -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit; - int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit; - - int aIdx = abs(aPtrAndSignBit ); - int bIdx = abs(bPtrAndSignBit); - - bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx); - bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx); - -#if USE_SPATIAL_BATCHING - int idx = (aStatic)? 
bIdx: aIdx; - float4 p = gBodies[idx].m_pos; - int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1); - int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1); - int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1); - int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y); - -#else//USE_SPATIAL_BATCHING - #if USE_4x4_GRID - int aa = aIdx&3; - int bb = bIdx&3; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*4; - int newIndex = gridTable4x4[gridIndex]; - #else//USE_4x4_GRID - int aa = aIdx&7; - int bb = bIdx&7; - if (aStatic) - aa = bb; - if (bStatic) - bb = aa; - - int gridIndex = aa + bb*8; - int newIndex = gridTable8x8[gridIndex]; - #endif//USE_4x4_GRID -#endif//USE_SPATIAL_BATCHING - - - gSortDataOut[gIdx].x = newIndex; - gSortDataOut[gIdx].y = gIdx; - } - else - { - gSortDataOut[gIdx].x = 0xffffffff; - } -} - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb ) -{ - int gIdx = GET_GLOBAL_IDX; - if( gIdx < cb.x ) - { - gOut[gIdx] = gIn[gIdx]; - } -} - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h deleted file mode 100644 index 1e6e3579b6..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ /dev/null @@ -1,600 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetup2CL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. 
\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Takahiro Harada\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " 
b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION 
cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, 
denominator); \n" - "}\n" - "__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a, float4 b)\n" - "{\n" - " return cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - 
"}Matrix3x3;\n" - "__inline\n" - "Matrix3x3 mtZero();\n" - "__inline\n" - "Matrix3x3 mtIdentity();\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m);\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "Matrix3x3 mtZero()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(0.f);\n" - " m.m_row[1] = (float4)(0.f);\n" - " m.m_row[2] = (float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtIdentity()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(1,0,0,0);\n" - " m.m_row[1] = (float4)(0,1,0,0);\n" - " m.m_row[2] = (float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m)\n" - "{\n" - " Matrix3x3 out;\n" - " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" - "{\n" - " Matrix3x3 transB;\n" - " transB = mtTranspose( b );\n" - " Matrix3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, 
Matrix3x3 b)\n" - "{\n" - " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef 
struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings[1];\n" - "} Constraint4;\n" - "typedef struct\n" - "{\n" - " int m_nConstraints;\n" - " int m_start;\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBuffer;\n" - "typedef struct\n" - "{\n" - " int m_solveFriction;\n" - " int m_maxBatch; // long batch really kills the performance\n" - " int m_batchIdx;\n" - " int m_nSplit;\n" - "// int m_paddings[1];\n" - "} ConstBufferBatchSolve;\n" - " \n" - "typedef struct \n" - "{\n" - " int m_valInt0;\n" - " int m_valInt1;\n" - " int m_valInt2;\n" - " int m_valInt3;\n" - " float m_val0;\n" - " float m_val1;\n" - " float m_val2;\n" - " float m_val3;\n" - "} SolverDebugInfo;\n" - "// others\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" - "{\n" - " int nContacts = cb.x;\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int srcIdx = sortData[gIdx].y;\n" - " out[gIdx] = in[srcIdx];\n" - " }\n" - "}\n" - "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sd;\n" - " sd.x = contactsIn[gIdx].m_childIndexB;\n" - " sd.y = gIdx;\n" - " sortDataOut[gIdx] = sd;\n" - " }\n" - "}\n" - "__kernel 
__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_childIndexA;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < nContacts )\n" - " {\n" - " int2 sdIn;\n" - " sdIn = sortDataInOut[gIdx];\n" - " int2 sdOut;\n" - " sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n" - " sdOut.y = sdIn.y;\n" - " sortDataInOut[gIdx] = sdOut;\n" - " }\n" - "}\n" - "typedef struct\n" - "{\n" - " int m_nContacts;\n" - " int m_staticIdx;\n" - " float m_scale;\n" - " int m_nSplit;\n" - "} ConstBufferSSD;\n" - "__constant const int gridTable4x4[] = \n" - "{\n" - " 0,1,17,16,\n" - " 1,2,18,19,\n" - " 17,18,32,3,\n" - " 16,19,3,34\n" - "};\n" - "__constant const int gridTable8x8[] = \n" - "{\n" - " 0, 2, 3, 16, 17, 18, 19, 1,\n" - " 66, 64, 80, 67, 82, 81, 65, 83,\n" - " 131,144,128,130,147,129,145,146,\n" - " 208,195,194,192,193,211,210,209,\n" - " 21, 22, 23, 5, 4, 6, 7, 20,\n" - " 86, 85, 69, 87, 70, 68, 84, 
71,\n" - " 151,133,149,150,135,148,132,134,\n" - " 197,27,214,213,212,199,198,196\n" - " \n" - "};\n" - "#define USE_SPATIAL_BATCHING 1\n" - "#define USE_4x4_GRID 1\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" - "int nContacts,float scale,int4 nSplit,int staticIdx)\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < nContacts )\n" - " {\n" - " int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n" - " int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" - " int aIdx = abs(aPtrAndSignBit );\n" - " int bIdx = abs(bPtrAndSignBit);\n" - " bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" - " bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" - "#if USE_SPATIAL_BATCHING \n" - " int idx = (aStatic)? bIdx: aIdx;\n" - " float4 p = gBodies[idx].m_pos;\n" - " int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n" - " int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n" - " int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n" - " int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n" - " \n" - "#else//USE_SPATIAL_BATCHING\n" - " #if USE_4x4_GRID\n" - " int aa = aIdx&3;\n" - " int bb = bIdx&3;\n" - " if (aStatic)\n" - " aa = bb;\n" - " if (bStatic)\n" - " bb = aa;\n" - " int gridIndex = aa + bb*4;\n" - " int newIndex = gridTable4x4[gridIndex];\n" - " #else//USE_4x4_GRID\n" - " int aa = aIdx&7;\n" - " int bb = bIdx&7;\n" - " if (aStatic)\n" - " aa = bb;\n" - " if (bStatic)\n" - " bb = aa;\n" - " int gridIndex = aa + bb*8;\n" - " int newIndex = gridTable8x8[gridIndex];\n" - " #endif//USE_4x4_GRID\n" - "#endif//USE_SPATIAL_BATCHING\n" - " gSortDataOut[gIdx].x = newIndex;\n" - " gSortDataOut[gIdx].y = gIdx;\n" - " }\n" - " else\n" - " {\n" - " gSortDataOut[gIdx].x = 0xffffffff;\n" - " }\n" - "}\n" - "__kernel\n" 
- "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " if( gIdx < cb.x )\n" - " {\n" - " gOut[gIdx] = gIn[gIdx];\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl deleted file mode 100644 index a21a08c3b4..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.cl +++ /dev/null @@ -1,968 +0,0 @@ -/* -Copyright (c) 2013 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Erwin Coumans - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" - -#pragma OPENCL EXTENSION cl_amd_printf : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable -#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable -#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable - - -#ifdef cl_ext_atomic_counters_32 -#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable -#else -#define counter32_t volatile global int* -#endif - -typedef unsigned int u32; -typedef unsigned short u16; -typedef unsigned char u8; - -#define GET_GROUP_IDX get_group_id(0) -#define GET_LOCAL_IDX get_local_id(0) -#define GET_GLOBAL_IDX get_global_id(0) -#define GET_GROUP_SIZE get_local_size(0) -#define GET_NUM_GROUPS get_num_groups(0) -#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) -#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) -#define AtomInc(x) atom_inc(&(x)) -#define AtomInc1(x, out) out = atom_inc(&(x)) -#define AppendInc(x, out) out = atomic_inc(x) -#define AtomAdd(x, value) atom_add(&(x), value) -#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) -#define AtomXhg(x, value) atom_xchg ( &(x), value ) - - -#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) - -#define make_float4 (float4) -#define make_float2 (float2) -#define make_uint4 (uint4) -#define make_int4 (int4) -#define make_uint2 (uint2) -#define make_int2 (int2) - - -#define max2 max -#define min2 min - - -/////////////////////////////////////// -// Vector -/////////////////////////////////////// -__inline -float fastDiv(float numerator, float denominator) -{ - return native_divide(numerator, denominator); -// return numerator/denominator; -} - -__inline -float4 fastDiv4(float4 numerator, float4 denominator) -{ - return native_divide(numerator, denominator); -} - -__inline -float fastSqrtf(float f2) -{ - 
return native_sqrt(f2); -// return sqrt(f2); -} - -__inline -float fastRSqrt(float f2) -{ - return native_rsqrt(f2); -} - -__inline -float fastLength4(float4 v) -{ - return fast_length(v); -} - -__inline -float4 fastNormalize4(float4 v) -{ - return fast_normalize(v); -} - - -__inline -float sqrtf(float a) -{ -// return sqrt(a); - return native_sqrt(a); -} - -__inline -float4 cross3(float4 a1, float4 b1) -{ - - float4 a=make_float4(a1.xyz,0.f); - float4 b=make_float4(b1.xyz,0.f); - //float4 a=a1; - //float4 b=b1; - return cross(a,b); -} - -__inline -float dot3F4(float4 a, float4 b) -{ - float4 a1 = make_float4(a.xyz,0.f); - float4 b1 = make_float4(b.xyz,0.f); - return dot(a1, b1); -} - -__inline -float length3(const float4 a) -{ - return sqrtf(dot3F4(a,a)); -} - -__inline -float dot4(const float4 a, const float4 b) -{ - return dot( a, b ); -} - -// for height -__inline -float dot3w1(const float4 point, const float4 eqn) -{ - return dot3F4(point,eqn) + eqn.w; -} - -__inline -float4 normalize3(const float4 a) -{ - float4 n = make_float4(a.x, a.y, a.z, 0.f); - return fastNormalize4( n ); -// float length = sqrtf(dot3F4(a, a)); -// return 1.f/length * a; -} - -__inline -float4 normalize4(const float4 a) -{ - float length = sqrtf(dot4(a, a)); - return 1.f/length * a; -} - -__inline -float4 createEquation(const float4 a, const float4 b, const float4 c) -{ - float4 eqn; - float4 ab = b-a; - float4 ac = c-a; - eqn = normalize3( cross3(ab, ac) ); - eqn.w = -dot3F4(eqn,a); - return eqn; -} - -/////////////////////////////////////// -// Matrix3x3 -/////////////////////////////////////// - -typedef struct -{ - float4 m_row[3]; -}Matrix3x3; - -__inline -Matrix3x3 mtZero(); - -__inline -Matrix3x3 mtIdentity(); - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m); - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b); - -__inline -float4 mtMul1(Matrix3x3 a, float4 b); - -__inline -float4 mtMul3(float4 a, Matrix3x3 b); - -__inline -Matrix3x3 mtZero() -{ - Matrix3x3 m; - m.m_row[0] = 
(float4)(0.f); - m.m_row[1] = (float4)(0.f); - m.m_row[2] = (float4)(0.f); - return m; -} - -__inline -Matrix3x3 mtIdentity() -{ - Matrix3x3 m; - m.m_row[0] = (float4)(1,0,0,0); - m.m_row[1] = (float4)(0,1,0,0); - m.m_row[2] = (float4)(0,0,1,0); - return m; -} - -__inline -Matrix3x3 mtTranspose(Matrix3x3 m) -{ - Matrix3x3 out; - out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f); - out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f); - out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f); - return out; -} - -__inline -Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b) -{ - Matrix3x3 transB; - transB = mtTranspose( b ); - Matrix3x3 ans; - // why this doesn't run when 0ing in the for{} - a.m_row[0].w = 0.f; - a.m_row[1].w = 0.f; - a.m_row[2].w = 0.f; - for(int i=0; i<3; i++) - { -// a.m_row[i].w = 0.f; - ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]); - ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]); - ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]); - ans.m_row[i].w = 0.f; - } - return ans; -} - -__inline -float4 mtMul1(Matrix3x3 a, float4 b) -{ - float4 ans; - ans.x = dot3F4( a.m_row[0], b ); - ans.y = dot3F4( a.m_row[1], b ); - ans.z = dot3F4( a.m_row[2], b ); - ans.w = 0.f; - return ans; -} - -__inline -float4 mtMul3(float4 a, Matrix3x3 b) -{ - float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0); - float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0); - float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0); - - float4 ans; - ans.x = dot3F4( a, colx ); - ans.y = dot3F4( a, coly ); - ans.z = dot3F4( a, colz ); - return ans; -} - -/////////////////////////////////////// -// Quaternion -/////////////////////////////////////// - -typedef float4 Quaternion; - -__inline -Quaternion qtMul(Quaternion a, Quaternion b); - -__inline -Quaternion qtNormalize(Quaternion in); - -__inline -float4 qtRotate(Quaternion q, float4 vec); - -__inline -Quaternion 
qtInvert(Quaternion q); - - - - - -__inline -Quaternion qtMul(Quaternion a, Quaternion b) -{ - Quaternion ans; - ans = cross3( a, b ); - ans += a.w*b+b.w*a; -// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); - ans.w = a.w*b.w - dot3F4(a, b); - return ans; -} - -__inline -Quaternion qtNormalize(Quaternion in) -{ - return fastNormalize4(in); -// in /= length( in ); -// return in; -} -__inline -float4 qtRotate(Quaternion q, float4 vec) -{ - Quaternion qInv = qtInvert( q ); - float4 vcpy = vec; - vcpy.w = 0.f; - float4 out = qtMul(qtMul(q,vcpy),qInv); - return out; -} - -__inline -Quaternion qtInvert(Quaternion q) -{ - return (Quaternion)(-q.xyz, q.w); -} - -__inline -float4 qtInvRotate(const Quaternion q, float4 vec) -{ - return qtRotate( qtInvert( q ), vec ); -} - - - - -#define WG_SIZE 64 - -typedef struct -{ - float4 m_pos; - Quaternion m_quat; - float4 m_linVel; - float4 m_angVel; - - u32 m_shapeIdx; - float m_invMass; - float m_restituitionCoeff; - float m_frictionCoeff; -} Body; - - - -typedef struct -{ - Matrix3x3 m_invInertia; - Matrix3x3 m_initInvInertia; -} Shape; - -typedef struct -{ - float4 m_linear; - float4 m_worldPos[4]; - float4 m_center; - float m_jacCoeffInv[4]; - float m_b[4]; - float m_appliedRambdaDt[4]; - - float m_fJacCoeffInv[2]; - float m_fAppliedRambdaDt[2]; - - u32 m_bodyA; - u32 m_bodyB; - int m_batchIdx; - u32 m_paddings; -} Constraint4; - - - - - - -__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex) -{ - int i = GET_GLOBAL_IDX; - - if( i < numContactManifolds) - { - int pa = manifoldPtr[i].m_bodyAPtrAndSignBit; - bool isFixedA = (pa <0) || (pa == fixedBodyIndex); - int bodyIndexA = abs(pa); - if (!isFixedA) - { - AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x); - } - barrier(CLK_GLOBAL_MEM_FENCE); - int pb = manifoldPtr[i].m_bodyBPtrAndSignBit; - bool isFixedB = (pb <0) || (pb == 
fixedBodyIndex); - int bodyIndexB = abs(pb); - if (!isFixedB) - { - AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y); - } - } -} - -__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies) -{ - int i = GET_GLOBAL_IDX; - - if( i < numSplitBodies) - { - linearVelocities[i] = make_float4(0); - angularVelocities[i] = make_float4(0); - } -} - - -__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) -{ - int i = GET_GLOBAL_IDX; - if (i<numBodies) - { - if (gBodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - float factor = 1.f/((float)count); - float4 averageLinVel = make_float4(0.f); - float4 averageAngVel = make_float4(0.f); - - for (int j=0;j<count;j++) - { - averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor; - averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor; - } - - for (int j=0;j<count;j++) - { - deltaLinearVelocities[bodyOffset+j] = averageLinVel; - deltaAngularVelocities[bodyOffset+j] = averageAngVel; - } - - }//bodies[i].m_invMass - }//i<numBodies -} - - - -void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1) -{ - *linear = make_float4(n.xyz,0.f); - *angular0 = cross3(r0, n); - *angular1 = -cross3(r1, n); -} - - -float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 ) -{ - return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1); -} - - -float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1, - float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB) -{ - // linear0,1 
are normlized - float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0; - float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0); - float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1; - float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1); - return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB); -} - - -void btPlaneSpace1 (float4 n, float4* p, float4* q); - void btPlaneSpace1 (float4 n, float4* p, float4* q) -{ - if (fabs(n.z) > 0.70710678f) { - // choose p in y-z plane - float a = n.y*n.y + n.z*n.z; - float k = 1.f/sqrt(a); - p[0].x = 0; - p[0].y = -n.z*k; - p[0].z = n.y*k; - // set q = n x p - q[0].x = a*k; - q[0].y = -n.x*p[0].z; - q[0].z = n.x*p[0].y; - } - else { - // choose p in x-y plane - float a = n.x*n.x + n.y*n.y; - float k = 1.f/sqrt(a); - p[0].x = -n.y*k; - p[0].y = n.x*k; - p[0].z = 0; - // set q = n x p - q[0].x = -n.z*p[0].y; - q[0].y = n.z*p[0].x; - q[0].z = a*k; - } -} - - - - - -void solveContact(__global Constraint4* cs, - float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA, - float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB, - float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB) -{ - float minRambdaDt = 0; - float maxRambdaDt = FLT_MAX; - - for(int ic=0; ic<4; ic++) - { - if( cs->m_jacCoeffInv[ic] == 0.f ) continue; - - float4 angular0, angular1, linear; - float4 r0 = cs->m_worldPos[ic] - posA; - float4 r1 = cs->m_worldPos[ic] - posB; - setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 ); - - - - float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, - *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic]; - rambdaDt *= cs->m_jacCoeffInv[ic]; - - - { - float prevSum = cs->m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = max2( updated, minRambdaDt ); - updated = min2( updated, maxRambdaDt ); - rambdaDt = updated - 
prevSum; - cs->m_appliedRambdaDt[ic] = updated; - } - - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - - if (invMassA) - { - *dLinVelA += linImp0; - *dAngVelA += angImp0; - } - if (invMassB) - { - *dLinVelB += linImp1; - *dAngVelB += angImp1; - } - } -} - - -// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - - -void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, -__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) -{ - - //float frictionCoeff = ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - float4 dLinVelA = make_float4(0,0,0,0); - float4 dAngVelA = make_float4(0,0,0,0); - float4 dLinVelB = make_float4(0,0,0,0); - float4 dAngVelB = make_float4(0,0,0,0); - - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[0].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - - if (invMassA) - { - dLinVelA = deltaLinearVelocities[splitIndexA]; - dAngVelA = deltaAngularVelocities[splitIndexA]; - } - - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[0].y; - int splitIndexB= 
bodyOffsetB+constraintOffsetB; - - if (invMassB) - { - dLinVelB = deltaLinearVelocities[splitIndexB]; - dAngVelB = deltaAngularVelocities[splitIndexB]; - } - - solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, - posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB); - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - -} - - -__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , -__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, -float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (i<numManifolds) - { - solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - } -} - - - - -void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, - __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities) -{ - float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w; - int aIdx = ldsCs[0].m_bodyA; - int bIdx = ldsCs[0].m_bodyB; - - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - 
Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - - float4 dLinVelA = make_float4(0,0,0,0); - float4 dAngVelA = make_float4(0,0,0,0); - float4 dLinVelB = make_float4(0,0,0,0); - float4 dAngVelB = make_float4(0,0,0,0); - - int bodyOffsetA = offsetSplitBodies[aIdx]; - int constraintOffsetA = contactConstraintOffsets[0].x; - int splitIndexA = bodyOffsetA+constraintOffsetA; - - if (invMassA) - { - dLinVelA = deltaLinearVelocities[splitIndexA]; - dAngVelA = deltaAngularVelocities[splitIndexA]; - } - - int bodyOffsetB = offsetSplitBodies[bIdx]; - int constraintOffsetB = contactConstraintOffsets[0].y; - int splitIndexB= bodyOffsetB+constraintOffsetB; - - if (invMassB) - { - dLinVelB = deltaLinearVelocities[splitIndexB]; - dAngVelB = deltaAngularVelocities[splitIndexB]; - } - - - - - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=ldsCs[0].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - - -// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA, -// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt ); - - - { - - __global Constraint4* cs = ldsCs; - - if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return; - const float4 center = cs->m_center; - - float4 n = -cs->m_linear; - - float4 tangent[2]; - btPlaneSpace1(n,&tangent[0],&tangent[1]); - float4 angular0, angular1, linear; - float4 r0 = center - posA; - float4 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB ); - rambdaDt *= cs->m_fJacCoeffInv[i]; - - { - float prevSum = cs->m_fAppliedRambdaDt[i]; - float updated = prevSum; - 
updated += rambdaDt; - updated = max2( updated, minRambdaDt[i] ); - updated = min2( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs->m_fAppliedRambdaDt[i] = updated; - } - - float4 linImp0 = invMassA*linear*rambdaDt; - float4 linImp1 = invMassB*(-linear)*rambdaDt; - float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt; - float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt; - - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - { // angular damping for point constraint - float4 ab = normalize3( posB - posA ); - float4 ac = normalize3( center - posA ); - if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = dot3F4( n, angVelA ); - float angNB = dot3F4( n, angVelB ); - - dAngVelA -= (angNA*0.1f)*n; - dAngVelB -= (angNB*0.1f)*n; - } - } - } - - - - } - - if (invMassA) - { - deltaLinearVelocities[splitIndexA] = dLinVelA; - deltaAngularVelocities[splitIndexA] = dAngVelA; - } - if (invMassB) - { - deltaLinearVelocities[splitIndexB] = dLinVelB; - deltaAngularVelocities[splitIndexB] = dAngVelB; - } - - -} - - -__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes , - __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, - float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds -) -{ - int i = GET_GLOBAL_IDX; - if (i<numManifolds) - { - solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities); - } -} - - -__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount, - __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies) -{ - int i = 
GET_GLOBAL_IDX; - if (i<numBodies) - { - if (gBodies[i].m_invMass) - { - int bodyOffset = offsetSplitBodies[i]; - int count = bodyCount[i]; - if (count) - { - gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset]; - gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset]; - } - } - } -} - - - -void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA, - const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, - __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB, - Constraint4* dstC ) -{ - dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit); - dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit); - - float dtInv = 1.f/dt; - for(int ic=0; ic<4; ic++) - { - dstC->m_appliedRambdaDt[ic] = 0.f; - } - dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f; - - - dstC->m_linear = src->m_worldNormalOnB; - dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() ); - for(int ic=0; ic<4; ic++) - { - float4 r0 = src->m_worldPosB[ic] - posA; - float4 r1 = src->m_worldPosB[ic] - posB; - - if( ic >= src->m_worldNormalOnB.w )//npoints - { - dstC->m_jacCoeffInv[ic] = 0.f; - continue; - } - - float relVelN; - { - float4 linear, angular0, angular1; - setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1); - - dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB); - - relVelN = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - - float e = 0.f;//src->getRestituitionCoeff(); - if( relVelN*relVelN < 0.004f ) e = 0.f; - - dstC->m_b[ic] = e*relVelN; - //float penetration = src->m_worldPosB[ic].w; - dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv; - dstC->m_appliedRambdaDt[ic] = 0.f; - } - } - - if( 
src->m_worldNormalOnB.w > 0 )//npoints - { // prepare friction - float4 center = make_float4(0.f); - for(int i=0; i<src->m_worldNormalOnB.w; i++) - center += src->m_worldPosB[i]; - center /= (float)src->m_worldNormalOnB.w; - - float4 tangent[2]; - btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]); - - float4 r[2]; - r[0] = center - posA; - r[1] = center - posB; - - for(int i=0; i<2; i++) - { - float4 linear, angular0, angular1; - setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1); - - dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1, - invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB); - dstC->m_fAppliedRambdaDt[i] = 0.f; - } - dstC->m_center = center; - } - - for(int i=0; i<4; i++) - { - if( i<src->m_worldNormalOnB.w ) - { - dstC->m_worldPos[i] = src->m_worldPosB[i]; - } - else - { - dstC->m_worldPos[i] = make_float4(0.f); - } - } -} - - -__kernel -__attribute__((reqd_work_group_size(WG_SIZE,1,1))) -void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, -__global const unsigned int* bodyCount, -int nContacts, -float dt, -float positionDrift, -float positionConstraintCoeff -) -{ - int gIdx = GET_GLOBAL_IDX; - - if( gIdx < nContacts ) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - float4 posA = gBodies[aIdx].m_pos; - float4 linVelA = gBodies[aIdx].m_linVel; - float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia; - - float4 posB = gBodies[bIdx].m_pos; - float4 linVelB = gBodies[bIdx].m_linVel; - float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia; - - Constraint4 cs; - - float countA = invMassA != 0.f ? 
(float)bodyCount[aIdx] : 1; - float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1; - - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB, - &cs ); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = cs; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h deleted file mode 100644 index f4d98d9941..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ /dev/null @@ -1,908 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverUtilsCL = - "/*\n" - "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. 
This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#ifndef B3_CONTACT4DATA_H\n" - "#define B3_CONTACT4DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = 
b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "typedef struct b3Contact4Data b3Contact4Data_t;\n" - "struct b3Contact4Data\n" - "{\n" - " b3Float4 m_worldPosB[4];\n" - "// b3Float4 m_localPosA[4];\n" - "// b3Float4 m_localPosB[4];\n" - " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" - " unsigned short m_restituitionCoeffCmp;\n" - " unsigned short m_frictionCoeffCmp;\n" - " int m_batchIdx;\n" - " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" - " int m_bodyBPtrAndSignBit;\n" - " int m_childIndexA;\n" - " int m_childIndexB;\n" - " int m_unused1;\n" - " int m_unused2;\n" - "};\n" - "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" - "{\n" - " return (int)contact->m_worldNormalOnB.w;\n" - "};\n" - "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" - "{\n" - " contact->m_worldNormalOnB.w = (float)numPoints;\n" - "};\n" - "#endif //B3_CONTACT4DATA_H\n" - "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" - "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" - "#ifdef cl_ext_atomic_counters_32\n" - "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" - "#else\n" - "#define counter32_t volatile global int*\n" - "#endif\n" - "typedef unsigned int u32;\n" - "typedef unsigned short u16;\n" - "typedef unsigned char u8;\n" - "#define GET_GROUP_IDX get_group_id(0)\n" - "#define GET_LOCAL_IDX get_local_id(0)\n" - "#define GET_GLOBAL_IDX get_global_id(0)\n" - "#define GET_GROUP_SIZE 
get_local_size(0)\n" - "#define GET_NUM_GROUPS get_num_groups(0)\n" - "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" - "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" - "#define AtomInc(x) atom_inc(&(x))\n" - "#define AtomInc1(x, out) out = atom_inc(&(x))\n" - "#define AppendInc(x, out) out = atomic_inc(x)\n" - "#define AtomAdd(x, value) atom_add(&(x), value)\n" - "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" - "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" - "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" - "#define make_float4 (float4)\n" - "#define make_float2 (float2)\n" - "#define make_uint4 (uint4)\n" - "#define make_int4 (int4)\n" - "#define make_uint2 (uint2)\n" - "#define make_int2 (int2)\n" - "#define max2 max\n" - "#define min2 min\n" - "///////////////////////////////////////\n" - "// Vector\n" - "///////////////////////////////////////\n" - "__inline\n" - "float fastDiv(float numerator, float denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "// return numerator/denominator; \n" - "}\n" - "__inline\n" - "float4 fastDiv4(float4 numerator, float4 denominator)\n" - "{\n" - " return native_divide(numerator, denominator); \n" - "}\n" - "__inline\n" - "float fastSqrtf(float f2)\n" - "{\n" - " return native_sqrt(f2);\n" - "// return sqrt(f2);\n" - "}\n" - "__inline\n" - "float fastRSqrt(float f2)\n" - "{\n" - " return native_rsqrt(f2);\n" - "}\n" - "__inline\n" - "float fastLength4(float4 v)\n" - "{\n" - " return fast_length(v);\n" - "}\n" - "__inline\n" - "float4 fastNormalize4(float4 v)\n" - "{\n" - " return fast_normalize(v);\n" - "}\n" - "__inline\n" - "float sqrtf(float a)\n" - "{\n" - "// return sqrt(a);\n" - " return native_sqrt(a);\n" - "}\n" - "__inline\n" - "float4 cross3(float4 a1, float4 b1)\n" - "{\n" - " float4 a=make_float4(a1.xyz,0.f);\n" - " float4 b=make_float4(b1.xyz,0.f);\n" - " //float4 a=a1;\n" - " //float4 b=b1;\n" - " return 
cross(a,b);\n" - "}\n" - "__inline\n" - "float dot3F4(float4 a, float4 b)\n" - "{\n" - " float4 a1 = make_float4(a.xyz,0.f);\n" - " float4 b1 = make_float4(b.xyz,0.f);\n" - " return dot(a1, b1);\n" - "}\n" - "__inline\n" - "float length3(const float4 a)\n" - "{\n" - " return sqrtf(dot3F4(a,a));\n" - "}\n" - "__inline\n" - "float dot4(const float4 a, const float4 b)\n" - "{\n" - " return dot( a, b );\n" - "}\n" - "// for height\n" - "__inline\n" - "float dot3w1(const float4 point, const float4 eqn)\n" - "{\n" - " return dot3F4(point,eqn) + eqn.w;\n" - "}\n" - "__inline\n" - "float4 normalize3(const float4 a)\n" - "{\n" - " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" - " return fastNormalize4( n );\n" - "// float length = sqrtf(dot3F4(a, a));\n" - "// return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 normalize4(const float4 a)\n" - "{\n" - " float length = sqrtf(dot4(a, a));\n" - " return 1.f/length * a;\n" - "}\n" - "__inline\n" - "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" - "{\n" - " float4 eqn;\n" - " float4 ab = b-a;\n" - " float4 ac = c-a;\n" - " eqn = normalize3( cross3(ab, ac) );\n" - " eqn.w = -dot3F4(eqn,a);\n" - " return eqn;\n" - "}\n" - "///////////////////////////////////////\n" - "// Matrix3x3\n" - "///////////////////////////////////////\n" - "typedef struct\n" - "{\n" - " float4 m_row[3];\n" - "}Matrix3x3;\n" - "__inline\n" - "Matrix3x3 mtZero();\n" - "__inline\n" - "Matrix3x3 mtIdentity();\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m);\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b);\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b);\n" - "__inline\n" - "Matrix3x3 mtZero()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = (float4)(0.f);\n" - " m.m_row[1] = (float4)(0.f);\n" - " m.m_row[2] = (float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtIdentity()\n" - "{\n" - " Matrix3x3 m;\n" - " m.m_row[0] = 
(float4)(1,0,0,0);\n" - " m.m_row[1] = (float4)(0,1,0,0);\n" - " m.m_row[2] = (float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtTranspose(Matrix3x3 m)\n" - "{\n" - " Matrix3x3 out;\n" - " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" - "{\n" - " Matrix3x3 transB;\n" - " transB = mtTranspose( b );\n" - " Matrix3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul1(Matrix3x3 a, float4 b)\n" - "{\n" - " float4 ans;\n" - " ans.x = dot3F4( a.m_row[0], b );\n" - " ans.y = dot3F4( a.m_row[1], b );\n" - " ans.z = dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "float4 mtMul3(float4 a, Matrix3x3 b)\n" - "{\n" - " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " float4 ans;\n" - " ans.x = dot3F4( a, colx );\n" - " ans.y = dot3F4( a, coly );\n" - " ans.z = dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "///////////////////////////////////////\n" - "// Quaternion\n" - "///////////////////////////////////////\n" - "typedef float4 Quaternion;\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b);\n" - 
"__inline\n" - "Quaternion qtNormalize(Quaternion in);\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec);\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q);\n" - "__inline\n" - "Quaternion qtMul(Quaternion a, Quaternion b)\n" - "{\n" - " Quaternion ans;\n" - " ans = cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "__inline\n" - "Quaternion qtNormalize(Quaternion in)\n" - "{\n" - " return fastNormalize4(in);\n" - "// in /= length( in );\n" - "// return in;\n" - "}\n" - "__inline\n" - "float4 qtRotate(Quaternion q, float4 vec)\n" - "{\n" - " Quaternion qInv = qtInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "__inline\n" - "Quaternion qtInvert(Quaternion q)\n" - "{\n" - " return (Quaternion)(-q.xyz, q.w);\n" - "}\n" - "__inline\n" - "float4 qtInvRotate(const Quaternion q, float4 vec)\n" - "{\n" - " return qtRotate( qtInvert( q ), vec );\n" - "}\n" - "#define WG_SIZE 64\n" - "typedef struct\n" - "{\n" - " float4 m_pos;\n" - " Quaternion m_quat;\n" - " float4 m_linVel;\n" - " float4 m_angVel;\n" - " u32 m_shapeIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "} Body;\n" - "typedef struct\n" - "{\n" - " Matrix3x3 m_invInertia;\n" - " Matrix3x3 m_initInvInertia;\n" - "} Shape;\n" - "typedef struct\n" - "{\n" - " float4 m_linear;\n" - " float4 m_worldPos[4];\n" - " float4 m_center; \n" - " float m_jacCoeffInv[4];\n" - " float m_b[4];\n" - " float m_appliedRambdaDt[4];\n" - " float m_fJacCoeffInv[2]; \n" - " float m_fAppliedRambdaDt[2]; \n" - " u32 m_bodyA;\n" - " u32 m_bodyB;\n" - " int m_batchIdx;\n" - " u32 m_paddings;\n" - "} Constraint4;\n" - "__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, 
int numContactManifolds, int fixedBodyIndex)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " \n" - " if( i < numContactManifolds)\n" - " {\n" - " int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;\n" - " bool isFixedA = (pa <0) || (pa == fixedBodyIndex);\n" - " int bodyIndexA = abs(pa);\n" - " if (!isFixedA)\n" - " {\n" - " AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);\n" - " }\n" - " barrier(CLK_GLOBAL_MEM_FENCE);\n" - " int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;\n" - " bool isFixedB = (pb <0) || (pb == fixedBodyIndex);\n" - " int bodyIndexB = abs(pb);\n" - " if (!isFixedB)\n" - " {\n" - " AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);\n" - " } \n" - " }\n" - "}\n" - "__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " \n" - " if( i < numSplitBodies)\n" - " {\n" - " linearVelocities[i] = make_float4(0);\n" - " angularVelocities[i] = make_float4(0);\n" - " }\n" - "}\n" - "__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" - "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (i<numBodies)\n" - " {\n" - " if (gBodies[i].m_invMass)\n" - " {\n" - " int bodyOffset = offsetSplitBodies[i];\n" - " int count = bodyCount[i];\n" - " float factor = 1.f/((float)count);\n" - " float4 averageLinVel = make_float4(0.f);\n" - " float4 averageAngVel = make_float4(0.f);\n" - " \n" - " for (int j=0;j<count;j++)\n" - " {\n" - " averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;\n" - " averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;\n" - " }\n" - " \n" - " for (int j=0;j<count;j++)\n" - " {\n" - " deltaLinearVelocities[bodyOffset+j] = averageLinVel;\n" - " deltaAngularVelocities[bodyOffset+j] = averageAngVel;\n" - " }\n" - " \n" - " 
}//bodies[i].m_invMass\n" - " }//i<numBodies\n" - "}\n" - "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" - "{\n" - " *linear = make_float4(n.xyz,0.f);\n" - " *angular0 = cross3(r0, n);\n" - " *angular1 = -cross3(r1, n);\n" - "}\n" - "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" - "{\n" - " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" - "}\n" - "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" - " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n" - "{\n" - " // linear0,1 are normlized\n" - " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" - " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" - " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" - " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" - " return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n" - "}\n" - "void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" - " void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" - "{\n" - " if (fabs(n.z) > 0.70710678f) {\n" - " // choose p in y-z plane\n" - " float a = n.y*n.y + n.z*n.z;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = 0;\n" - " p[0].y = -n.z*k;\n" - " p[0].z = n.y*k;\n" - " // set q = n x p\n" - " q[0].x = a*k;\n" - " q[0].y = -n.x*p[0].z;\n" - " q[0].z = n.x*p[0].y;\n" - " }\n" - " else {\n" - " // choose p in x-y plane\n" - " float a = n.x*n.x + n.y*n.y;\n" - " float k = 1.f/sqrt(a);\n" - " p[0].x = -n.y*k;\n" - " p[0].y = n.x*k;\n" - " p[0].z = 0;\n" - " // set q = n x p\n" - " q[0].x = -n.z*p[0].y;\n" - " q[0].y = n.z*p[0].x;\n" - " q[0].z = a*k;\n" - " }\n" - "}\n" - "void solveContact(__global Constraint4* cs,\n" - " float4 posA, float4* linVelA, float4* 
angVelA, float invMassA, Matrix3x3 invInertiaA,\n" - " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" - " float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)\n" - "{\n" - " float minRambdaDt = 0;\n" - " float maxRambdaDt = FLT_MAX;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = cs->m_worldPos[ic] - posA;\n" - " float4 r1 = cs->m_worldPos[ic] - posB;\n" - " setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" - " \n" - " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" - " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" - " rambdaDt *= cs->m_jacCoeffInv[ic];\n" - " \n" - " {\n" - " float prevSum = cs->m_appliedRambdaDt[ic];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt );\n" - " updated = min2( updated, maxRambdaDt );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_appliedRambdaDt[ic] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " if (invMassA)\n" - " {\n" - " *dLinVelA += linImp0;\n" - " *dAngVelA += angImp0;\n" - " }\n" - " if (invMassB)\n" - " {\n" - " *dLinVelB += linImp1;\n" - " *dAngVelB += angImp1;\n" - " }\n" - " }\n" - "}\n" - "// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" - "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" - "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" - "__global float4* 
deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" - "{\n" - " //float frictionCoeff = ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " \n" - " float4 dLinVelA = make_float4(0,0,0,0);\n" - " float4 dAngVelA = make_float4(0,0,0,0);\n" - " float4 dLinVelB = make_float4(0,0,0,0);\n" - " float4 dAngVelB = make_float4(0,0,0,0);\n" - " \n" - " int bodyOffsetA = offsetSplitBodies[aIdx];\n" - " int constraintOffsetA = contactConstraintOffsets[0].x;\n" - " int splitIndexA = bodyOffsetA+constraintOffsetA;\n" - " \n" - " if (invMassA)\n" - " {\n" - " dLinVelA = deltaLinearVelocities[splitIndexA];\n" - " dAngVelA = deltaAngularVelocities[splitIndexA];\n" - " }\n" - " int bodyOffsetB = offsetSplitBodies[bIdx];\n" - " int constraintOffsetB = contactConstraintOffsets[0].y;\n" - " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" - " if (invMassB)\n" - " {\n" - " dLinVelB = deltaLinearVelocities[splitIndexB];\n" - " dAngVelB = deltaAngularVelocities[splitIndexB];\n" - " }\n" - " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - " posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" - " if (invMassA)\n" - " {\n" - " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" - " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" - " } \n" - " if (invMassB)\n" - " {\n" - " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" - " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" 
- " }\n" - "}\n" - "__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" - "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" - "float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" - ")\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (i<numManifolds)\n" - " {\n" - " solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" - " }\n" - "}\n" - "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n" - " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" - " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" - "{\n" - " float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n" - " int aIdx = ldsCs[0].m_bodyA;\n" - " int bIdx = ldsCs[0].m_bodyB;\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " \n" - " float4 dLinVelA = make_float4(0,0,0,0);\n" - " float4 dAngVelA = make_float4(0,0,0,0);\n" - " float4 dLinVelB = make_float4(0,0,0,0);\n" - " float4 dAngVelB = make_float4(0,0,0,0);\n" - " \n" - " int bodyOffsetA = offsetSplitBodies[aIdx];\n" - " int constraintOffsetA = contactConstraintOffsets[0].x;\n" - " int splitIndexA = bodyOffsetA+constraintOffsetA;\n" - " \n" - " if 
(invMassA)\n" - " {\n" - " dLinVelA = deltaLinearVelocities[splitIndexA];\n" - " dAngVelA = deltaAngularVelocities[splitIndexA];\n" - " }\n" - " int bodyOffsetB = offsetSplitBodies[bIdx];\n" - " int constraintOffsetB = contactConstraintOffsets[0].y;\n" - " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" - " if (invMassB)\n" - " {\n" - " dLinVelB = deltaLinearVelocities[splitIndexB];\n" - " dAngVelB = deltaAngularVelocities[splitIndexB];\n" - " }\n" - " {\n" - " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" - " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" - " float sum = 0;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " sum +=ldsCs[0].m_appliedRambdaDt[j];\n" - " }\n" - " frictionCoeff = 0.7f;\n" - " for(int j=0; j<4; j++)\n" - " {\n" - " maxRambdaDt[j] = frictionCoeff*sum;\n" - " minRambdaDt[j] = -maxRambdaDt[j];\n" - " }\n" - " \n" - "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" - "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" - " \n" - " \n" - " {\n" - " \n" - " __global Constraint4* cs = ldsCs;\n" - " \n" - " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" - " const float4 center = cs->m_center;\n" - " \n" - " float4 n = -cs->m_linear;\n" - " \n" - " float4 tangent[2];\n" - " btPlaneSpace1(n,&tangent[0],&tangent[1]);\n" - " float4 angular0, angular1, linear;\n" - " float4 r0 = center - posA;\n" - " float4 r1 = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" - " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" - " linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );\n" - " rambdaDt *= cs->m_fJacCoeffInv[i];\n" - " \n" - " {\n" - " float prevSum = cs->m_fAppliedRambdaDt[i];\n" - " float updated = prevSum;\n" - " updated += rambdaDt;\n" - " updated = max2( updated, minRambdaDt[i] );\n" - " updated = min2( updated, 
maxRambdaDt[i] );\n" - " rambdaDt = updated - prevSum;\n" - " cs->m_fAppliedRambdaDt[i] = updated;\n" - " }\n" - " \n" - " float4 linImp0 = invMassA*linear*rambdaDt;\n" - " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" - " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" - " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" - " \n" - " dLinVelA += linImp0;\n" - " dAngVelA += angImp0;\n" - " dLinVelB += linImp1;\n" - " dAngVelB += angImp1;\n" - " }\n" - " { // angular damping for point constraint\n" - " float4 ab = normalize3( posB - posA );\n" - " float4 ac = normalize3( center - posA );\n" - " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" - " {\n" - " float angNA = dot3F4( n, angVelA );\n" - " float angNB = dot3F4( n, angVelB );\n" - " \n" - " dAngVelA -= (angNA*0.1f)*n;\n" - " dAngVelB -= (angNB*0.1f)*n;\n" - " }\n" - " }\n" - " }\n" - " \n" - " \n" - " }\n" - " if (invMassA)\n" - " {\n" - " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" - " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" - " } \n" - " if (invMassB)\n" - " {\n" - " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" - " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" - " }\n" - " \n" - "}\n" - "__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" - " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" - " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" - " float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" - ")\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (i<numManifolds)\n" - " {\n" - " solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" - " }\n" - "}\n" - "__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global 
int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" - " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" - "{\n" - " int i = GET_GLOBAL_IDX;\n" - " if (i<numBodies)\n" - " {\n" - " if (gBodies[i].m_invMass)\n" - " {\n" - " int bodyOffset = offsetSplitBodies[i];\n" - " int count = bodyCount[i];\n" - " if (count)\n" - " {\n" - " gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];\n" - " gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];\n" - " }\n" - " }\n" - " }\n" - "}\n" - "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" - " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" - " __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" - " Constraint4* dstC )\n" - "{\n" - " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" - " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" - " float dtInv = 1.f/dt;\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" - " dstC->m_linear = src->m_worldNormalOnB;\n" - " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" - " for(int ic=0; ic<4; ic++)\n" - " {\n" - " float4 r0 = src->m_worldPosB[ic] - posA;\n" - " float4 r1 = src->m_worldPosB[ic] - posB;\n" - " if( ic >= src->m_worldNormalOnB.w )//npoints\n" - " {\n" - " dstC->m_jacCoeffInv[ic] = 0.f;\n" - " continue;\n" - " }\n" - " float relVelN;\n" - " {\n" - " float4 linear, angular0, angular1;\n" - " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" - " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" - " relVelN = calcRelVel(linear, 
-linear, angular0, angular1,\n" - " linVelA, angVelA, linVelB, angVelB);\n" - " float e = 0.f;//src->getRestituitionCoeff();\n" - " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" - " dstC->m_b[ic] = e*relVelN;\n" - " //float penetration = src->m_worldPosB[ic].w;\n" - " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" - " dstC->m_appliedRambdaDt[ic] = 0.f;\n" - " }\n" - " }\n" - " if( src->m_worldNormalOnB.w > 0 )//npoints\n" - " { // prepare friction\n" - " float4 center = make_float4(0.f);\n" - " for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" - " center += src->m_worldPosB[i];\n" - " center /= (float)src->m_worldNormalOnB.w;\n" - " float4 tangent[2];\n" - " btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" - " \n" - " float4 r[2];\n" - " r[0] = center - posA;\n" - " r[1] = center - posB;\n" - " for(int i=0; i<2; i++)\n" - " {\n" - " float4 linear, angular0, angular1;\n" - " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" - " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" - " invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" - " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" - " }\n" - " dstC->m_center = center;\n" - " }\n" - " for(int i=0; i<4; i++)\n" - " {\n" - " if( i<src->m_worldNormalOnB.w )\n" - " {\n" - " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" - " }\n" - " else\n" - " {\n" - " dstC->m_worldPos[i] = make_float4(0.f);\n" - " }\n" - " }\n" - "}\n" - "__kernel\n" - "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" - "void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" - "__global const unsigned int* bodyCount,\n" - "int nContacts,\n" - "float dt,\n" - "float positionDrift,\n" - "float positionConstraintCoeff\n" - ")\n" - "{\n" - " int gIdx = GET_GLOBAL_IDX;\n" - " \n" - " if( gIdx < 
nContacts )\n" - " {\n" - " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" - " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" - " float4 posA = gBodies[aIdx].m_pos;\n" - " float4 linVelA = gBodies[aIdx].m_linVel;\n" - " float4 angVelA = gBodies[aIdx].m_angVel;\n" - " float invMassA = gBodies[aIdx].m_invMass;\n" - " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" - " float4 posB = gBodies[bIdx].m_pos;\n" - " float4 linVelB = gBodies[bIdx].m_linVel;\n" - " float4 angVelB = gBodies[bIdx].m_angVel;\n" - " float invMassB = gBodies[bIdx].m_invMass;\n" - " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" - " Constraint4 cs;\n" - " float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;\n" - " float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;\n" - " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" - " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" - " &cs );\n" - " \n" - " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" - " gConstraintOut[gIdx] = cs;\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl deleted file mode 100644 index ba8ba735d0..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.cl +++ /dev/null @@ -1,22 +0,0 @@ - - -#include "Bullet3Collision/NarrowPhaseCollision/shared/b3UpdateAabbs.h" - - -__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB) -{ - int nodeID = get_global_id(0); - if( nodeID < numNodes ) - { - b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB); - } -} - -__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs) -{ - int pairId = get_global_id(0); - if( pairId< 
numPairs ) - { - pairs[pairId].z = 0xffffffff; - } -}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h deleted file mode 100644 index bb949b2027..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ /dev/null @@ -1,482 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* updateAabbsKernelCL = - "#ifndef B3_UPDATE_AABBS_H\n" - "#define B3_UPDATE_AABBS_H\n" - "#ifndef B3_AABB_H\n" - "#define B3_AABB_H\n" - "#ifndef B3_FLOAT4_H\n" - "#define B3_FLOAT4_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#define B3_PLATFORM_DEFINITIONS_H\n" - "struct MyTest\n" - "{\n" - " int bla;\n" - "};\n" - "#ifdef __cplusplus\n" - "#else\n" - "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" - "#define B3_LARGE_FLOAT 1e18f\n" - "#define B3_INFINITY 1e18f\n" - "#define b3Assert(a)\n" - "#define b3ConstArray(a) __global const a*\n" - "#define b3AtomicInc atomic_inc\n" - "#define b3AtomicAdd atomic_add\n" - "#define b3Fabs fabs\n" - "#define b3Sqrt native_sqrt\n" - "#define b3Sin native_sin\n" - "#define b3Cos native_cos\n" - "#define B3_STATIC\n" - "#endif\n" - "#endif\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Float4;\n" - " #define b3Float4ConstArg const b3Float4\n" - " #define b3MakeFloat4 (float4)\n" - " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return dot(a1, b1);\n" - " }\n" - " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" - " {\n" - " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" - " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" - " return cross(a1, b1);\n" - " }\n" - " #define b3MinFloat4 min\n" - " #define b3MaxFloat4 max\n" - " #define b3Normalized(a) normalize(a)\n" - "#endif \n" - " \n" - "inline bool b3IsAlmostZero(b3Float4ConstArg 
v)\n" - "{\n" - " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" - " return false;\n" - " return true;\n" - "}\n" - "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" - "{\n" - " float maxDot = -B3_INFINITY;\n" - " int i = 0;\n" - " int ptIndex = -1;\n" - " for( i = 0; i < vecLen; i++ )\n" - " {\n" - " float dot = b3Dot3F4(vecArray[i],vec);\n" - " \n" - " if( dot > maxDot )\n" - " {\n" - " maxDot = dot;\n" - " ptIndex = i;\n" - " }\n" - " }\n" - " b3Assert(ptIndex>=0);\n" - " if (ptIndex<0)\n" - " {\n" - " ptIndex = 0;\n" - " }\n" - " *dotOut = maxDot;\n" - " return ptIndex;\n" - "}\n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_MAT3x3_H\n" - "#define B3_MAT3x3_H\n" - "#ifndef B3_QUAT_H\n" - "#define B3_QUAT_H\n" - "#ifndef B3_PLATFORM_DEFINITIONS_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - " typedef float4 b3Quat;\n" - " #define b3QuatConstArg const b3Quat\n" - " \n" - " \n" - "inline float4 b3FastNormalize4(float4 v)\n" - "{\n" - " v = (float4)(v.xyz,0.f);\n" - " return fast_normalize(v);\n" - "}\n" - " \n" - "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" - "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" - "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" - "{\n" - " b3Quat ans;\n" - " ans = b3Cross3( a, b );\n" - " ans += a.w*b+b.w*a;\n" - "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" - " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" - " return ans;\n" - "}\n" - "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" - "{\n" - " b3Quat q;\n" - " q=in;\n" - " //return b3FastNormalize4(in);\n" - " float len = native_sqrt(dot(q, q));\n" 
- " if(len > 0.f)\n" - " {\n" - " q *= 1.f / len;\n" - " }\n" - " else\n" - " {\n" - " q.x = q.y = q.z = 0.f;\n" - " q.w = 1.f;\n" - " }\n" - " return q;\n" - "}\n" - "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " b3Quat qInv = b3QuatInvert( q );\n" - " float4 vcpy = vec;\n" - " vcpy.w = 0.f;\n" - " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" - " return out;\n" - "}\n" - "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" - "{\n" - " return (b3Quat)(-q.xyz, q.w);\n" - "}\n" - "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" - "{\n" - " return b3QuatRotate( b3QuatInvert( q ), vec );\n" - "}\n" - "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" - "{\n" - " return b3QuatRotate( orientation, point ) + (translation);\n" - "}\n" - " \n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "typedef struct\n" - "{\n" - " b3Float4 m_row[3];\n" - "}b3Mat3x3;\n" - "#define b3Mat3x3ConstArg const b3Mat3x3\n" - "#define b3GetRow(m,row) (m.m_row[row])\n" - "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" - "{\n" - " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" - " b3Mat3x3 out;\n" - " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" - " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" - " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" - " out.m_row[0].w = 0.f;\n" - " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" - " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" - " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" - " out.m_row[1].w = 0.f;\n" - " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" - " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" - " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" - " out.m_row[2].w = 0.f;\n" - " return out;\n" - "}\n" - "inline b3Mat3x3 
b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = fabs(matIn.m_row[0]);\n" - " out.m_row[1] = fabs(matIn.m_row[1]);\n" - " out.m_row[2] = fabs(matIn.m_row[2]);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtZero();\n" - "__inline\n" - "b3Mat3x3 mtIdentity();\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" - "__inline\n" - "b3Mat3x3 mtZero()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(0.f);\n" - " m.m_row[1] = (b3Float4)(0.f);\n" - " m.m_row[2] = (b3Float4)(0.f);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtIdentity()\n" - "{\n" - " b3Mat3x3 m;\n" - " m.m_row[0] = (b3Float4)(1,0,0,0);\n" - " m.m_row[1] = (b3Float4)(0,1,0,0);\n" - " m.m_row[2] = (b3Float4)(0,0,1,0);\n" - " return m;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" - "{\n" - " b3Mat3x3 out;\n" - " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" - " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" - " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" - " return out;\n" - "}\n" - "__inline\n" - "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" - "{\n" - " b3Mat3x3 transB;\n" - " transB = mtTranspose( b );\n" - " b3Mat3x3 ans;\n" - " // why this doesn't run when 0ing in the for{}\n" - " a.m_row[0].w = 0.f;\n" - " a.m_row[1].w = 0.f;\n" - " a.m_row[2].w = 0.f;\n" - " for(int i=0; i<3; i++)\n" - " {\n" - "// a.m_row[i].w = 0.f;\n" - " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" - " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" - " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" - " ans.m_row[i].w = 0.f;\n" - " }\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" - "{\n" 
- " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a.m_row[0], b );\n" - " ans.y = b3Dot3F4( a.m_row[1], b );\n" - " ans.z = b3Dot3F4( a.m_row[2], b );\n" - " ans.w = 0.f;\n" - " return ans;\n" - "}\n" - "__inline\n" - "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" - "{\n" - " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" - " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" - " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" - " b3Float4 ans;\n" - " ans.x = b3Dot3F4( a, colx );\n" - " ans.y = b3Dot3F4( a, coly );\n" - " ans.z = b3Dot3F4( a, colz );\n" - " return ans;\n" - "}\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3Aabb b3Aabb_t;\n" - "struct b3Aabb\n" - "{\n" - " union\n" - " {\n" - " float m_min[4];\n" - " b3Float4 m_minVec;\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float m_max[4];\n" - " b3Float4 m_maxVec;\n" - " int m_signedMaxIndices[4];\n" - " };\n" - "};\n" - "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" - " b3Float4ConstArg pos,\n" - " b3QuatConstArg orn,\n" - " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" - "{\n" - " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" - " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" - " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" - " b3Mat3x3 m;\n" - " m = b3QuatGetRotationMatrix(orn);\n" - " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" - " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" - " \n" - " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" - " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" - " 0.f);\n" - " *aabbMinOut = center-extent;\n" - " *aabbMaxOut = center+extent;\n" - "}\n" - "/// conservative test for overlap between two aabbs\n" - "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg 
aabbMin1,b3Float4ConstArg aabbMax1,\n" - " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" - " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" - " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "#endif //B3_AABB_H\n" - "#ifndef B3_COLLIDABLE_H\n" - "#define B3_COLLIDABLE_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "enum b3ShapeTypes\n" - "{\n" - " SHAPE_HEIGHT_FIELD=1,\n" - " SHAPE_CONVEX_HULL=3,\n" - " SHAPE_PLANE=4,\n" - " SHAPE_CONCAVE_TRIMESH=5,\n" - " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" - " SHAPE_SPHERE=7,\n" - " MAX_NUM_SHAPE_TYPES,\n" - "};\n" - "typedef struct b3Collidable b3Collidable_t;\n" - "struct b3Collidable\n" - "{\n" - " union {\n" - " int m_numChildShapes;\n" - " int m_bvhIndex;\n" - " };\n" - " union\n" - " {\n" - " float m_radius;\n" - " int m_compoundBvhIndex;\n" - " };\n" - " int m_shapeType;\n" - " union\n" - " {\n" - " int m_shapeIndex;\n" - " float m_height;\n" - " };\n" - "};\n" - "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" - "struct b3GpuChildShape\n" - "{\n" - " b3Float4 m_childPosition;\n" - " b3Quat m_childOrientation;\n" - " union\n" - " {\n" - " int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS\n" - " int m_capsuleAxis;\n" - " };\n" - " union \n" - " {\n" - " float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES\n" - " int m_numChildShapes;//used for compound shape\n" - " };\n" - " union \n" - " {\n" - " float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES\n" - " int m_collidableShapeIndex;\n" - " };\n" - " int m_shapeType;\n" - "};\n" - "struct 
b3CompoundOverlappingPair\n" - "{\n" - " int m_bodyIndexA;\n" - " int m_bodyIndexB;\n" - "// int m_pairType;\n" - " int m_childShapeIndexA;\n" - " int m_childShapeIndexB;\n" - "};\n" - "#endif //B3_COLLIDABLE_H\n" - "#ifndef B3_RIGIDBODY_DATA_H\n" - "#define B3_RIGIDBODY_DATA_H\n" - "#ifndef B3_FLOAT4_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_FLOAT4_H\n" - "#ifndef B3_QUAT_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif \n" - "#endif //B3_QUAT_H\n" - "#ifndef B3_MAT3x3_H\n" - "#ifdef __cplusplus\n" - "#else\n" - "#endif\n" - "#endif //B3_MAT3x3_H\n" - "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" - "struct b3RigidBodyData\n" - "{\n" - " b3Float4 m_pos;\n" - " b3Quat m_quat;\n" - " b3Float4 m_linVel;\n" - " b3Float4 m_angVel;\n" - " int m_collidableIdx;\n" - " float m_invMass;\n" - " float m_restituitionCoeff;\n" - " float m_frictionCoeff;\n" - "};\n" - "typedef struct b3InertiaData b3InertiaData_t;\n" - "struct b3InertiaData\n" - "{\n" - " b3Mat3x3 m_invInertiaWorld;\n" - " b3Mat3x3 m_initInvInertia;\n" - "};\n" - "#endif //B3_RIGIDBODY_DATA_H\n" - " \n" - "void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" - "{\n" - " __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" - " b3Float4 position = body->m_pos;\n" - " b3Quat orientation = body->m_quat;\n" - " \n" - " int collidableIndex = body->m_collidableIdx;\n" - " int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" - " \n" - " if (shapeIndex>=0)\n" - " {\n" - " \n" - " b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" - " b3Aabb_t worldAabb;\n" - " \n" - " b3Float4 aabbAMinOut,aabbAMaxOut; \n" - " float margin = 0.f;\n" - " b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" - " \n" - " worldAabb.m_minVec =aabbAMinOut;\n" - " 
worldAabb.m_minIndices[3] = bodyId;\n" - " worldAabb.m_maxVec = aabbAMaxOut;\n" - " worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1;\n" - " worldAabbs[bodyId] = worldAabb;\n" - " }\n" - "}\n" - "#endif //B3_UPDATE_AABBS_H\n" - "__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" - "{\n" - " int nodeID = get_global_id(0);\n" - " if( nodeID < numNodes )\n" - " {\n" - " b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" - " }\n" - "}\n" - "__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs)\n" - "{\n" - " int pairId = get_global_id(0);\n" - " if( pairId< numPairs )\n" - " {\n" - " pairs[pairId].z = 0xffffffff;\n" - " }\n" - "}\n"; |