diff options
author | RĂ©mi Verschelde <rverschelde@gmail.com> | 2018-01-13 14:43:30 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-01-13 14:43:30 +0100 |
commit | a3ee252993e8200c856be3fe664937f9461ee268 (patch) | |
tree | af68e434545e20c538f896e28b73f2db7d626edd /thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp | |
parent | c01575b3125ce1828f0cacb3f9f00286136f373c (diff) | |
parent | e12c89e8c9896b2e5cdd70dbd2d2acb449ff4b94 (diff) |
Merge pull request #15664 from akien-mga/thirdparty
Bugfix updates to various thirdparty libraries
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp | 1366 |
1 files changed, 1366 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp new file mode 100644 index 0000000000..c45fbbdcaa --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp @@ -0,0 +1,1366 @@ + +bool searchIncremental3dSapOnGpu = true; +#include <limits.h> +#include "b3GpuSapBroadphase.h" +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h" + + +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "kernels/sapKernels.h" + +#include "Bullet3Common/b3MinMax.h" + +#define B3_BROADPHASE_SAP_PATH "src/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl" + +/* + + + + + + + b3OpenCLArray<int> m_pairCount; + + + b3OpenCLArray<b3SapAabb> m_allAabbsGPU; + b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU; + + virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU() + { + return m_allAabbsGPU; + } + virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU() + { + return m_allAabbsCPU; + } + + b3OpenCLArray<b3Vector3> m_sum; + b3OpenCLArray<b3Vector3> m_sum2; + b3OpenCLArray<b3Vector3> m_dst; + + b3OpenCLArray<int> m_smallAabbsMappingGPU; + b3AlignedObjectArray<int> m_smallAabbsMappingCPU; + + b3OpenCLArray<int> m_largeAabbsMappingGPU; + b3AlignedObjectArray<int> m_largeAabbsMappingCPU; + + + b3OpenCLArray<b3Int4> m_overlappingPairs; + + //temporary gpu work memory + b3OpenCLArray<b3SortData> m_gpuSmallSortData; + b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs; + + class b3PrefixScanFloat4CL* m_prefixScanFloat4; + */ + +b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType) +:m_context(ctx), +m_device(device), +m_queue(q), + +m_objectMinMaxIndexGPUaxis0(ctx,q), +m_objectMinMaxIndexGPUaxis1(ctx,q), +m_objectMinMaxIndexGPUaxis2(ctx,q), +m_objectMinMaxIndexGPUaxis0prev(ctx,q), +m_objectMinMaxIndexGPUaxis1prev(ctx,q), +m_objectMinMaxIndexGPUaxis2prev(ctx,q), +m_sortedAxisGPU0(ctx,q), +m_sortedAxisGPU1(ctx,q), +m_sortedAxisGPU2(ctx,q), +m_sortedAxisGPU0prev(ctx,q), +m_sortedAxisGPU1prev(ctx,q), +m_sortedAxisGPU2prev(ctx,q), +m_addedHostPairsGPU(ctx,q), +m_removedHostPairsGPU(ctx,q), +m_addedCountGPU(ctx,q), +m_removedCountGPU(ctx,q), +m_currentBuffer(-1), +m_pairCount(ctx,q), +m_allAabbsGPU(ctx,q), +m_sum(ctx,q), +m_sum2(ctx,q), +m_dst(ctx,q), +m_smallAabbsMappingGPU(ctx,q), +m_largeAabbsMappingGPU(ctx,q), +m_overlappingPairs(ctx,q), +m_gpuSmallSortData(ctx,q), +m_gpuSmallSortedAabbs(ctx,q) +{ + const char* sapSrc = sapCL; + + + cl_int errNum=0; + + b3Assert(m_context); + b3Assert(m_device); + cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH); + b3Assert(errNum==CL_SUCCESS); + + + b3Assert(errNum==CL_SUCCESS); +#ifndef __APPLE__ + m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context,m_device,m_queue); +#else + m_prefixScanFloat4 = 0; +#endif + m_sapKernel = 0; + + switch (kernelType) + { + case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU: + { + m_sapKernel=0; + break; + } + case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU: + { + m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBruteForce",&errNum,sapProg ); + break; + } + + case B3_GPU_SAP_KERNEL_ORIGINAL: + { + m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg ); + break; + } + case B3_GPU_SAP_KERNEL_BARRIER: + { + m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg ); + break; + } + case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY: + { + m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg ); + break; + } + + default: + { + m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg ); + b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory"); + } + }; + + + + m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg ); + b3Assert(errNum==CL_SUCCESS); + + m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "prepareSumVarianceKernel",&errNum,sapProg ); + b3Assert(errNum==CL_SUCCESS); + + + m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg ); + + m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg ); + + m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg ); + + m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue); +} + +b3GpuSapBroadphase::~b3GpuSapBroadphase() +{ + delete m_sorter; + delete m_prefixScanFloat4; + + clReleaseKernel(m_scatterKernel); + clReleaseKernel(m_flipFloatKernel); + clReleaseKernel(m_copyAabbsKernel); + clReleaseKernel(m_sapKernel); + clReleaseKernel(m_sap2Kernel); + clReleaseKernel(m_prepareSumVarianceKernel); + + +} + +/// conservative test for overlap between two aabbs +static bool TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1, + const b3Vector3 &aabbMin2, const b3Vector3 &aabbMax2) +{ + bool overlap = true; + overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap; + overlap = (aabbMin1.getZ() > aabbMax2.getZ() || aabbMax1.getZ() < aabbMin2.getZ()) ? false : overlap; + overlap = (aabbMin1.getY() > aabbMax2.getY() || aabbMax1.getY() < aabbMin2.getY()) ? false : overlap; + return overlap; +} + + + +//http://stereopsis.com/radix.html +static unsigned int FloatFlip(float fl) +{ + unsigned int f = *(unsigned int*)&fl; + unsigned int mask = -(int)(f >> 31) | 0x80000000; + return f ^ mask; +}; + +void b3GpuSapBroadphase::init3dSap() +{ + if (m_currentBuffer<0) + { + m_allAabbsGPU.copyToHost(m_allAabbsCPU); + + m_currentBuffer = 0; + for (int axis=0;axis<3;axis++) + { + for (int buf=0;buf<2;buf++) + { + int totalNumAabbs = m_allAabbsCPU.size(); + int numEndPoints = 2*totalNumAabbs; + m_sortedAxisCPU[axis][buf].resize(numEndPoints); + + if (buf==m_currentBuffer) + { + for (int i=0;i<totalNumAabbs;i++) + { + m_sortedAxisCPU[axis][buf][i*2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis])-1; + m_sortedAxisCPU[axis][buf][i*2].m_value = i*2; + m_sortedAxisCPU[axis][buf][i*2+1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis])+1; + m_sortedAxisCPU[axis][buf][i*2+1].m_value = i*2+1; + } + } + } + } + + for (int axis=0;axis<3;axis++) + { + m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); + } + + for (int axis=0;axis<3;axis++) + { + //int totalNumAabbs = m_allAabbsCPU.size(); + int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); + m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints); + for (int i=0;i<numEndPoints;i++) + { + int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; + int newDest = destIndex/2; + if (destIndex&1) + { + m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i; + } else + { + m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i; + } + } + } + + } +} + + +static bool b3PairCmp(const b3Int4& p, const b3Int4& q) +{ + return ((p.x<q.x) || ((p.x==q.x) && (p.y<q.y))); +} + + +static bool operator==(const b3Int4& a,const b3Int4& b) +{ + return a.x == b.x && a.y == b.y; +}; + +static bool operator<(const b3Int4& a,const b3Int4& b) +{ + return a.x < b.x || (a.x == b.x && a.y < b.y); +}; + +static bool operator>(const b3Int4& a,const b3Int4& b) +{ + return a.x > b.x || (a.x == b.x && a.y > b.y); +}; + +b3AlignedObjectArray<b3Int4> addedHostPairs; +b3AlignedObjectArray<b3Int4> removedHostPairs; + +b3AlignedObjectArray<b3SapAabb> preAabbs; + +void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap() +{ + //static int framepje = 0; + //printf("framepje=%d\n",framepje++); + + + B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap"); + + addedHostPairs.resize(0); + removedHostPairs.resize(0); + + b3Assert(m_currentBuffer>=0); + + { + preAabbs.resize(m_allAabbsCPU.size()); + for (int i=0;i<preAabbs.size();i++) + { + preAabbs[i]=m_allAabbsCPU[i]; + } + } + + + if (m_currentBuffer<0) + return; + { + B3_PROFILE("m_allAabbsGPU.copyToHost"); + m_allAabbsGPU.copyToHost(m_allAabbsCPU); + } + + b3AlignedObjectArray<b3Int4> allPairs; + { + B3_PROFILE("m_overlappingPairs.copyToHost"); + m_overlappingPairs.copyToHost(allPairs); + } + if (0) + { + { + printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n", + m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1],m_allAabbsCPU[40].m_min[2], + m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1],m_allAabbsCPU[40].m_max[2]); + } + + { + printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n", + m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1],m_allAabbsCPU[53].m_min[2], + m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1],m_allAabbsCPU[53].m_max[2]); + } + + + { + b3Int4 newPair; + newPair.x = 40; + newPair.y = 53; + int index = allPairs.findBinarySearch(newPair); + printf("hasPair(40,53)=%d out of %d\n",index, allPairs.size()); + + { + int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max,(const b3Vector3&)m_allAabbsCPU[53].m_min,(const b3Vector3&)m_allAabbsCPU[53].m_max); + printf("overlap=%d\n",overlap); + } + + if (preAabbs.size()) + { + int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max,(const b3Vector3&)preAabbs[53].m_min,(const b3Vector3&)preAabbs[53].m_max); + printf("prevoverlap=%d\n",prevOverlap); + } else + { + printf("unknown prevoverlap\n"); + } + + } + } + + + if (0) + { + for (int i=0;i<m_allAabbsCPU.size();i++) + { + //printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]); + + + } + + for (int axis=0;axis<3;axis++) + { + for (int buf=0;buf<2;buf++) + { + b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size()*2); + } + } + } + + + + m_currentBuffer = 1-m_currentBuffer; + + + + int totalNumAabbs = m_allAabbsCPU.size(); + + { + B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)"); + for (int i=0;i<totalNumAabbs;i++) + { + + + unsigned int keyMin[3]; + unsigned int keyMax[3]; + for (int axis=0;axis<3;axis++) + { + float vmin=m_allAabbsCPU[i].m_min[axis]; + float vmax = m_allAabbsCPU[i].m_max[axis]; + keyMin[axis] = FloatFlip(vmin); + keyMax[axis] = FloatFlip(vmax); + + m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_key = keyMin[axis]-1; + m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_value = i*2; + m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_key = keyMax[axis]+1; + m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_value = i*2+1; + } + //printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]); + + } + } + + + + { + B3_PROFILE("sort m_sortedAxisCPU"); + for (int axis=0;axis<3;axis++) + m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]); + } + +#if 0 + if (0) + { + for (int axis=0;axis<3;axis++) + { + //printf("axis %d\n",axis); + for (int i=0;i<m_sortedAxisCPU[axis][m_currentBuffer].size();i++) + { + //int key = m_sortedAxisCPU[axis][m_currentBuffer][i].m_key; + //int value = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; + //printf("[%d]=%d\n",i,value); + } + + } + } +#endif + + { + B3_PROFILE("assign m_objectMinMaxIndexCPU"); + for (int axis=0;axis<3;axis++) + { + int totalNumAabbs = m_allAabbsCPU.size(); + int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size(); + m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs); + for (int i=0;i<numEndPoints;i++) + { + int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value; + int newDest = destIndex/2; + if (destIndex&1) + { + m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i; + } else + { + m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i; + } + } + } + } + +#if 0 + if (0) + { + printf("==========================\n"); + for (int axis=0;axis<3;axis++) + { + unsigned int curMinIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].x; + unsigned int curMaxIndex40 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][40].y; + unsigned int prevMaxIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].y; + unsigned int prevMinIndex40 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][40].x; + + int dmin40 = curMinIndex40 - prevMinIndex40; + int dmax40 = curMinIndex40 - prevMinIndex40; + printf("axis %d curMinIndex40=%d prevMinIndex40=%d\n",axis,curMinIndex40, prevMinIndex40); + printf("axis %d curMaxIndex40=%d prevMaxIndex40=%d\n",axis,curMaxIndex40, prevMaxIndex40); + } + printf(".........................\n"); + for (int axis=0;axis<3;axis++) + { + unsigned int curMinIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].x; + unsigned int curMaxIndex53 = m_objectMinMaxIndexCPU[axis][m_currentBuffer][53].y; + unsigned int prevMaxIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].y; + unsigned int prevMinIndex53 = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][53].x; + + int dmin40 = curMinIndex53 - prevMinIndex53; + int dmax40 = curMinIndex53 - prevMinIndex53; + printf("axis %d curMinIndex53=%d prevMinIndex53=%d\n",axis,curMinIndex53, prevMinIndex53); + printf("axis %d curMaxIndex53=%d prevMaxIndex53=%d\n",axis,curMaxIndex53, prevMaxIndex53); + } + + } +#endif + + + int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); + int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size(); + int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size(); + b3Assert(a==b); + b3Assert(b==c); + /* + if (searchIncremental3dSapOnGpu) + { + B3_PROFILE("computePairsIncremental3dSapKernelGPU"); + int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); + int maxCapacity = 1024*1024; + { + B3_PROFILE("copy from host"); + m_objectMinMaxIndexGPUaxis0.copyFromHost(m_objectMinMaxIndexCPU[0][m_currentBuffer]); + m_objectMinMaxIndexGPUaxis1.copyFromHost(m_objectMinMaxIndexCPU[1][m_currentBuffer]); + m_objectMinMaxIndexGPUaxis2.copyFromHost(m_objectMinMaxIndexCPU[2][m_currentBuffer]); + m_objectMinMaxIndexGPUaxis0prev.copyFromHost(m_objectMinMaxIndexCPU[0][1-m_currentBuffer]); + m_objectMinMaxIndexGPUaxis1prev.copyFromHost(m_objectMinMaxIndexCPU[1][1-m_currentBuffer]); + m_objectMinMaxIndexGPUaxis2prev.copyFromHost(m_objectMinMaxIndexCPU[2][1-m_currentBuffer]); + + m_sortedAxisGPU0.copyFromHost(m_sortedAxisCPU[0][m_currentBuffer]); + m_sortedAxisGPU1.copyFromHost(m_sortedAxisCPU[1][m_currentBuffer]); + m_sortedAxisGPU2.copyFromHost(m_sortedAxisCPU[2][m_currentBuffer]); + m_sortedAxisGPU0prev.copyFromHost(m_sortedAxisCPU[0][1-m_currentBuffer]); + m_sortedAxisGPU1prev.copyFromHost(m_sortedAxisCPU[1][1-m_currentBuffer]); + m_sortedAxisGPU2prev.copyFromHost(m_sortedAxisCPU[2][1-m_currentBuffer]); + + + m_addedHostPairsGPU.resize(maxCapacity); + m_removedHostPairsGPU.resize(maxCapacity); + + m_addedCountGPU.resize(0); + m_addedCountGPU.push_back(0); + m_removedCountGPU.resize(0); + m_removedCountGPU.push_back(0); + } + + { + B3_PROFILE("launch1D"); + b3LauncherCL launcher(m_queue, m_computePairsIncremental3dSapKernel,"m_computePairsIncremental3dSapKernel"); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis0.getBufferCL()); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis1.getBufferCL()); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis2.getBufferCL()); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis0prev.getBufferCL()); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis1prev.getBufferCL()); + launcher.setBuffer(m_objectMinMaxIndexGPUaxis2prev.getBufferCL()); + + launcher.setBuffer(m_sortedAxisGPU0.getBufferCL()); + launcher.setBuffer(m_sortedAxisGPU1.getBufferCL()); + launcher.setBuffer(m_sortedAxisGPU2.getBufferCL()); + launcher.setBuffer(m_sortedAxisGPU0prev.getBufferCL()); + launcher.setBuffer(m_sortedAxisGPU1prev.getBufferCL()); + launcher.setBuffer(m_sortedAxisGPU2prev.getBufferCL()); + + + launcher.setBuffer(m_addedHostPairsGPU.getBufferCL()); + launcher.setBuffer(m_removedHostPairsGPU.getBufferCL()); + launcher.setBuffer(m_addedCountGPU.getBufferCL()); + launcher.setBuffer(m_removedCountGPU.getBufferCL()); + launcher.setConst(maxCapacity); + launcher.setConst( numObjects); + launcher.launch1D( numObjects); + clFinish(m_queue); + } + + { + B3_PROFILE("copy to host"); + int addedCountGPU = m_addedCountGPU.at(0); + m_addedHostPairsGPU.resize(addedCountGPU); + m_addedHostPairsGPU.copyToHost(addedHostPairs); + + //printf("addedCountGPU=%d\n",addedCountGPU); + int removedCountGPU = m_removedCountGPU.at(0); + m_removedHostPairsGPU.resize(removedCountGPU); + m_removedHostPairsGPU.copyToHost(removedHostPairs); + //printf("removedCountGPU=%d\n",removedCountGPU); + + } + + + + } + else + */ + { + int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size(); + + B3_PROFILE("actual search"); + for (int i=0;i<numObjects;i++) + { + //int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size(); + //int checkObjects[]={40,53}; + //int numCheckObjects = sizeof(checkObjects)/sizeof(int); + + //for (int a=0;a<numCheckObjects ;a++) + + for (int axis=0;axis<3;axis++) + { + //int i = checkObjects[a]; + + unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x; + unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y; + unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].x; + int dmin = curMinIndex - prevMinIndex; + + unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].y; + + + + int dmax = curMaxIndex - prevMaxIndex; + if (dmin!=0) + { + //printf("for object %d, dmin=%d\n",i,dmin); + } + if (dmax!=0) + { + //printf("for object %d, dmax=%d\n",i,dmax); + } + for (int otherbuffer = 0;otherbuffer<2;otherbuffer++) + { + if (dmin!=0) + { + int stepMin = dmin<0 ? -1 : 1; + for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin) + { + int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; + int otherIndex = otherIndex2/2; + if (otherIndex!=i) + { + bool otherIsMax = ((otherIndex2&1)!=0); + + if (otherIsMax) + { + //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); + //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); + + bool overlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + + // b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + //b3Assert(overlap==overlap2); + + + + if (dmin<0) + { + if (overlap && !prevOverlap) + { + //add a pair + b3Int4 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + addedHostPairs.push_back(newPair); + } + } + else + { + if (!overlap && prevOverlap) + { + + //remove a pair + b3Int4 removedPair; + if (i<=otherIndex) + { + removedPair.x = i; + removedPair.y = otherIndex; + } else + { + removedPair.x = otherIndex; + removedPair.y = i; + } + removedHostPairs.push_back(removedPair); + } + }//otherisMax + }//if (dmin<0) + }//if (otherIndex!=i) + }//for (int j= + } + + if (dmax!=0) + { + int stepMax = dmax<0 ? -1 : 1; + for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax) + { + int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y; + int otherIndex = otherIndex2/2; + if (otherIndex!=i) + { + //bool otherIsMin = ((otherIndex2&1)==0); + //if (otherIsMin) + { + //bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max); + //bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max); + + bool overlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x)) + overlap=false; + } + //b3Assert(overlap2==overlap); + + bool prevOverlap = true; + + for (int ax=0;ax<3;ax++) + { + if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) || + (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x)) + prevOverlap=false; + } + + + if (dmax>0) + { + if (overlap && !prevOverlap) + { + //add a pair + b3Int4 newPair; + if (i<=otherIndex) + { + newPair.x = i; + newPair.y = otherIndex; + } else + { + newPair.x = otherIndex; + newPair.y = i; + } + addedHostPairs.push_back(newPair); + + } + } + else + { + if (!overlap && prevOverlap) + { + //if (otherIndex2&1==0) -> min? + //remove a pair + b3Int4 removedPair; + if (i<=otherIndex) + { + removedPair.x = i; + removedPair.y = otherIndex; + } else + { + removedPair.x = otherIndex; + removedPair.y = i; + } + removedHostPairs.push_back(removedPair); + + } + } + + }//if (dmin<0) + }//if (otherIndex!=i) + }//for (int j= + } + }//for (int otherbuffer + }//for (int axis=0; + }//for (int i=0;i<numObjects + } + + //remove duplicates and add/remove then to existing m_overlappingPairs + + + + { + { + B3_PROFILE("sort allPairs"); + allPairs.quickSort(b3PairCmp); + } + { + B3_PROFILE("sort addedHostPairs"); + addedHostPairs.quickSort(b3PairCmp); + } + { + B3_PROFILE("sort removedHostPairs"); + removedHostPairs.quickSort(b3PairCmp); + } + } + + b3Int4 prevPair; + prevPair.x = -1; + prevPair.y = -1; + + int uniqueRemovedPairs = 0; + + b3AlignedObjectArray<int> removedPositions; + + { + B3_PROFILE("actual removing"); + for (int i=0;i<removedHostPairs.size();i++) + { + b3Int4 removedPair = removedHostPairs[i]; + if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y)) + { + + int index1 = allPairs.findBinarySearch(removedPair); + + //#ifdef _DEBUG + + + + int index2 = allPairs.findLinearSearch(removedPair); + b3Assert(index1==index2); + + //b3Assert(index1!=allPairs.size()); + if (index1<allPairs.size()) + //#endif//_DEBUG + { + uniqueRemovedPairs++; + removedPositions.push_back(index1); + { + //printf("framepje(%d) remove pair(%d):%d,%d\n",framepje,i,removedPair.x,removedPair.y); + } + } + } + prevPair = removedPair; + } + + if (uniqueRemovedPairs) + { + for (int i=0;i<removedPositions.size();i++) + { + allPairs[removedPositions[i]].x = INT_MAX ; + allPairs[removedPositions[i]].y = INT_MAX ; + } + allPairs.quickSort(b3PairCmp); + allPairs.resize(allPairs.size()-uniqueRemovedPairs); + } + } + //if (uniqueRemovedPairs) + // printf("uniqueRemovedPairs=%d\n",uniqueRemovedPairs); + //printf("removedHostPairs.size = %d\n",removedHostPairs.size()); + + prevPair.x = -1; + prevPair.y = -1; + + int uniqueAddedPairs=0; + b3AlignedObjectArray<b3Int4> actualAddedPairs; + + { + B3_PROFILE("actual adding"); + for (int i=0;i<addedHostPairs.size();i++) + { + b3Int4 newPair = addedHostPairs[i]; + if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y)) + { +//#ifdef _DEBUG + int index1 = allPairs.findBinarySearch(newPair); + + + int index2 = allPairs.findLinearSearch(newPair); + b3Assert(index1==index2); + + + b3Assert(index1==allPairs.size()); + if (index1!=allPairs.size()) + { + printf("??\n"); + } + + if (index1==allPairs.size()) +//#endif //_DEBUG + { + uniqueAddedPairs++; + actualAddedPairs.push_back(newPair); + } + } + prevPair = newPair; + } + for (int i=0;i<actualAddedPairs.size();i++) + { + //printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y); + allPairs.push_back(actualAddedPairs[i]); + } + } + + //if (uniqueAddedPairs) + // printf("uniqueAddedPairs=%d\n", uniqueAddedPairs); + + + { + B3_PROFILE("m_overlappingPairs.copyFromHost"); + m_overlappingPairs.copyFromHost(allPairs); + } + + +} + + + + +void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs) +{ + //test +// if (m_currentBuffer>=0) + // return calculateOverlappingPairsHostIncremental3Sap(); + + b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size()); + m_allAabbsGPU.copyToHost(m_allAabbsCPU); + + + + int axis=0; + { + B3_PROFILE("CPU compute best variance axis"); + b3Vector3 s=b3MakeVector3(0,0,0),s2=b3MakeVector3(0,0,0); + int numRigidBodies = m_smallAabbsMappingCPU.size(); + + for(int i=0;i<numRigidBodies;i++) + { + b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; + + b3Vector3 maxAabb=b3MakeVector3(aabb.m_max[0],aabb.m_max[1],aabb.m_max[2]); + b3Vector3 minAabb=b3MakeVector3(aabb.m_min[0],aabb.m_min[1],aabb.m_min[2]); + b3Vector3 centerAabb=(maxAabb+minAabb)*0.5f; + + s += centerAabb; + s2 += centerAabb*centerAabb; + } + b3Vector3 v = s2 - (s*s) / (float)numRigidBodies; + + if(v[1] > v[0]) + axis = 1; + if(v[2] > v[axis]) + axis = 2; + } + + + + + b3AlignedObjectArray<b3Int4> hostPairs; + + { + int numSmallAabbs = m_smallAabbsMappingCPU.size(); + for (int i=0;i<numSmallAabbs;i++) + { + b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; + //float reference = smallAabbi.m_max[axis]; + + for (int j=i+1;j<numSmallAabbs;j++) + { + + b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]]; + + if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, + (b3Vector3&)smallAabbj.m_min,(b3Vector3&)smallAabbj.m_max)) + { + b3Int4 pair; + int a = smallAabbi.m_minIndices[3]; + int b = smallAabbj.m_minIndices[3]; + if (a<=b) + { + pair.x = a;//store the original index in the unsorted aabb array + pair.y = b; + } else + { + pair.x = b;//store the original index in the unsorted aabb array + pair.y = a; + } + hostPairs.push_back(pair); + } + } + } + } + + + { + int numSmallAabbs = m_smallAabbsMappingCPU.size(); + for (int i=0;i<numSmallAabbs;i++) + { + b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]]; + + //float reference = smallAabbi.m_max[axis]; + int numLargeAabbs = m_largeAabbsMappingCPU.size(); + + for (int j=0;j<numLargeAabbs;j++) + { + b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]]; + if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max, + (b3Vector3&)largeAabbj.m_min,(b3Vector3&)largeAabbj.m_max)) + { + b3Int4 pair; + int a = largeAabbj.m_minIndices[3]; + int b = smallAabbi.m_minIndices[3]; + if (a<=b) + { + pair.x = a; + pair.y = b;//store the original index in the unsorted aabb array + } else + { + pair.x = b; + pair.y = a;//store the original index in the unsorted aabb array + } + + hostPairs.push_back(pair); + } + } + } + } + + if (hostPairs.size() > maxPairs) + { + hostPairs.resize(maxPairs); + } + + if (hostPairs.size()) + { + m_overlappingPairs.copyFromHost(hostPairs); + } else + { + m_overlappingPairs.resize(0); + } + + //init3dSap(); + +} + +void b3GpuSapBroadphase::reset() +{ + m_allAabbsGPU.resize(0); + m_allAabbsCPU.resize(0); + + + m_smallAabbsMappingGPU.resize(0); + m_smallAabbsMappingCPU.resize(0); + + m_pairCount.resize(0); + + m_largeAabbsMappingGPU.resize(0); + m_largeAabbsMappingCPU.resize(0); + +} + + +void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs) +{ + if (m_sapKernel==0) + { + calculateOverlappingPairsHost(maxPairs); + return; + } + + //if (m_currentBuffer>=0) + // return calculateOverlappingPairsHostIncremental3Sap(); + + //calculateOverlappingPairsHost(maxPairs); + + B3_PROFILE("GPU 1-axis SAP calculateOverlappingPairs"); + + int axis = 0; + + { + + //bool syncOnHost = false; + + int numSmallAabbs = m_smallAabbsMappingCPU.size(); + if (m_prefixScanFloat4 && numSmallAabbs) + { + B3_PROFILE("GPU compute best variance axis"); + + if (m_dst.size()!=(numSmallAabbs+1)) + { + m_dst.resize(numSmallAabbs+128); + m_sum.resize(numSmallAabbs+128); + m_sum2.resize(numSmallAabbs+128); + m_sum.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? + m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow? + } + + b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel"); + launcher.setBuffer(m_allAabbsGPU.getBufferCL()); + + launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL()); + launcher.setBuffer(m_sum.getBufferCL()); + launcher.setBuffer(m_sum2.getBufferCL()); + launcher.setConst( numSmallAabbs ); + int num = numSmallAabbs; + launcher.launch1D( num); + + + b3Vector3 s; + b3Vector3 s2; + m_prefixScanFloat4->execute(m_sum,m_dst,numSmallAabbs+1,&s); + m_prefixScanFloat4->execute(m_sum2,m_dst,numSmallAabbs+1,&s2); + + b3Vector3 v = s2 - (s*s) / (float)numSmallAabbs; + + if(v[1] > v[0]) + axis = 1; + if(v[2] > v[axis]) + axis = 2; + } + + + + m_gpuSmallSortData.resize(numSmallAabbs); + + +#if 1 + if (m_smallAabbsMappingGPU.size()) + { + + B3_PROFILE("flipFloatKernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), + b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true), + b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numSmallAabbs ); + launcher.setConst( axis ); + + int num = numSmallAabbs; + launcher.launch1D( num); + clFinish(m_queue); + } + + if (m_gpuSmallSortData.size()) + { + B3_PROFILE("gpu radix sort"); + m_sorter->execute(m_gpuSmallSortData); + clFinish(m_queue); + } + + m_gpuSmallSortedAabbs.resize(numSmallAabbs); + if (numSmallAabbs) + { + B3_PROFILE("scatterKernel"); + + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ), + b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true), + b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true), + b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel "); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numSmallAabbs); + int num = numSmallAabbs; + launcher.launch1D( num); + clFinish(m_queue); + + } + + + m_overlappingPairs.resize(maxPairs); + + m_pairCount.resize(0); + m_pairCount.push_back(0); + int numPairs=0; + + { + int numLargeAabbs = m_largeAabbsMappingGPU.size(); + if (numLargeAabbs && numSmallAabbs) + { + //@todo + B3_PROFILE("sap2Kernel"); + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ), + b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ), + b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ), + b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), + b3BufferInfoCL(m_pairCount.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numLargeAabbs ); + launcher.setConst( numSmallAabbs); + launcher.setConst( axis ); + launcher.setConst( maxPairs ); +//@todo: use actual maximum work item sizes of the device instead of hardcoded values + launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64); + + numPairs = m_pairCount.at(0); + if (numPairs >maxPairs) + { + b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); + numPairs =maxPairs; + } + } + } + if (m_gpuSmallSortedAabbs.size()) + { + B3_PROFILE("sapKernel"); + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())}; + b3LauncherCL launcher(m_queue, m_sapKernel,"m_sapKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( numSmallAabbs ); + launcher.setConst( axis ); + launcher.setConst( maxPairs ); + + + int num = numSmallAabbs; +#if 0 + int buffSize = launcher.getSerializationBufferSize(); + unsigned char* buf = new unsigned char[buffSize+sizeof(int)]; + for (int i=0;i<buffSize+1;i++) + { + unsigned char* ptr = (unsigned char*)&buf[i]; + *ptr = 0xff; + } + int actualWrite = launcher.serializeArguments(buf,buffSize); + + unsigned char* cptr = (unsigned char*)&buf[buffSize]; + // printf("buf[buffSize] = %d\n",*cptr); + + assert(buf[buffSize]==0xff);//check for buffer overrun + int* ptr = (int*)&buf[buffSize]; + + *ptr = num; + + FILE* f = fopen("m_sapKernelArgs.bin","wb"); + fwrite(buf,buffSize+sizeof(int),1,f); + fclose(f); +#endif// + + launcher.launch1D( num); + clFinish(m_queue); + + numPairs = m_pairCount.at(0); + if (numPairs>maxPairs) + { + b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs); + numPairs = maxPairs; + m_pairCount.resize(0); + m_pairCount.push_back(maxPairs); + } + } + +#else + int numPairs = 0; + + + b3LauncherCL launcher(m_queue, m_sapKernel); + + const char* fileName = "m_sapKernelArgs.bin"; + FILE* f = fopen(fileName,"rb"); + if (f) + { + int sizeInBytes=0; + if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) + { + printf("error, cannot get file size\n"); + exit(0); + } + + unsigned char* buf = (unsigned char*) malloc(sizeInBytes); + fread(buf,sizeInBytes,1,f); + int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); + int num = *(int*)&buf[serializedBytes]; + launcher.launch1D( num); + + b3OpenCLArray<int> pairCount(m_context, m_queue); + int numElements = launcher.m_arrays[2]->size()/sizeof(int); + pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements); + numPairs = pairCount.at(0); + //printf("overlapping pairs = %d\n",numPairs); + b3AlignedObjectArray<b3Int4> hostOoverlappingPairs; + b3OpenCLArray<b3Int4> tmpGpuPairs(m_context,m_queue); + tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs ); + + tmpGpuPairs.copyToHost(hostOoverlappingPairs); + m_overlappingPairs.copyFromHost(hostOoverlappingPairs); + //printf("hello %d\n", m_overlappingPairs.size()); + free(buf); + fclose(f); + + } else { + printf("error: cannot find file %s\n",fileName); + } + + clFinish(m_queue); + + +#endif + + + m_overlappingPairs.resize(numPairs); + + }//B3_PROFILE("GPU_RADIX SORT"); + //init3dSap(); +} + +void b3GpuSapBroadphase::writeAabbsToGpu() +{ + m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU); + m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU); + + m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this + + + +} + +void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) +{ + int index = userPtr; + b3SapAabb aabb; + for (int i=0;i<4;i++) + { + aabb.m_min[i] = aabbMin[i]; + aabb.m_max[i] = aabbMax[i]; + } + aabb.m_minIndices[3] = index; + aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); + m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size()); + + m_allAabbsCPU.push_back(aabb); +} + +void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask) +{ + int index = userPtr; + b3SapAabb aabb; + for (int i=0;i<4;i++) + { + aabb.m_min[i] = aabbMin[i]; + aabb.m_max[i] = aabbMax[i]; + } + aabb.m_minIndices[3] = index; + aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size(); + m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size()); + + + m_allAabbsCPU.push_back(aabb); +} + +cl_mem b3GpuSapBroadphase::getAabbBufferWS() +{ + return m_allAabbsGPU.getBufferCL(); +} + +int b3GpuSapBroadphase::getNumOverlap() +{ + return m_overlappingPairs.size(); +} +cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer() +{ + return m_overlappingPairs.getBufferCL(); +} + +b3OpenCLArray<b3Int4>& b3GpuSapBroadphase::getOverlappingPairsGPU() +{ + return m_overlappingPairs; +} +b3OpenCLArray<int>& b3GpuSapBroadphase::getSmallAabbIndicesGPU() +{ + return m_smallAabbsMappingGPU; +} +b3OpenCLArray<int>& b3GpuSapBroadphase::getLargeAabbIndicesGPU() +{ + return m_largeAabbsMappingGPU; +} |