summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp918
1 files changed, 425 insertions, 493 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
index c45fbbdcaa..4126d03ed0 100644
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
+++ b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.cpp
@@ -6,7 +6,6 @@ bool searchIncremental3dSapOnGpu = true;
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3PrefixScanFloat4CL.h"
-
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "kernels/sapKernels.h"
@@ -56,110 +55,105 @@ bool searchIncremental3dSapOnGpu = true;
class b3PrefixScanFloat4CL* m_prefixScanFloat4;
*/
-b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType)
-:m_context(ctx),
-m_device(device),
-m_queue(q),
-
-m_objectMinMaxIndexGPUaxis0(ctx,q),
-m_objectMinMaxIndexGPUaxis1(ctx,q),
-m_objectMinMaxIndexGPUaxis2(ctx,q),
-m_objectMinMaxIndexGPUaxis0prev(ctx,q),
-m_objectMinMaxIndexGPUaxis1prev(ctx,q),
-m_objectMinMaxIndexGPUaxis2prev(ctx,q),
-m_sortedAxisGPU0(ctx,q),
-m_sortedAxisGPU1(ctx,q),
-m_sortedAxisGPU2(ctx,q),
-m_sortedAxisGPU0prev(ctx,q),
-m_sortedAxisGPU1prev(ctx,q),
-m_sortedAxisGPU2prev(ctx,q),
-m_addedHostPairsGPU(ctx,q),
-m_removedHostPairsGPU(ctx,q),
-m_addedCountGPU(ctx,q),
-m_removedCountGPU(ctx,q),
-m_currentBuffer(-1),
-m_pairCount(ctx,q),
-m_allAabbsGPU(ctx,q),
-m_sum(ctx,q),
-m_sum2(ctx,q),
-m_dst(ctx,q),
-m_smallAabbsMappingGPU(ctx,q),
-m_largeAabbsMappingGPU(ctx,q),
-m_overlappingPairs(ctx,q),
-m_gpuSmallSortData(ctx,q),
-m_gpuSmallSortedAabbs(ctx,q)
+b3GpuSapBroadphase::b3GpuSapBroadphase(cl_context ctx, cl_device_id device, cl_command_queue q, b3GpuSapKernelType kernelType)
+ : m_context(ctx),
+ m_device(device),
+ m_queue(q),
+
+ m_objectMinMaxIndexGPUaxis0(ctx, q),
+ m_objectMinMaxIndexGPUaxis1(ctx, q),
+ m_objectMinMaxIndexGPUaxis2(ctx, q),
+ m_objectMinMaxIndexGPUaxis0prev(ctx, q),
+ m_objectMinMaxIndexGPUaxis1prev(ctx, q),
+ m_objectMinMaxIndexGPUaxis2prev(ctx, q),
+ m_sortedAxisGPU0(ctx, q),
+ m_sortedAxisGPU1(ctx, q),
+ m_sortedAxisGPU2(ctx, q),
+ m_sortedAxisGPU0prev(ctx, q),
+ m_sortedAxisGPU1prev(ctx, q),
+ m_sortedAxisGPU2prev(ctx, q),
+ m_addedHostPairsGPU(ctx, q),
+ m_removedHostPairsGPU(ctx, q),
+ m_addedCountGPU(ctx, q),
+ m_removedCountGPU(ctx, q),
+ m_currentBuffer(-1),
+ m_pairCount(ctx, q),
+ m_allAabbsGPU(ctx, q),
+ m_sum(ctx, q),
+ m_sum2(ctx, q),
+ m_dst(ctx, q),
+ m_smallAabbsMappingGPU(ctx, q),
+ m_largeAabbsMappingGPU(ctx, q),
+ m_overlappingPairs(ctx, q),
+ m_gpuSmallSortData(ctx, q),
+ m_gpuSmallSortedAabbs(ctx, q)
{
const char* sapSrc = sapCL;
-
-
- cl_int errNum=0;
+
+ cl_int errNum = 0;
b3Assert(m_context);
b3Assert(m_device);
- cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context,m_device,sapSrc,&errNum,"",B3_BROADPHASE_SAP_PATH);
- b3Assert(errNum==CL_SUCCESS);
-
+ cl_program sapProg = b3OpenCLUtils::compileCLProgramFromString(m_context, m_device, sapSrc, &errNum, "", B3_BROADPHASE_SAP_PATH);
+ b3Assert(errNum == CL_SUCCESS);
- b3Assert(errNum==CL_SUCCESS);
+ b3Assert(errNum == CL_SUCCESS);
#ifndef __APPLE__
- m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context,m_device,m_queue);
+ m_prefixScanFloat4 = new b3PrefixScanFloat4CL(m_context, m_device, m_queue);
#else
m_prefixScanFloat4 = 0;
#endif
m_sapKernel = 0;
-
+
switch (kernelType)
{
case B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU:
{
- m_sapKernel=0;
+ m_sapKernel = 0;
break;
}
- case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU:
+ case B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU:
{
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBruteForce",&errNum,sapProg );
+ m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBruteForce", &errNum, sapProg);
break;
}
case B3_GPU_SAP_KERNEL_ORIGINAL:
{
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelOriginal",&errNum,sapProg );
+ m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelOriginal", &errNum, sapProg);
break;
}
case B3_GPU_SAP_KERNEL_BARRIER:
{
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelBarrier",&errNum,sapProg );
+ m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelBarrier", &errNum, sapProg);
break;
}
case B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY:
{
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+ m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg);
break;
}
default:
{
- m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelLocalSharedMemory",&errNum,sapProg );
+ m_sapKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelLocalSharedMemory", &errNum, sapProg);
b3Error("Unknown 3D GPU SAP provided, fallback to computePairsKernelLocalSharedMemory");
}
};
-
-
-
- m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "computePairsKernelTwoArrays",&errNum,sapProg );
- b3Assert(errNum==CL_SUCCESS);
- m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "prepareSumVarianceKernel",&errNum,sapProg );
- b3Assert(errNum==CL_SUCCESS);
+ m_sap2Kernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "computePairsKernelTwoArrays", &errNum, sapProg);
+ b3Assert(errNum == CL_SUCCESS);
-
- m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "flipFloatKernel",&errNum,sapProg );
+ m_prepareSumVarianceKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "prepareSumVarianceKernel", &errNum, sapProg);
+ b3Assert(errNum == CL_SUCCESS);
- m_copyAabbsKernel= b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "copyAabbsKernel",&errNum,sapProg );
+ m_flipFloatKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "flipFloatKernel", &errNum, sapProg);
- m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device,sapSrc, "scatterKernel",&errNum,sapProg );
+ m_copyAabbsKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "copyAabbsKernel", &errNum, sapProg);
- m_sorter = new b3RadixSort32CL(m_context,m_device,m_queue);
+ m_scatterKernel = b3OpenCLUtils::compileCLKernelFromString(m_context, m_device, sapSrc, "scatterKernel", &errNum, sapProg);
+
+ m_sorter = new b3RadixSort32CL(m_context, m_device, m_queue);
}
b3GpuSapBroadphase::~b3GpuSapBroadphase()
@@ -173,13 +167,11 @@ b3GpuSapBroadphase::~b3GpuSapBroadphase()
clReleaseKernel(m_sapKernel);
clReleaseKernel(m_sap2Kernel);
clReleaseKernel(m_prepareSumVarianceKernel);
-
-
}
/// conservative test for overlap between two aabbs
-static bool TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aabbMax1,
- const b3Vector3 &aabbMin2, const b3Vector3 &aabbMax2)
+static bool TestAabbAgainstAabb2(const b3Vector3& aabbMin1, const b3Vector3& aabbMax1,
+ const b3Vector3& aabbMin2, const b3Vector3& aabbMax2)
{
bool overlap = true;
overlap = (aabbMin1.getX() > aabbMax2.getX() || aabbMax1.getX() < aabbMin2.getX()) ? false : overlap;
@@ -188,8 +180,6 @@ static bool TestAabbAgainstAabb2(const b3Vector3 &aabbMin1, const b3Vector3 &aab
return overlap;
}
-
-
//http://stereopsis.com/radix.html
static unsigned int FloatFlip(float fl)
{
@@ -198,79 +188,77 @@ static unsigned int FloatFlip(float fl)
return f ^ mask;
};
-void b3GpuSapBroadphase::init3dSap()
+void b3GpuSapBroadphase::init3dSap()
{
- if (m_currentBuffer<0)
+ if (m_currentBuffer < 0)
{
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
m_currentBuffer = 0;
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
- for (int buf=0;buf<2;buf++)
+ for (int buf = 0; buf < 2; buf++)
{
int totalNumAabbs = m_allAabbsCPU.size();
- int numEndPoints = 2*totalNumAabbs;
+ int numEndPoints = 2 * totalNumAabbs;
m_sortedAxisCPU[axis][buf].resize(numEndPoints);
- if (buf==m_currentBuffer)
+ if (buf == m_currentBuffer)
{
- for (int i=0;i<totalNumAabbs;i++)
+ for (int i = 0; i < totalNumAabbs; i++)
{
- m_sortedAxisCPU[axis][buf][i*2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis])-1;
- m_sortedAxisCPU[axis][buf][i*2].m_value = i*2;
- m_sortedAxisCPU[axis][buf][i*2+1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis])+1;
- m_sortedAxisCPU[axis][buf][i*2+1].m_value = i*2+1;
+ m_sortedAxisCPU[axis][buf][i * 2].m_key = FloatFlip(m_allAabbsCPU[i].m_min[axis]) - 1;
+ m_sortedAxisCPU[axis][buf][i * 2].m_value = i * 2;
+ m_sortedAxisCPU[axis][buf][i * 2 + 1].m_key = FloatFlip(m_allAabbsCPU[i].m_max[axis]) + 1;
+ m_sortedAxisCPU[axis][buf][i * 2 + 1].m_value = i * 2 + 1;
}
}
}
}
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
}
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
//int totalNumAabbs = m_allAabbsCPU.size();
int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(numEndPoints);
- for (int i=0;i<numEndPoints;i++)
+ for (int i = 0; i < numEndPoints; i++)
{
int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
- int newDest = destIndex/2;
- if (destIndex&1)
+ int newDest = destIndex / 2;
+ if (destIndex & 1)
{
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i;
- } else
+ m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i;
+ }
+ else
{
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i;
+ m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i;
}
}
}
-
}
}
-
static bool b3PairCmp(const b3Int4& p, const b3Int4& q)
{
- return ((p.x<q.x) || ((p.x==q.x) && (p.y<q.y)));
+ return ((p.x < q.x) || ((p.x == q.x) && (p.y < q.y)));
}
-
-static bool operator==(const b3Int4& a,const b3Int4& b)
+static bool operator==(const b3Int4& a, const b3Int4& b)
{
return a.x == b.x && a.y == b.y;
};
-static bool operator<(const b3Int4& a,const b3Int4& b)
+static bool operator<(const b3Int4& a, const b3Int4& b)
{
return a.x < b.x || (a.x == b.x && a.y < b.y);
};
-static bool operator>(const b3Int4& a,const b3Int4& b)
+static bool operator>(const b3Int4& a, const b3Int4& b)
{
return a.x > b.x || (a.x == b.x && a.y > b.y);
};
@@ -278,31 +266,29 @@ static bool operator>(const b3Int4& a,const b3Int4& b)
b3AlignedObjectArray<b3Int4> addedHostPairs;
b3AlignedObjectArray<b3Int4> removedHostPairs;
-b3AlignedObjectArray<b3SapAabb> preAabbs;
+b3AlignedObjectArray<b3SapAabb> preAabbs;
-void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
+void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
{
//static int framepje = 0;
//printf("framepje=%d\n",framepje++);
-
B3_PROFILE("calculateOverlappingPairsHostIncremental3Sap");
addedHostPairs.resize(0);
removedHostPairs.resize(0);
- b3Assert(m_currentBuffer>=0);
-
+ b3Assert(m_currentBuffer >= 0);
+
{
preAabbs.resize(m_allAabbsCPU.size());
- for (int i=0;i<preAabbs.size();i++)
+ for (int i = 0; i < preAabbs.size(); i++)
{
- preAabbs[i]=m_allAabbsCPU[i];
+ preAabbs[i] = m_allAabbsCPU[i];
}
}
-
- if (m_currentBuffer<0)
+ if (m_currentBuffer < 0)
return;
{
B3_PROFILE("m_allAabbsGPU.copyToHost");
@@ -316,100 +302,87 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
}
if (0)
{
- {
- printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n",
- m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1],m_allAabbsCPU[40].m_min[2],
- m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1],m_allAabbsCPU[40].m_max[2]);
- }
-
- {
- printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n",
- m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1],m_allAabbsCPU[53].m_min[2],
- m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1],m_allAabbsCPU[53].m_max[2]);
- }
-
-
- {
- b3Int4 newPair;
- newPair.x = 40;
- newPair.y = 53;
- int index = allPairs.findBinarySearch(newPair);
- printf("hasPair(40,53)=%d out of %d\n",index, allPairs.size());
-
{
- int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max,(const b3Vector3&)m_allAabbsCPU[53].m_min,(const b3Vector3&)m_allAabbsCPU[53].m_max);
- printf("overlap=%d\n",overlap);
+ printf("ab[40].min=%f,%f,%f,ab[40].max=%f,%f,%f\n",
+ m_allAabbsCPU[40].m_min[0], m_allAabbsCPU[40].m_min[1], m_allAabbsCPU[40].m_min[2],
+ m_allAabbsCPU[40].m_max[0], m_allAabbsCPU[40].m_max[1], m_allAabbsCPU[40].m_max[2]);
}
- if (preAabbs.size())
- {
- int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max,(const b3Vector3&)preAabbs[53].m_min,(const b3Vector3&)preAabbs[53].m_max);
- printf("prevoverlap=%d\n",prevOverlap);
- } else
{
- printf("unknown prevoverlap\n");
+ printf("ab[53].min=%f,%f,%f,ab[53].max=%f,%f,%f\n",
+ m_allAabbsCPU[53].m_min[0], m_allAabbsCPU[53].m_min[1], m_allAabbsCPU[53].m_min[2],
+ m_allAabbsCPU[53].m_max[0], m_allAabbsCPU[53].m_max[1], m_allAabbsCPU[53].m_max[2]);
}
- }
- }
+ {
+ b3Int4 newPair;
+ newPair.x = 40;
+ newPair.y = 53;
+ int index = allPairs.findBinarySearch(newPair);
+ printf("hasPair(40,53)=%d out of %d\n", index, allPairs.size());
+ {
+ int overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[40].m_min, (const b3Vector3&)m_allAabbsCPU[40].m_max, (const b3Vector3&)m_allAabbsCPU[53].m_min, (const b3Vector3&)m_allAabbsCPU[53].m_max);
+ printf("overlap=%d\n", overlap);
+ }
+
+ if (preAabbs.size())
+ {
+ int prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[40].m_min, (const b3Vector3&)preAabbs[40].m_max, (const b3Vector3&)preAabbs[53].m_min, (const b3Vector3&)preAabbs[53].m_max);
+ printf("prevoverlap=%d\n", prevOverlap);
+ }
+ else
+ {
+ printf("unknown prevoverlap\n");
+ }
+ }
+ }
if (0)
{
- for (int i=0;i<m_allAabbsCPU.size();i++)
+ for (int i = 0; i < m_allAabbsCPU.size(); i++)
{
//printf("aabb[%d] min=%f,%f,%f max=%f,%f,%f\n",i,m_allAabbsCPU[i].m_min[0],m_allAabbsCPU[i].m_min[1],m_allAabbsCPU[i].m_min[2], m_allAabbsCPU[i].m_max[0],m_allAabbsCPU[i].m_max[1],m_allAabbsCPU[i].m_max[2]);
-
-
}
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
- for (int buf=0;buf<2;buf++)
+ for (int buf = 0; buf < 2; buf++)
{
- b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size()*2);
+ b3Assert(m_sortedAxisCPU[axis][buf].size() == m_allAabbsCPU.size() * 2);
}
}
}
-
-
- m_currentBuffer = 1-m_currentBuffer;
-
-
+ m_currentBuffer = 1 - m_currentBuffer;
int totalNumAabbs = m_allAabbsCPU.size();
{
B3_PROFILE("assign m_sortedAxisCPU(FloatFlip)");
- for (int i=0;i<totalNumAabbs;i++)
+ for (int i = 0; i < totalNumAabbs; i++)
{
-
-
unsigned int keyMin[3];
unsigned int keyMax[3];
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
- float vmin=m_allAabbsCPU[i].m_min[axis];
+ float vmin = m_allAabbsCPU[i].m_min[axis];
float vmax = m_allAabbsCPU[i].m_max[axis];
keyMin[axis] = FloatFlip(vmin);
keyMax[axis] = FloatFlip(vmax);
-
- m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_key = keyMin[axis]-1;
- m_sortedAxisCPU[axis][m_currentBuffer][i*2].m_value = i*2;
- m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_key = keyMax[axis]+1;
- m_sortedAxisCPU[axis][m_currentBuffer][i*2+1].m_value = i*2+1;
+
+ m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_key = keyMin[axis] - 1;
+ m_sortedAxisCPU[axis][m_currentBuffer][i * 2].m_value = i * 2;
+ m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_key = keyMax[axis] + 1;
+ m_sortedAxisCPU[axis][m_currentBuffer][i * 2 + 1].m_value = i * 2 + 1;
}
//printf("aabb[%d] min=%u,%u,%u max %u,%u,%u\n", i,keyMin[0],keyMin[1],keyMin[2],keyMax[0],keyMax[1],keyMax[2]);
-
}
}
-
-
{
B3_PROFILE("sort m_sortedAxisCPU");
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
m_sorter->executeHost(m_sortedAxisCPU[axis][m_currentBuffer]);
}
@@ -432,21 +405,22 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
{
B3_PROFILE("assign m_objectMinMaxIndexCPU");
- for (int axis=0;axis<3;axis++)
+ for (int axis = 0; axis < 3; axis++)
{
int totalNumAabbs = m_allAabbsCPU.size();
int numEndPoints = m_sortedAxisCPU[axis][m_currentBuffer].size();
m_objectMinMaxIndexCPU[axis][m_currentBuffer].resize(totalNumAabbs);
- for (int i=0;i<numEndPoints;i++)
+ for (int i = 0; i < numEndPoints; i++)
{
int destIndex = m_sortedAxisCPU[axis][m_currentBuffer][i].m_value;
- int newDest = destIndex/2;
- if (destIndex&1)
+ int newDest = destIndex / 2;
+ if (destIndex & 1)
{
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y=i;
- } else
+ m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].y = i;
+ }
+ else
{
- m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x=i;
+ m_objectMinMaxIndexCPU[axis][m_currentBuffer][newDest].x = i;
}
}
}
@@ -485,12 +459,11 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
}
#endif
-
int a = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
int b = m_objectMinMaxIndexCPU[1][m_currentBuffer].size();
int c = m_objectMinMaxIndexCPU[2][m_currentBuffer].size();
- b3Assert(a==b);
- b3Assert(b==c);
+ b3Assert(a == b);
+ b3Assert(b == c);
/*
if (searchIncremental3dSapOnGpu)
{
@@ -574,175 +547,170 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
int numObjects = m_objectMinMaxIndexCPU[0][m_currentBuffer].size();
B3_PROFILE("actual search");
- for (int i=0;i<numObjects;i++)
+ for (int i = 0; i < numObjects; i++)
{
//int numObjects = m_objectMinMaxIndexCPU[axis][m_currentBuffer].size();
//int checkObjects[]={40,53};
//int numCheckObjects = sizeof(checkObjects)/sizeof(int);
-
+
//for (int a=0;a<numCheckObjects ;a++)
-
- for (int axis=0;axis<3;axis++)
+
+ for (int axis = 0; axis < 3; axis++)
{
//int i = checkObjects[a];
unsigned int curMinIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].x;
unsigned int curMaxIndex = m_objectMinMaxIndexCPU[axis][m_currentBuffer][i].y;
- unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].x;
+ unsigned int prevMinIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].x;
int dmin = curMinIndex - prevMinIndex;
-
- unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1-m_currentBuffer][i].y;
-
+ unsigned int prevMaxIndex = m_objectMinMaxIndexCPU[axis][1 - m_currentBuffer][i].y;
int dmax = curMaxIndex - prevMaxIndex;
- if (dmin!=0)
+ if (dmin != 0)
{
//printf("for object %d, dmin=%d\n",i,dmin);
}
- if (dmax!=0)
+ if (dmax != 0)
{
//printf("for object %d, dmax=%d\n",i,dmax);
}
- for (int otherbuffer = 0;otherbuffer<2;otherbuffer++)
+ for (int otherbuffer = 0; otherbuffer < 2; otherbuffer++)
{
- if (dmin!=0)
+ if (dmin != 0)
{
- int stepMin = dmin<0 ? -1 : 1;
- for (int j=prevMinIndex;j!=curMinIndex;j+=stepMin)
+ int stepMin = dmin < 0 ? -1 : 1;
+ for (int j = prevMinIndex; j != curMinIndex; j += stepMin)
{
int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
- int otherIndex = otherIndex2/2;
- if (otherIndex!=i)
+ int otherIndex = otherIndex2 / 2;
+ if (otherIndex != i)
{
- bool otherIsMax = ((otherIndex2&1)!=0);
+ bool otherIsMax = ((otherIndex2 & 1) != 0);
if (otherIsMax)
{
//bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
//bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
-
+
bool overlap = true;
- for (int ax=0;ax<3;ax++)
+ for (int ax = 0; ax < 3; ax++)
{
if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
(m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
- overlap=false;
+ overlap = false;
}
- // b3Assert(overlap2==overlap);
+ // b3Assert(overlap2==overlap);
bool prevOverlap = true;
- for (int ax=0;ax<3;ax++)
+ for (int ax = 0; ax < 3; ax++)
{
- if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x))
- prevOverlap=false;
+ if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) ||
+ (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x))
+ prevOverlap = false;
}
-
//b3Assert(overlap==overlap2);
-
-
- if (dmin<0)
+ if (dmin < 0)
{
if (overlap && !prevOverlap)
{
//add a pair
b3Int4 newPair;
- if (i<=otherIndex)
+ if (i <= otherIndex)
{
newPair.x = i;
newPair.y = otherIndex;
- } else
+ }
+ else
{
newPair.x = otherIndex;
newPair.y = i;
}
addedHostPairs.push_back(newPair);
}
- }
+ }
else
{
if (!overlap && prevOverlap)
{
-
//remove a pair
b3Int4 removedPair;
- if (i<=otherIndex)
+ if (i <= otherIndex)
{
removedPair.x = i;
removedPair.y = otherIndex;
- } else
+ }
+ else
{
removedPair.x = otherIndex;
removedPair.y = i;
}
removedHostPairs.push_back(removedPair);
}
- }//otherisMax
- }//if (dmin<0)
- }//if (otherIndex!=i)
- }//for (int j=
+ } //otherisMax
+ } //if (dmin<0)
+ } //if (otherIndex!=i)
+ } //for (int j=
}
-
- if (dmax!=0)
+
+ if (dmax != 0)
{
- int stepMax = dmax<0 ? -1 : 1;
- for (int j=prevMaxIndex;j!=curMaxIndex;j+=stepMax)
+ int stepMax = dmax < 0 ? -1 : 1;
+ for (int j = prevMaxIndex; j != curMaxIndex; j += stepMax)
{
int otherIndex2 = m_sortedAxisCPU[axis][otherbuffer][j].y;
- int otherIndex = otherIndex2/2;
- if (otherIndex!=i)
+ int otherIndex = otherIndex2 / 2;
+ if (otherIndex != i)
{
//bool otherIsMin = ((otherIndex2&1)==0);
//if (otherIsMin)
{
//bool overlap = TestAabbAgainstAabb2((const b3Vector3&)m_allAabbsCPU[i].m_min, (const b3Vector3&)m_allAabbsCPU[i].m_max,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_min,(const b3Vector3&)m_allAabbsCPU[otherIndex].m_max);
//bool prevOverlap = TestAabbAgainstAabb2((const b3Vector3&)preAabbs[i].m_min, (const b3Vector3&)preAabbs[i].m_max,(const b3Vector3&)preAabbs[otherIndex].m_min,(const b3Vector3&)preAabbs[otherIndex].m_max);
-
+
bool overlap = true;
- for (int ax=0;ax<3;ax++)
+ for (int ax = 0; ax < 3; ax++)
{
if ((m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].y) ||
(m_objectMinMaxIndexCPU[ax][m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][m_currentBuffer][otherIndex].x))
- overlap=false;
+ overlap = false;
}
//b3Assert(overlap2==overlap);
bool prevOverlap = true;
- for (int ax=0;ax<3;ax++)
+ for (int ax = 0; ax < 3; ax++)
{
- if ((m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].y) ||
- (m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1-m_currentBuffer][otherIndex].x))
- prevOverlap=false;
+ if ((m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].x > m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].y) ||
+ (m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][i].y < m_objectMinMaxIndexCPU[ax][1 - m_currentBuffer][otherIndex].x))
+ prevOverlap = false;
}
-
- if (dmax>0)
+ if (dmax > 0)
{
if (overlap && !prevOverlap)
{
//add a pair
b3Int4 newPair;
- if (i<=otherIndex)
+ if (i <= otherIndex)
{
newPair.x = i;
newPair.y = otherIndex;
- } else
+ }
+ else
{
newPair.x = otherIndex;
newPair.y = i;
}
addedHostPairs.push_back(newPair);
-
}
- }
+ }
else
{
if (!overlap && prevOverlap)
@@ -750,33 +718,31 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
//if (otherIndex2&1==0) -> min?
//remove a pair
b3Int4 removedPair;
- if (i<=otherIndex)
+ if (i <= otherIndex)
{
removedPair.x = i;
removedPair.y = otherIndex;
- } else
+ }
+ else
{
removedPair.x = otherIndex;
removedPair.y = i;
}
removedHostPairs.push_back(removedPair);
-
}
}
-
- }//if (dmin<0)
- }//if (otherIndex!=i)
- }//for (int j=
+
+ } //if (dmin<0)
+ } //if (otherIndex!=i)
+ } //for (int j=
}
- }//for (int otherbuffer
- }//for (int axis=0;
- }//for (int i=0;i<numObjects
+ } //for (int otherbuffer
+ } //for (int axis=0;
+ } //for (int i=0;i<numObjects
}
//remove duplicates and add/remove then to existing m_overlappingPairs
-
-
-
+
{
{
B3_PROFILE("sort allPairs");
@@ -795,31 +761,28 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
b3Int4 prevPair;
prevPair.x = -1;
prevPair.y = -1;
-
+
int uniqueRemovedPairs = 0;
b3AlignedObjectArray<int> removedPositions;
{
B3_PROFILE("actual removing");
- for (int i=0;i<removedHostPairs.size();i++)
+ for (int i = 0; i < removedHostPairs.size(); i++)
{
b3Int4 removedPair = removedHostPairs[i];
if ((removedPair.x != prevPair.x) || (removedPair.y != prevPair.y))
{
+ int index1 = allPairs.findBinarySearch(removedPair);
- int index1 = allPairs.findBinarySearch(removedPair);
+ //#ifdef _DEBUG
- //#ifdef _DEBUG
-
-
-
int index2 = allPairs.findLinearSearch(removedPair);
- b3Assert(index1==index2);
-
+ b3Assert(index1 == index2);
+
//b3Assert(index1!=allPairs.size());
- if (index1<allPairs.size())
- //#endif//_DEBUG
+ if (index1 < allPairs.size())
+ //#endif//_DEBUG
{
uniqueRemovedPairs++;
removedPositions.push_back(index1);
@@ -833,13 +796,13 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
if (uniqueRemovedPairs)
{
- for (int i=0;i<removedPositions.size();i++)
+ for (int i = 0; i < removedPositions.size(); i++)
{
- allPairs[removedPositions[i]].x = INT_MAX ;
- allPairs[removedPositions[i]].y = INT_MAX ;
+ allPairs[removedPositions[i]].x = INT_MAX;
+ allPairs[removedPositions[i]].y = INT_MAX;
}
allPairs.quickSort(b3PairCmp);
- allPairs.resize(allPairs.size()-uniqueRemovedPairs);
+ allPairs.resize(allPairs.size() - uniqueRemovedPairs);
}
}
//if (uniqueRemovedPairs)
@@ -848,33 +811,31 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
prevPair.x = -1;
prevPair.y = -1;
-
- int uniqueAddedPairs=0;
+
+ int uniqueAddedPairs = 0;
b3AlignedObjectArray<b3Int4> actualAddedPairs;
{
B3_PROFILE("actual adding");
- for (int i=0;i<addedHostPairs.size();i++)
+ for (int i = 0; i < addedHostPairs.size(); i++)
{
b3Int4 newPair = addedHostPairs[i];
if ((newPair.x != prevPair.x) || (newPair.y != prevPair.y))
{
-//#ifdef _DEBUG
+ //#ifdef _DEBUG
int index1 = allPairs.findBinarySearch(newPair);
-
-
+
int index2 = allPairs.findLinearSearch(newPair);
- b3Assert(index1==index2);
-
+ b3Assert(index1 == index2);
- b3Assert(index1==allPairs.size());
- if (index1!=allPairs.size())
+ b3Assert(index1 == allPairs.size());
+ if (index1 != allPairs.size())
{
printf("??\n");
}
- if (index1==allPairs.size())
-//#endif //_DEBUG
+ if (index1 == allPairs.size())
+ //#endif //_DEBUG
{
uniqueAddedPairs++;
actualAddedPairs.push_back(newPair);
@@ -882,94 +843,83 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHostIncremental3Sap()
}
prevPair = newPair;
}
- for (int i=0;i<actualAddedPairs.size();i++)
+ for (int i = 0; i < actualAddedPairs.size(); i++)
{
//printf("framepje (%d), new pair(%d):%d,%d\n",framepje,i,actualAddedPairs[i].x,actualAddedPairs[i].y);
allPairs.push_back(actualAddedPairs[i]);
}
}
-
+
//if (uniqueAddedPairs)
// printf("uniqueAddedPairs=%d\n", uniqueAddedPairs);
-
{
B3_PROFILE("m_overlappingPairs.copyFromHost");
m_overlappingPairs.copyFromHost(allPairs);
}
-
-
}
-
-
-
-void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
+void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
{
//test
-// if (m_currentBuffer>=0)
+ // if (m_currentBuffer>=0)
// return calculateOverlappingPairsHostIncremental3Sap();
b3Assert(m_allAabbsCPU.size() == m_allAabbsGPU.size());
m_allAabbsGPU.copyToHost(m_allAabbsCPU);
-
-
- int axis=0;
+ int axis = 0;
{
B3_PROFILE("CPU compute best variance axis");
- b3Vector3 s=b3MakeVector3(0,0,0),s2=b3MakeVector3(0,0,0);
+ b3Vector3 s = b3MakeVector3(0, 0, 0), s2 = b3MakeVector3(0, 0, 0);
int numRigidBodies = m_smallAabbsMappingCPU.size();
- for(int i=0;i<numRigidBodies;i++)
+ for (int i = 0; i < numRigidBodies; i++)
{
b3SapAabb aabb = this->m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
- b3Vector3 maxAabb=b3MakeVector3(aabb.m_max[0],aabb.m_max[1],aabb.m_max[2]);
- b3Vector3 minAabb=b3MakeVector3(aabb.m_min[0],aabb.m_min[1],aabb.m_min[2]);
- b3Vector3 centerAabb=(maxAabb+minAabb)*0.5f;
-
+ b3Vector3 maxAabb = b3MakeVector3(aabb.m_max[0], aabb.m_max[1], aabb.m_max[2]);
+ b3Vector3 minAabb = b3MakeVector3(aabb.m_min[0], aabb.m_min[1], aabb.m_min[2]);
+ b3Vector3 centerAabb = (maxAabb + minAabb) * 0.5f;
+
s += centerAabb;
- s2 += centerAabb*centerAabb;
+ s2 += centerAabb * centerAabb;
}
- b3Vector3 v = s2 - (s*s) / (float)numRigidBodies;
-
- if(v[1] > v[0])
+ b3Vector3 v = s2 - (s * s) / (float)numRigidBodies;
+
+ if (v[1] > v[0])
axis = 1;
- if(v[2] > v[axis])
+ if (v[2] > v[axis])
axis = 2;
}
-
-
-
b3AlignedObjectArray<b3Int4> hostPairs;
{
int numSmallAabbs = m_smallAabbsMappingCPU.size();
- for (int i=0;i<numSmallAabbs;i++)
+ for (int i = 0; i < numSmallAabbs; i++)
{
b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
//float reference = smallAabbi.m_max[axis];
- for (int j=i+1;j<numSmallAabbs;j++)
+ for (int j = i + 1; j < numSmallAabbs; j++)
{
-
b3SapAabb smallAabbj = m_allAabbsCPU[m_smallAabbsMappingCPU[j]];
if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
- (b3Vector3&)smallAabbj.m_min,(b3Vector3&)smallAabbj.m_max))
+ (b3Vector3&)smallAabbj.m_min, (b3Vector3&)smallAabbj.m_max))
{
b3Int4 pair;
int a = smallAabbi.m_minIndices[3];
int b = smallAabbj.m_minIndices[3];
- if (a<=b)
+ if (a <= b)
{
- pair.x = a;//store the original index in the unsorted aabb array
+ pair.x = a; //store the original index in the unsorted aabb array
pair.y = b;
- } else
+ }
+ else
{
- pair.x = b;//store the original index in the unsorted aabb array
+ pair.x = b; //store the original index in the unsorted aabb array
pair.y = a;
}
hostPairs.push_back(pair);
@@ -978,35 +928,35 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
}
}
-
{
int numSmallAabbs = m_smallAabbsMappingCPU.size();
- for (int i=0;i<numSmallAabbs;i++)
+ for (int i = 0; i < numSmallAabbs; i++)
{
b3SapAabb smallAabbi = m_allAabbsCPU[m_smallAabbsMappingCPU[i]];
//float reference = smallAabbi.m_max[axis];
int numLargeAabbs = m_largeAabbsMappingCPU.size();
- for (int j=0;j<numLargeAabbs;j++)
+ for (int j = 0; j < numLargeAabbs; j++)
{
b3SapAabb largeAabbj = m_allAabbsCPU[m_largeAabbsMappingCPU[j]];
if (TestAabbAgainstAabb2((b3Vector3&)smallAabbi.m_min, (b3Vector3&)smallAabbi.m_max,
- (b3Vector3&)largeAabbj.m_min,(b3Vector3&)largeAabbj.m_max))
+ (b3Vector3&)largeAabbj.m_min, (b3Vector3&)largeAabbj.m_max))
{
b3Int4 pair;
int a = largeAabbj.m_minIndices[3];
int b = smallAabbi.m_minIndices[3];
- if (a<=b)
+ if (a <= b)
{
- pair.x = a;
- pair.y = b;//store the original index in the unsorted aabb array
- } else
+ pair.x = a;
+ pair.y = b; //store the original index in the unsorted aabb array
+ }
+ else
{
pair.x = b;
- pair.y = a;//store the original index in the unsorted aabb array
+ pair.y = a; //store the original index in the unsorted aabb array
}
-
+
hostPairs.push_back(pair);
}
}
@@ -1021,21 +971,20 @@ void b3GpuSapBroadphase::calculateOverlappingPairsHost(int maxPairs)
if (hostPairs.size())
{
m_overlappingPairs.copyFromHost(hostPairs);
- } else
+ }
+ else
{
m_overlappingPairs.resize(0);
}
//init3dSap();
-
}
-void b3GpuSapBroadphase::reset()
+void b3GpuSapBroadphase::reset()
{
m_allAabbsGPU.resize(0);
m_allAabbsCPU.resize(0);
-
m_smallAabbsMappingGPU.resize(0);
m_smallAabbsMappingCPU.resize(0);
@@ -1043,13 +992,11 @@ void b3GpuSapBroadphase::reset()
m_largeAabbsMappingGPU.resize(0);
m_largeAabbsMappingCPU.resize(0);
-
}
-
-void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
+void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
{
- if (m_sapKernel==0)
+ if (m_sapKernel == 0)
{
calculateOverlappingPairsHost(maxPairs);
return;
@@ -1065,68 +1012,62 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
int axis = 0;
{
+ //bool syncOnHost = false;
- //bool syncOnHost = false;
-
- int numSmallAabbs = m_smallAabbsMappingCPU.size();
- if (m_prefixScanFloat4 && numSmallAabbs)
- {
- B3_PROFILE("GPU compute best variance axis");
-
- if (m_dst.size()!=(numSmallAabbs+1))
+ int numSmallAabbs = m_smallAabbsMappingCPU.size();
+ if (m_prefixScanFloat4 && numSmallAabbs)
{
- m_dst.resize(numSmallAabbs+128);
- m_sum.resize(numSmallAabbs+128);
- m_sum2.resize(numSmallAabbs+128);
- m_sum.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow?
- m_sum2.at(numSmallAabbs)=b3MakeVector3(0,0,0); //slow?
- }
+ B3_PROFILE("GPU compute best variance axis");
- b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel ,"m_prepareSumVarianceKernel");
- launcher.setBuffer(m_allAabbsGPU.getBufferCL());
-
- launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
- launcher.setBuffer(m_sum.getBufferCL());
- launcher.setBuffer(m_sum2.getBufferCL());
- launcher.setConst( numSmallAabbs );
- int num = numSmallAabbs;
- launcher.launch1D( num);
-
+ if (m_dst.size() != (numSmallAabbs + 1))
+ {
+ m_dst.resize(numSmallAabbs + 128);
+ m_sum.resize(numSmallAabbs + 128);
+ m_sum2.resize(numSmallAabbs + 128);
+ m_sum.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow?
+ m_sum2.at(numSmallAabbs) = b3MakeVector3(0, 0, 0); //slow?
+ }
- b3Vector3 s;
- b3Vector3 s2;
- m_prefixScanFloat4->execute(m_sum,m_dst,numSmallAabbs+1,&s);
- m_prefixScanFloat4->execute(m_sum2,m_dst,numSmallAabbs+1,&s2);
+ b3LauncherCL launcher(m_queue, m_prepareSumVarianceKernel, "m_prepareSumVarianceKernel");
+ launcher.setBuffer(m_allAabbsGPU.getBufferCL());
- b3Vector3 v = s2 - (s*s) / (float)numSmallAabbs;
-
- if(v[1] > v[0])
- axis = 1;
- if(v[2] > v[axis])
- axis = 2;
- }
+ launcher.setBuffer(m_smallAabbsMappingGPU.getBufferCL());
+ launcher.setBuffer(m_sum.getBufferCL());
+ launcher.setBuffer(m_sum2.getBufferCL());
+ launcher.setConst(numSmallAabbs);
+ int num = numSmallAabbs;
+ launcher.launch1D(num);
+ b3Vector3 s;
+ b3Vector3 s2;
+ m_prefixScanFloat4->execute(m_sum, m_dst, numSmallAabbs + 1, &s);
+ m_prefixScanFloat4->execute(m_sum2, m_dst, numSmallAabbs + 1, &s2);
+
+ b3Vector3 v = s2 - (s * s) / (float)numSmallAabbs;
+
+ if (v[1] > v[0])
+ axis = 1;
+ if (v[2] > v[axis])
+ axis = 2;
+ }
-
m_gpuSmallSortData.resize(numSmallAabbs);
-
#if 1
if (m_smallAabbsMappingGPU.size())
{
-
B3_PROFILE("flipFloatKernel");
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
- b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true),
- b3BufferInfoCL( m_gpuSmallSortData.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_flipFloatKernel ,"m_flipFloatKernel");
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
- launcher.setConst( numSmallAabbs );
- launcher.setConst( axis );
-
+ b3BufferInfoCL bInfo[] = {
+ b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true),
+ b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true),
+ b3BufferInfoCL(m_gpuSmallSortData.getBufferCL())};
+ b3LauncherCL launcher(m_queue, m_flipFloatKernel, "m_flipFloatKernel");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
+ launcher.setConst(numSmallAabbs);
+ launcher.setConst(axis);
+
int num = numSmallAabbs;
- launcher.launch1D( num);
+ launcher.launch1D(num);
clFinish(m_queue);
}
@@ -1141,69 +1082,66 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
if (numSmallAabbs)
{
B3_PROFILE("scatterKernel");
-
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL( m_allAabbsGPU.getBufferCL(), true ),
- b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL(), true),
- b3BufferInfoCL( m_gpuSmallSortData.getBufferCL(),true),
+
+ b3BufferInfoCL bInfo[] = {
+ b3BufferInfoCL(m_allAabbsGPU.getBufferCL(), true),
+ b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL(), true),
+ b3BufferInfoCL(m_gpuSmallSortData.getBufferCL(), true),
b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_scatterKernel ,"m_scatterKernel ");
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
- launcher.setConst( numSmallAabbs);
+ b3LauncherCL launcher(m_queue, m_scatterKernel, "m_scatterKernel ");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
+ launcher.setConst(numSmallAabbs);
int num = numSmallAabbs;
- launcher.launch1D( num);
+ launcher.launch1D(num);
clFinish(m_queue);
-
}
-
- m_overlappingPairs.resize(maxPairs);
+ m_overlappingPairs.resize(maxPairs);
- m_pairCount.resize(0);
- m_pairCount.push_back(0);
- int numPairs=0;
+ m_pairCount.resize(0);
+ m_pairCount.push_back(0);
+ int numPairs = 0;
+ {
+ int numLargeAabbs = m_largeAabbsMappingGPU.size();
+ if (numLargeAabbs && numSmallAabbs)
{
- int numLargeAabbs = m_largeAabbsMappingGPU.size();
- if (numLargeAabbs && numSmallAabbs)
+ //@todo
+ B3_PROFILE("sap2Kernel");
+ b3BufferInfoCL bInfo[] = {
+ b3BufferInfoCL(m_allAabbsGPU.getBufferCL()),
+ b3BufferInfoCL(m_largeAabbsMappingGPU.getBufferCL()),
+ b3BufferInfoCL(m_smallAabbsMappingGPU.getBufferCL()),
+ b3BufferInfoCL(m_overlappingPairs.getBufferCL()),
+ b3BufferInfoCL(m_pairCount.getBufferCL())};
+ b3LauncherCL launcher(m_queue, m_sap2Kernel, "m_sap2Kernel");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
+ launcher.setConst(numLargeAabbs);
+ launcher.setConst(numSmallAabbs);
+ launcher.setConst(axis);
+ launcher.setConst(maxPairs);
+ //@todo: use actual maximum work item sizes of the device instead of hardcoded values
+ launcher.launch2D(numLargeAabbs, numSmallAabbs, 4, 64);
+
+ numPairs = m_pairCount.at(0);
+ if (numPairs > maxPairs)
{
- //@todo
- B3_PROFILE("sap2Kernel");
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL( m_allAabbsGPU.getBufferCL() ),
- b3BufferInfoCL( m_largeAabbsMappingGPU.getBufferCL() ),
- b3BufferInfoCL( m_smallAabbsMappingGPU.getBufferCL() ),
- b3BufferInfoCL( m_overlappingPairs.getBufferCL() ),
- b3BufferInfoCL(m_pairCount.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_sap2Kernel,"m_sap2Kernel");
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
- launcher.setConst( numLargeAabbs );
- launcher.setConst( numSmallAabbs);
- launcher.setConst( axis );
- launcher.setConst( maxPairs );
-//@todo: use actual maximum work item sizes of the device instead of hardcoded values
- launcher.launch2D( numLargeAabbs, numSmallAabbs,4,64);
-
- numPairs = m_pairCount.at(0);
- if (numPairs >maxPairs)
- {
- b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
- numPairs =maxPairs;
- }
+ b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
+ numPairs = maxPairs;
}
}
- if (m_gpuSmallSortedAabbs.size())
- {
- B3_PROFILE("sapKernel");
- b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_gpuSmallSortedAabbs.getBufferCL() ), b3BufferInfoCL( m_overlappingPairs.getBufferCL() ), b3BufferInfoCL(m_pairCount.getBufferCL())};
- b3LauncherCL launcher(m_queue, m_sapKernel,"m_sapKernel");
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
- launcher.setConst( numSmallAabbs );
- launcher.setConst( axis );
- launcher.setConst( maxPairs );
-
-
- int num = numSmallAabbs;
+ }
+ if (m_gpuSmallSortedAabbs.size())
+ {
+ B3_PROFILE("sapKernel");
+ b3BufferInfoCL bInfo[] = {b3BufferInfoCL(m_gpuSmallSortedAabbs.getBufferCL()), b3BufferInfoCL(m_overlappingPairs.getBufferCL()), b3BufferInfoCL(m_pairCount.getBufferCL())};
+ b3LauncherCL launcher(m_queue, m_sapKernel, "m_sapKernel");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
+ launcher.setConst(numSmallAabbs);
+ launcher.setConst(axis);
+ launcher.setConst(maxPairs);
+
+ int num = numSmallAabbs;
#if 0
int buffSize = launcher.getSerializationBufferSize();
unsigned char* buf = new unsigned char[buffSize+sizeof(int)];
@@ -1225,73 +1163,71 @@ void b3GpuSapBroadphase::calculateOverlappingPairs(int maxPairs)
FILE* f = fopen("m_sapKernelArgs.bin","wb");
fwrite(buf,buffSize+sizeof(int),1,f);
fclose(f);
-#endif//
+#endif //
- launcher.launch1D( num);
- clFinish(m_queue);
-
- numPairs = m_pairCount.at(0);
- if (numPairs>maxPairs)
- {
- b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
- numPairs = maxPairs;
- m_pairCount.resize(0);
- m_pairCount.push_back(maxPairs);
- }
+ launcher.launch1D(num);
+ clFinish(m_queue);
+
+ numPairs = m_pairCount.at(0);
+ if (numPairs > maxPairs)
+ {
+ b3Error("Error running out of pairs: numPairs = %d, maxPairs = %d.\n", numPairs, maxPairs);
+ numPairs = maxPairs;
+ m_pairCount.resize(0);
+ m_pairCount.push_back(maxPairs);
}
-
+ }
+
#else
- int numPairs = 0;
-
-
- b3LauncherCL launcher(m_queue, m_sapKernel);
-
- const char* fileName = "m_sapKernelArgs.bin";
- FILE* f = fopen(fileName,"rb");
- if (f)
- {
- int sizeInBytes=0;
- if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
- {
- printf("error, cannot get file size\n");
- exit(0);
- }
-
- unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
- fread(buf,sizeInBytes,1,f);
- int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context);
- int num = *(int*)&buf[serializedBytes];
- launcher.launch1D( num);
-
- b3OpenCLArray<int> pairCount(m_context, m_queue);
- int numElements = launcher.m_arrays[2]->size()/sizeof(int);
- pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(),numElements);
- numPairs = pairCount.at(0);
- //printf("overlapping pairs = %d\n",numPairs);
- b3AlignedObjectArray<b3Int4> hostOoverlappingPairs;
- b3OpenCLArray<b3Int4> tmpGpuPairs(m_context,m_queue);
- tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(),numPairs );
-
- tmpGpuPairs.copyToHost(hostOoverlappingPairs);
- m_overlappingPairs.copyFromHost(hostOoverlappingPairs);
- //printf("hello %d\n", m_overlappingPairs.size());
- free(buf);
- fclose(f);
-
- } else {
- printf("error: cannot find file %s\n",fileName);
- }
-
- clFinish(m_queue);
-
-
+ int numPairs = 0;
+
+ b3LauncherCL launcher(m_queue, m_sapKernel);
+
+ const char* fileName = "m_sapKernelArgs.bin";
+ FILE* f = fopen(fileName, "rb");
+ if (f)
+ {
+ int sizeInBytes = 0;
+ if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
+ {
+ printf("error, cannot get file size\n");
+ exit(0);
+ }
+
+ unsigned char* buf = (unsigned char*)malloc(sizeInBytes);
+ fread(buf, sizeInBytes, 1, f);
+ int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context);
+ int num = *(int*)&buf[serializedBytes];
+ launcher.launch1D(num);
+
+ b3OpenCLArray<int> pairCount(m_context, m_queue);
+ int numElements = launcher.m_arrays[2]->size() / sizeof(int);
+ pairCount.setFromOpenCLBuffer(launcher.m_arrays[2]->getBufferCL(), numElements);
+ numPairs = pairCount.at(0);
+ //printf("overlapping pairs = %d\n",numPairs);
+ b3AlignedObjectArray<b3Int4> hostOoverlappingPairs;
+ b3OpenCLArray<b3Int4> tmpGpuPairs(m_context, m_queue);
+ tmpGpuPairs.setFromOpenCLBuffer(launcher.m_arrays[1]->getBufferCL(), numPairs);
+
+ tmpGpuPairs.copyToHost(hostOoverlappingPairs);
+ m_overlappingPairs.copyFromHost(hostOoverlappingPairs);
+ //printf("hello %d\n", m_overlappingPairs.size());
+ free(buf);
+ fclose(f);
+ }
+ else
+ {
+ printf("error: cannot find file %s\n", fileName);
+ }
+
+ clFinish(m_queue);
+
#endif
-
- m_overlappingPairs.resize(numPairs);
-
- }//B3_PROFILE("GPU_RADIX SORT");
- //init3dSap();
+ m_overlappingPairs.resize(numPairs);
+
+ } //B3_PROFILE("GPU_RADIX SORT");
+ //init3dSap();
}
void b3GpuSapBroadphase::writeAabbsToGpu()
@@ -1299,17 +1235,14 @@ void b3GpuSapBroadphase::writeAabbsToGpu()
m_smallAabbsMappingGPU.copyFromHost(m_smallAabbsMappingCPU);
m_largeAabbsMappingGPU.copyFromHost(m_largeAabbsMappingCPU);
- m_allAabbsGPU.copyFromHost(m_allAabbsCPU);//might not be necessary, the 'setupGpuAabbsFull' already takes care of this
-
-
-
+ m_allAabbsGPU.copyFromHost(m_allAabbsCPU); //might not be necessary, the 'setupGpuAabbsFull' already takes care of this
}
-void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask)
+void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
{
int index = userPtr;
b3SapAabb aabb;
- for (int i=0;i<4;i++)
+ for (int i = 0; i < 4; i++)
{
aabb.m_min[i] = aabbMin[i];
aabb.m_max[i] = aabbMax[i];
@@ -1317,15 +1250,15 @@ void b3GpuSapBroadphase::createLargeProxy(const b3Vector3& aabbMin, const b3Vec
aabb.m_minIndices[3] = index;
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
m_largeAabbsMappingCPU.push_back(m_allAabbsCPU.size());
-
+
m_allAabbsCPU.push_back(aabb);
}
-void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask)
+void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr, int collisionFilterGroup, int collisionFilterMask)
{
int index = userPtr;
b3SapAabb aabb;
- for (int i=0;i<4;i++)
+ for (int i = 0; i < 4; i++)
{
aabb.m_min[i] = aabbMin[i];
aabb.m_max[i] = aabbMax[i];
@@ -1334,20 +1267,19 @@ void b3GpuSapBroadphase::createProxy(const b3Vector3& aabbMin, const b3Vector3&
aabb.m_signedMaxIndices[3] = m_allAabbsCPU.size();
m_smallAabbsMappingCPU.push_back(m_allAabbsCPU.size());
-
m_allAabbsCPU.push_back(aabb);
}
-cl_mem b3GpuSapBroadphase::getAabbBufferWS()
+cl_mem b3GpuSapBroadphase::getAabbBufferWS()
{
return m_allAabbsGPU.getBufferCL();
}
-int b3GpuSapBroadphase::getNumOverlap()
+int b3GpuSapBroadphase::getNumOverlap()
{
return m_overlappingPairs.size();
}
-cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer()
+cl_mem b3GpuSapBroadphase::getOverlappingPairBuffer()
{
return m_overlappingPairs.getBufferCL();
}