summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl216
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h198
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl767
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h728
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl389
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h341
6 files changed, 0 insertions, 2639 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl
deleted file mode 100644
index ded4796d33..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl
+++ /dev/null
@@ -1,216 +0,0 @@
-
-
-int getPosHash(int4 gridPos, __global float4* pParams)
-{
- int4 gridDim = *((__global int4*)(pParams + 1));
- gridPos.x &= gridDim.x - 1;
- gridPos.y &= gridDim.y - 1;
- gridPos.z &= gridDim.z - 1;
- int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;
- return hash;
-}
-
-int4 getGridPos(float4 worldPos, __global float4* pParams)
-{
- int4 gridPos;
- int4 gridDim = *((__global int4*)(pParams + 1));
- gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);
- gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);
- gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);
- return gridPos;
-}
-
-
-// calculate grid hash value for each body using its AABB
-__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )
-{
- int index = get_global_id(0);
- if(index >= numObjects)
- {
- return;
- }
- float4 bbMin = allpAABB[smallAabbMapping[index]*2];
- float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];
- float4 pos;
- pos.x = (bbMin.x + bbMax.x) * 0.5f;
- pos.y = (bbMin.y + bbMax.y) * 0.5f;
- pos.z = (bbMin.z + bbMax.z) * 0.5f;
- pos.w = 0.f;
- // get address in grid
- int4 gridPos = getGridPos(pos, pParams);
- int gridHash = getPosHash(gridPos, pParams);
- // store grid hash and body index
- int2 hashVal;
- hashVal.x = gridHash;
- hashVal.y = index;
- pHash[index] = hashVal;
-}
-
-__kernel void kClearCellStart( int numCells,
- __global int* pCellStart )
-{
- int index = get_global_id(0);
- if(index >= numCells)
- {
- return;
- }
- pCellStart[index] = -1;
-}
-
-__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )
-{
- __local int sharedHash[513];
- int index = get_global_id(0);
- int2 sortedData;
-
- if(index < numObjects)
- {
- sortedData = pHash[index];
- // Load hash data into shared memory so that we can look
- // at neighboring body's hash value without loading
- // two hash values per thread
- sharedHash[get_local_id(0) + 1] = sortedData.x;
- if((index > 0) && (get_local_id(0) == 0))
- {
- // first thread in block must load neighbor body hash
- sharedHash[0] = pHash[index-1].x;
- }
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- if(index < numObjects)
- {
- if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))
- {
- cellStart[sortedData.x] = index;
- }
- }
-}
-
-int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)
-{
- return (min0.x <= max1.x)&& (min1.x <= max0.x) &&
- (min0.y <= max1.y)&& (min1.y <= max0.y) &&
- (min0.z <= max1.z)&& (min1.z <= max0.z);
-}
-
-
-
-
-//search for AABB 'index' against other AABBs' in this cell
-void findPairsInCell( int numObjects,
- int4 gridPos,
- int index,
- __global int2* pHash,
- __global int* pCellStart,
- __global float4* allpAABB,
- __global const int* smallAabbMapping,
- __global float4* pParams,
- volatile __global int* pairCount,
- __global int4* pPairBuff2,
- int maxPairs
- )
-{
- int4 pGridDim = *((__global int4*)(pParams + 1));
- int maxBodiesPerCell = pGridDim.w;
- int gridHash = getPosHash(gridPos, pParams);
- // get start of bucket for this cell
- int bucketStart = pCellStart[gridHash];
- if (bucketStart == -1)
- {
- return; // cell empty
- }
- // iterate over bodies in this cell
- int2 sortedData = pHash[index];
- int unsorted_indx = sortedData.y;
- float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
- float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
- int handleIndex = as_int(min0.w);
-
- int bucketEnd = bucketStart + maxBodiesPerCell;
- bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;
- for(int index2 = bucketStart; index2 < bucketEnd; index2++)
- {
- int2 cellData = pHash[index2];
- if (cellData.x != gridHash)
- {
- break; // no longer in same bucket
- }
- int unsorted_indx2 = cellData.y;
- //if (unsorted_indx2 < unsorted_indx) // check not colliding with self
- if (unsorted_indx2 != unsorted_indx) // check not colliding with self
- {
- float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];
- float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];
- if(testAABBOverlap(min0, max0, min1, max1))
- {
- if (pairCount)
- {
- int handleIndex2 = as_int(min1.w);
- if (handleIndex<handleIndex2)
- {
- int curPair = atomic_add(pairCount,1);
- if (curPair<maxPairs)
- {
- int4 newpair;
- newpair.x = handleIndex;
- newpair.y = handleIndex2;
- newpair.z = -1;
- newpair.w = -1;
- pPairBuff2[curPair] = newpair;
- }
- }
-
- }
- }
- }
- }
-}
-
-__kernel void kFindOverlappingPairs( int numObjects,
- __global float4* allpAABB,
- __global const int* smallAabbMapping,
- __global int2* pHash,
- __global int* pCellStart,
- __global float4* pParams ,
- volatile __global int* pairCount,
- __global int4* pPairBuff2,
- int maxPairs
- )
-
-{
- int index = get_global_id(0);
- if(index >= numObjects)
- {
- return;
- }
- int2 sortedData = pHash[index];
- int unsorted_indx = sortedData.y;
- float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];
- float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];
- float4 pos;
- pos.x = (bbMin.x + bbMax.x) * 0.5f;
- pos.y = (bbMin.y + bbMax.y) * 0.5f;
- pos.z = (bbMin.z + bbMax.z) * 0.5f;
- // get address in grid
- int4 gridPosA = getGridPos(pos, pParams);
- int4 gridPosB;
- // examine only neighbouring cells
- for(int z=-1; z<=1; z++)
- {
- gridPosB.z = gridPosA.z + z;
- for(int y=-1; y<=1; y++)
- {
- gridPosB.y = gridPosA.y + y;
- for(int x=-1; x<=1; x++)
- {
- gridPosB.x = gridPosA.x + x;
- findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);
- }
- }
- }
-}
-
-
-
-
-
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h
deleted file mode 100644
index 0185417786..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h
+++ /dev/null
@@ -1,198 +0,0 @@
-//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
-static const char* gridBroadphaseCL =
- "int getPosHash(int4 gridPos, __global float4* pParams)\n"
- "{\n"
- " int4 gridDim = *((__global int4*)(pParams + 1));\n"
- " gridPos.x &= gridDim.x - 1;\n"
- " gridPos.y &= gridDim.y - 1;\n"
- " gridPos.z &= gridDim.z - 1;\n"
- " int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n"
- " return hash;\n"
- "} \n"
- "int4 getGridPos(float4 worldPos, __global float4* pParams)\n"
- "{\n"
- " int4 gridPos;\n"
- " int4 gridDim = *((__global int4*)(pParams + 1));\n"
- " gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n"
- " gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n"
- " gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n"
- " return gridPos;\n"
- "}\n"
- "// calculate grid hash value for each body using its AABB\n"
- "__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n"
- "{\n"
- " int index = get_global_id(0);\n"
- " if(index >= numObjects)\n"
- " {\n"
- " return;\n"
- " }\n"
- " float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n"
- " float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n"
- " float4 pos;\n"
- " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
- " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
- " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
- " pos.w = 0.f;\n"
- " // get address in grid\n"
- " int4 gridPos = getGridPos(pos, pParams);\n"
- " int gridHash = getPosHash(gridPos, pParams);\n"
- " // store grid hash and body index\n"
- " int2 hashVal;\n"
- " hashVal.x = gridHash;\n"
- " hashVal.y = index;\n"
- " pHash[index] = hashVal;\n"
- "}\n"
- "__kernel void kClearCellStart( int numCells, \n"
- " __global int* pCellStart )\n"
- "{\n"
- " int index = get_global_id(0);\n"
- " if(index >= numCells)\n"
- " {\n"
- " return;\n"
- " }\n"
- " pCellStart[index] = -1;\n"
- "}\n"
- "__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n"
- "{\n"
- " __local int sharedHash[513];\n"
- " int index = get_global_id(0);\n"
- " int2 sortedData;\n"
- " if(index < numObjects)\n"
- " {\n"
- " sortedData = pHash[index];\n"
- " // Load hash data into shared memory so that we can look \n"
- " // at neighboring body's hash value without loading\n"
- " // two hash values per thread\n"
- " sharedHash[get_local_id(0) + 1] = sortedData.x;\n"
- " if((index > 0) && (get_local_id(0) == 0))\n"
- " {\n"
- " // first thread in block must load neighbor body hash\n"
- " sharedHash[0] = pHash[index-1].x;\n"
- " }\n"
- " }\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " if(index < numObjects)\n"
- " {\n"
- " if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n"
- " {\n"
- " cellStart[sortedData.x] = index;\n"
- " }\n"
- " }\n"
- "}\n"
- "int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n"
- "{\n"
- " return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n"
- " (min0.y <= max1.y)&& (min1.y <= max0.y) && \n"
- " (min0.z <= max1.z)&& (min1.z <= max0.z); \n"
- "}\n"
- "//search for AABB 'index' against other AABBs' in this cell\n"
- "void findPairsInCell( int numObjects,\n"
- " int4 gridPos,\n"
- " int index,\n"
- " __global int2* pHash,\n"
- " __global int* pCellStart,\n"
- " __global float4* allpAABB, \n"
- " __global const int* smallAabbMapping,\n"
- " __global float4* pParams,\n"
- " volatile __global int* pairCount,\n"
- " __global int4* pPairBuff2,\n"
- " int maxPairs\n"
- " )\n"
- "{\n"
- " int4 pGridDim = *((__global int4*)(pParams + 1));\n"
- " int maxBodiesPerCell = pGridDim.w;\n"
- " int gridHash = getPosHash(gridPos, pParams);\n"
- " // get start of bucket for this cell\n"
- " int bucketStart = pCellStart[gridHash];\n"
- " if (bucketStart == -1)\n"
- " {\n"
- " return; // cell empty\n"
- " }\n"
- " // iterate over bodies in this cell\n"
- " int2 sortedData = pHash[index];\n"
- " int unsorted_indx = sortedData.y;\n"
- " float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n"
- " float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
- " int handleIndex = as_int(min0.w);\n"
- " \n"
- " int bucketEnd = bucketStart + maxBodiesPerCell;\n"
- " bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n"
- " for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n"
- " {\n"
- " int2 cellData = pHash[index2];\n"
- " if (cellData.x != gridHash)\n"
- " {\n"
- " break; // no longer in same bucket\n"
- " }\n"
- " int unsorted_indx2 = cellData.y;\n"
- " //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n"
- " if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n"
- " { \n"
- " float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n"
- " float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n"
- " if(testAABBOverlap(min0, max0, min1, max1))\n"
- " {\n"
- " if (pairCount)\n"
- " {\n"
- " int handleIndex2 = as_int(min1.w);\n"
- " if (handleIndex<handleIndex2)\n"
- " {\n"
- " int curPair = atomic_add(pairCount,1);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " int4 newpair;\n"
- " newpair.x = handleIndex;\n"
- " newpair.y = handleIndex2;\n"
- " newpair.z = -1;\n"
- " newpair.w = -1;\n"
- " pPairBuff2[curPair] = newpair;\n"
- " }\n"
- " }\n"
- " \n"
- " }\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void kFindOverlappingPairs( int numObjects,\n"
- " __global float4* allpAABB, \n"
- " __global const int* smallAabbMapping,\n"
- " __global int2* pHash, \n"
- " __global int* pCellStart, \n"
- " __global float4* pParams ,\n"
- " volatile __global int* pairCount,\n"
- " __global int4* pPairBuff2,\n"
- " int maxPairs\n"
- " )\n"
- "{\n"
- " int index = get_global_id(0);\n"
- " if(index >= numObjects)\n"
- " {\n"
- " return;\n"
- " }\n"
- " int2 sortedData = pHash[index];\n"
- " int unsorted_indx = sortedData.y;\n"
- " float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n"
- " float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n"
- " float4 pos;\n"
- " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n"
- " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n"
- " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n"
- " // get address in grid\n"
- " int4 gridPosA = getGridPos(pos, pParams);\n"
- " int4 gridPosB; \n"
- " // examine only neighbouring cells\n"
- " for(int z=-1; z<=1; z++) \n"
- " {\n"
- " gridPosB.z = gridPosA.z + z;\n"
- " for(int y=-1; y<=1; y++) \n"
- " {\n"
- " gridPosB.y = gridPosA.y + y;\n"
- " for(int x=-1; x<=1; x++) \n"
- " {\n"
- " gridPosB.x = gridPosA.x + x;\n"
- " findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n";
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
deleted file mode 100644
index c375b9bf37..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl
+++ /dev/null
@@ -1,767 +0,0 @@
-/*
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-//Initial Author Jackson Lee, 2014
-
-typedef float b3Scalar;
-typedef float4 b3Vector3;
-#define b3Max max
-#define b3Min min
-#define b3Sqrt sqrt
-
-typedef struct
-{
- unsigned int m_key;
- unsigned int m_value;
-} SortDataCL;
-
-typedef struct
-{
- union
- {
- float4 m_min;
- float m_minElems[4];
- int m_minIndices[4];
- };
- union
- {
- float4 m_max;
- float m_maxElems[4];
- int m_maxIndices[4];
- };
-} b3AabbCL;
-
-
-unsigned int interleaveBits(unsigned int x)
-{
- //........ ........ ......12 3456789A //x
- //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits
-
- //......12 3456789A ......12 3456789A //x ^ (x << 16)
- //11111111 ........ ........ 11111111 //0x FF 00 00 FF
- //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;
-
- //......12 ........ 3456789A 3456789A //x ^ (x << 8)
- //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F
- //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;
-
- //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)
- //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3
- //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;
-
- //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)
- //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49
- //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;
-
- //........ ........ ......11 11111111 //0x000003FF
- x &= 0x000003FF; //Clear all bits above bit 10
-
- x = (x ^ (x << 16)) & 0xFF0000FF;
- x = (x ^ (x << 8)) & 0x0300F00F;
- x = (x ^ (x << 4)) & 0x030C30C3;
- x = (x ^ (x << 2)) & 0x09249249;
-
- return x;
-}
-unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)
-{
- return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;
-}
-
-__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)
-{
- int separatedAabbIndex = get_global_id(0);
- if(separatedAabbIndex >= numAabbsToSeparate) return;
-
- int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];
- out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];
-}
-
-//Should replace with an optimized parallel reduction
-__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)
-{
- //Each time this kernel is added to the command queue,
- //the number of AABBs needing to be merged is halved
- //
- //Example with 159 AABBs:
- // numRemainingAabbs == 159 / 2 + 159 % 2 == 80
- // numMergedAabbs == 159 - 80 == 79
- //So, indices [0, 78] are merged with [0 + 80, 78 + 80]
-
- int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;
- int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;
-
- int aabbIndex = get_global_id(0);
- if(aabbIndex >= numMergedAabbs) return;
-
- int otherAabbIndex = aabbIndex + numRemainingAabbs;
-
- b3AabbCL aabb = out_mergedAabb[aabbIndex];
- b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];
-
- b3AabbCL mergedAabb;
- mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);
- mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);
- out_mergedAabb[aabbIndex] = mergedAabb;
-}
-
-__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes,
- __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)
-{
- int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index
- if(leafNodeIndex >= numAabbs) return;
-
- b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];
- b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;
- b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;
-
- b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];
- b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;
- b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;
-
- //Quantize into integer coordinates
- //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size
- b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;
-
- int4 discretePosition;
- discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );
- discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );
- discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );
-
- //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]
- discretePosition = b3Max( -512, b3Min(discretePosition, 511) );
- discretePosition += 512;
-
- //Interleave bits(assign a morton code, also known as a z-curve)
- unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);
-
- //
- SortDataCL mortonCodeIndexPair;
- mortonCodeIndexPair.m_key = mortonCode;
- mortonCodeIndexPair.m_value = leafNodeIndex;
-
- out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;
-}
-
-#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128
-
-//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.
-//If it is set, then the index is for an internal node; otherwise, it is a leaf node.
-//In both cases, the bit should be cleared to access the actual node index.
-int isLeafNode(int index) { return (index >> 31 == 0); }
-int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }
-int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }
-
-//From sap.cl
-#define NEW_PAIR_MARKER -1
-
-bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)
-{
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
-}
-//From sap.cl
-
-__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs,
-
- __global int* rootNodeIndex,
- __global int2* internalNodeChildIndices,
- __global b3AabbCL* internalNodeAabbs,
- __global int2* internalNodeLeafIndexRanges,
-
- __global SortDataCL* mortonCodesAndAabbIndices,
- __global int* out_numPairs, __global int4* out_overlappingPairs,
- int maxPairs, int numQueryAabbs)
-{
- //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since
- //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)
- int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
- if(queryBvhNodeIndex >= numQueryAabbs) return;
-
- int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;
- b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];
-
- int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
-
- int stackSize = 1;
- stack[0] = *rootNodeIndex;
-
- while(stackSize)
- {
- int internalOrLeafNodeIndex = stack[ stackSize - 1 ];
- --stackSize;
-
- int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
- int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
-
- //Optimization - if the BVH is structured as a binary radix tree, then
- //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).
- //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.
- {
- int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;
- if(highestLeafIndex <= queryBvhNodeIndex) continue;
- }
-
- //bvhRigidIndex is not used if internal node
- int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
-
- b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
- if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )
- {
- if(isLeaf)
- {
- int4 pair;
- pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];
- pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];
- pair.z = NEW_PAIR_MARKER;
- pair.w = NEW_PAIR_MARKER;
-
- int pairIndex = atomic_inc(out_numPairs);
- if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;
- }
-
- if(!isLeaf) //Internal node
- {
- if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
- {
- //Error
- }
- else
- {
- stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;
- stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;
- }
- }
- }
-
- }
-}
-
-
-//From rayCastKernels.cl
-typedef struct
-{
- float4 m_from;
- float4 m_to;
-} b3RayInfo;
-//From rayCastKernels.cl
-
-b3Vector3 b3Vector3_normalize(b3Vector3 v)
-{
- b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};
- return normalize(normal); //OpenCL normalize == vector4 normalize
-}
-b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }
-b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }
-
-int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)
-{
- //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).
- //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.
- //
- //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane
- //and min.x will be the far plane; otherwise, it is reversed.
- //
- //In order for there to be a collision, the t_min and t_max of each pair must overlap.
- //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.
-
- int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)
-
- //When using vector types, the select() function checks the most signficant bit,
- //but isless() sets the least significant bit.
- isNegative <<= 31;
-
- //select(b, a, condition) == condition ? a : b
- //When using select() with vector types, (condition[i]) is true if its most significant bit is 1
- b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;
- b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;
-
- b3Scalar t_min_final = 0.0f;
- b3Scalar t_max_final = rayLength;
-
- //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned.
- //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])
- //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.
- t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );
- t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );
-
- return (t_min_final <= t_max_final);
-}
-
-__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,
-
- __global int* rootNodeIndex,
- __global int2* internalNodeChildIndices,
- __global b3AabbCL* internalNodeAabbs,
- __global int2* internalNodeLeafIndexRanges,
- __global SortDataCL* mortonCodesAndAabbIndices,
-
- __global b3RayInfo* rays,
-
- __global int* out_numRayRigidPairs,
- __global int2* out_rayRigidPairs,
- int maxRayRigidPairs, int numRays)
-{
- int rayIndex = get_global_id(0);
- if(rayIndex >= numRays) return;
-
- //
- b3Vector3 rayFrom = rays[rayIndex].m_from;
- b3Vector3 rayTo = rays[rayIndex].m_to;
- b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);
- b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
-
- //
- int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];
-
- int stackSize = 1;
- stack[0] = *rootNodeIndex;
-
- while(stackSize)
- {
- int internalOrLeafNodeIndex = stack[ stackSize - 1 ];
- --stackSize;
-
- int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false
- int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);
-
- //bvhRigidIndex is not used if internal node
- int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;
-
- b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];
- if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )
- {
- if(isLeaf)
- {
- int2 rayRigidPair;
- rayRigidPair.x = rayIndex;
- rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];
-
- int pairIndex = atomic_inc(out_numRayRigidPairs);
- if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;
- }
-
- if(!isLeaf) //Internal node
- {
- if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)
- {
- //Error
- }
- else
- {
- stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;
- stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;
- }
- }
- }
- }
-}
-
-__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs,
- __global int* out_numPairs, __global int4* out_overlappingPairs,
- int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)
-{
- int smallAabbIndex = get_global_id(0);
- if(smallAabbIndex >= numSmallAabbRigids) return;
-
- b3AabbCL smallAabb = smallAabbs[smallAabbIndex];
- for(int i = 0; i < numLargeAabbRigids; ++i)
- {
- b3AabbCL largeAabb = largeAabbs[i];
- if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )
- {
- int4 pair;
- pair.x = largeAabb.m_minIndices[3];
- pair.y = smallAabb.m_minIndices[3];
- pair.z = NEW_PAIR_MARKER;
- pair.w = NEW_PAIR_MARKER;
-
- int pairIndex = atomic_inc(out_numPairs);
- if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;
- }
- }
-}
-__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,
- __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,
- int numLargeAabbRigids, int maxRayRigidPairs, int numRays)
-{
- int rayIndex = get_global_id(0);
- if(rayIndex >= numRays) return;
-
- b3Vector3 rayFrom = rays[rayIndex].m_from;
- b3Vector3 rayTo = rays[rayIndex].m_to;
- b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);
- b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );
-
- for(int i = 0; i < numLargeAabbRigids; ++i)
- {
- b3AabbCL rigidAabb = largeRigidAabbs[i];
- if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )
- {
- int2 rayRigidPair;
- rayRigidPair.x = rayIndex;
- rayRigidPair.y = rigidAabb.m_minIndices[3];
-
- int pairIndex = atomic_inc(out_numRayRigidPairs);
- if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;
- }
- }
-}
-
-
-//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.
-//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.
-//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.
-//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).
-#define B3_PLBVH_INVALID_COMMON_PREFIX 128
-
-#define B3_PLBVH_ROOT_NODE_MARKER -1
-
-#define b3Int64 long
-
-int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }
-b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j)
-{
- //This function only needs to return (i & j) in order for the algorithm to work,
- //but it may help with debugging to mask out the lower bits.
-
- b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);
-
- b3Int64 sharedBits = i & j;
- b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0
-
- return sharedBits & bitmask;
-}
-
-//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths
-int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)
-{
- return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );
-}
-
-__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,
- __global b3Int64* out_commonPrefixes,
- __global int* out_commonPrefixLengths,
- int numInternalNodes)
-{
- int internalNodeIndex = get_global_id(0);
- if (internalNodeIndex >= numInternalNodes) return;
-
- //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,
- //and the number of internal nodes is always numLeafNodes - 1
- int leftLeafIndex = internalNodeIndex;
- int rightLeafIndex = internalNodeIndex + 1;
-
- int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;
- int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;
-
- //Binary radix tree construction algorithm does not work if there are duplicate morton codes.
- //Append the index of each leaf node to each morton code so that there are no duplicates.
- //The algorithm also requires that the morton codes are sorted in ascending order; this requirement
- //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.
- //
- //upsample(a, b) == ( ((b3Int64)a) << 32) | b
- b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);
- b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);
-
- out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);
- out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);
-}
-
-
-__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,
- __global int2* out_childNodes, int numLeafNodes)
-{
- int leafNodeIndex = get_global_id(0);
- if (leafNodeIndex >= numLeafNodes) return;
-
- int numInternalNodes = numLeafNodes - 1;
-
- int leftSplitIndex = leafNodeIndex - 1;
- int rightSplitIndex = leafNodeIndex;
-
- int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
- int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
-
- //Parent node is the highest adjacent common prefix that is lower than the node's common prefix
- //Leaf nodes are considered as having the highest common prefix
- int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);
-
- //Handle cases for the edge nodes; the first and last node
- //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX
- if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;
- if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;
-
- int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;
- out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;
-
- int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa
-
- //out_childNodesAsInt[0] == int2.x == left child
- //out_childNodesAsInt[1] == int2.y == right child
- int isLeaf = 1;
- __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
- out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);
-}
-
-__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,
- __global int2* out_childNodes,
- __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,
- int numInternalNodes)
-{
- int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);
- if(internalNodeIndex >= numInternalNodes) return;
-
- b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];
- int nodePrefixLength = commonPrefixLengths[internalNodeIndex];
-
-//#define USE_LINEAR_SEARCH
-#ifdef USE_LINEAR_SEARCH
- int leftIndex = -1;
- int rightIndex = -1;
-
- //Find nearest element to left with a lower common prefix
- for(int i = internalNodeIndex - 1; i >= 0; --i)
- {
- int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
- if(nodeLeftSharedPrefixLength < nodePrefixLength)
- {
- leftIndex = i;
- break;
- }
- }
-
- //Find nearest element to right with a lower common prefix
- for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)
- {
- int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);
- if(nodeRightSharedPrefixLength < nodePrefixLength)
- {
- rightIndex = i;
- break;
- }
- }
-
-#else //Use binary search
-
- //Find nearest element to left with a lower common prefix
- int leftIndex = -1;
- {
- int lower = 0;
- int upper = internalNodeIndex - 1;
-
- while(lower <= upper)
- {
- int mid = (lower + upper) / 2;
- b3Int64 midPrefix = commonPrefixes[mid];
- int midPrefixLength = commonPrefixLengths[mid];
-
- int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
- if(nodeMidSharedPrefixLength < nodePrefixLength)
- {
- int right = mid + 1;
- if(right < internalNodeIndex)
- {
- b3Int64 rightPrefix = commonPrefixes[right];
- int rightPrefixLength = commonPrefixLengths[right];
-
- int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);
- if(nodeRightSharedPrefixLength < nodePrefixLength)
- {
- lower = right;
- leftIndex = right;
- }
- else
- {
- leftIndex = mid;
- break;
- }
- }
- else
- {
- leftIndex = mid;
- break;
- }
- }
- else upper = mid - 1;
- }
- }
-
- //Find nearest element to right with a lower common prefix
- int rightIndex = -1;
- {
- int lower = internalNodeIndex + 1;
- int upper = numInternalNodes - 1;
-
- while(lower <= upper)
- {
- int mid = (lower + upper) / 2;
- b3Int64 midPrefix = commonPrefixes[mid];
- int midPrefixLength = commonPrefixLengths[mid];
-
- int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);
- if(nodeMidSharedPrefixLength < nodePrefixLength)
- {
- int left = mid - 1;
- if(left > internalNodeIndex)
- {
- b3Int64 leftPrefix = commonPrefixes[left];
- int leftPrefixLength = commonPrefixLengths[left];
-
- int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);
- if(nodeLeftSharedPrefixLength < nodePrefixLength)
- {
- upper = left;
- rightIndex = left;
- }
- else
- {
- rightIndex = mid;
- break;
- }
- }
- else
- {
- rightIndex = mid;
- break;
- }
- }
- else lower = mid + 1;
- }
- }
-#endif
-
- //Select parent
- {
- int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
- int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;
-
- int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);
-
- if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;
- else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;
-
- int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;
-
- int isRootNode = (leftIndex == -1 && rightIndex == -1);
- out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;
-
- int isLeaf = 0;
- if(!isRootNode)
- {
- int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa
-
- //out_childNodesAsInt[0] == int2.x == left child
- //out_childNodesAsInt[1] == int2.y == right child
- __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);
- out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
- }
- else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);
- }
-}
-
-__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,
- __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)
-{
- if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);
-
- int internalNodeIndex = get_global_id(0);
- if(internalNodeIndex >= numInternalNodes) return;
-
- //
- int distanceFromRoot = 0;
- {
- int parentIndex = internalNodeParentNodes[internalNodeIndex];
- while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)
- {
- parentIndex = internalNodeParentNodes[parentIndex];
- ++distanceFromRoot;
- }
- }
- out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;
-
- //
- __local int localMaxDistanceFromRoot;
- if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;
- barrier(CLK_LOCAL_MEM_FENCE);
-
- atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);
-}
-
-__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,
- __global int2* childNodes,
- __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,
- int maxDistanceFromRoot, int processedDistance, int numInternalNodes)
-{
- int internalNodeIndex = get_global_id(0);
- if(internalNodeIndex >= numInternalNodes) return;
-
- int distance = distanceFromRoot[internalNodeIndex];
-
- if(distance == processedDistance)
- {
- int leftChildIndex = childNodes[internalNodeIndex].x;
- int rightChildIndex = childNodes[internalNodeIndex].y;
-
- int isLeftChildLeaf = isLeafNode(leftChildIndex);
- int isRightChildLeaf = isLeafNode(rightChildIndex);
-
- leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);
- rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);
-
- //leftRigidIndex/rightRigidIndex is not used if internal node
- int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;
- int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;
-
- b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];
- b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];
-
- b3AabbCL mergedAabb;
- mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);
- mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);
- internalNodeAabbs[internalNodeIndex] = mergedAabb;
- }
-}
-
-__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)
-{
- int internalNodeIndex = get_global_id(0);
- if(internalNodeIndex >= numInternalNodes) return;
-
- int numLeafNodes = numInternalNodes + 1;
-
- int2 childNodes = internalNodeChildNodes[internalNodeIndex];
-
- int2 leafIndexRange; //x == min leaf index, y == max leaf index
-
- //Find lowest leaf index covered by this internal node
- {
- int lowestIndex = childNodes.x; //childNodes.x == Left child
- while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;
- leafIndexRange.x = lowestIndex;
- }
-
- //Find highest leaf index covered by this internal node
- {
- int highestIndex = childNodes.y; //childNodes.y == Right child
- while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;
- leafIndexRange.y = highestIndex;
- }
-
- //
- out_leafIndexRanges[internalNodeIndex] = leafIndexRange;
-}
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
deleted file mode 100644
index c02877dde9..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h
+++ /dev/null
@@ -1,728 +0,0 @@
-//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
-static const char* parallelLinearBvhCL =
- "/*\n"
- "This software is provided 'as-is', without any express or implied warranty.\n"
- "In no event will the authors be held liable for any damages arising from the use of this software.\n"
- "Permission is granted to anyone to use this software for any purpose,\n"
- "including commercial applications, and to alter it and redistribute it freely,\n"
- "subject to the following restrictions:\n"
- "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
- "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
- "3. This notice may not be removed or altered from any source distribution.\n"
- "*/\n"
- "//Initial Author Jackson Lee, 2014\n"
- "typedef float b3Scalar;\n"
- "typedef float4 b3Vector3;\n"
- "#define b3Max max\n"
- "#define b3Min min\n"
- "#define b3Sqrt sqrt\n"
- "typedef struct\n"
- "{\n"
- " unsigned int m_key;\n"
- " unsigned int m_value;\n"
- "} SortDataCL;\n"
- "typedef struct \n"
- "{\n"
- " union\n"
- " {\n"
- " float4 m_min;\n"
- " float m_minElems[4];\n"
- " int m_minIndices[4];\n"
- " };\n"
- " union\n"
- " {\n"
- " float4 m_max;\n"
- " float m_maxElems[4];\n"
- " int m_maxIndices[4];\n"
- " };\n"
- "} b3AabbCL;\n"
- "unsigned int interleaveBits(unsigned int x)\n"
- "{\n"
- " //........ ........ ......12 3456789A //x\n"
- " //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n"
- " \n"
- " //......12 3456789A ......12 3456789A //x ^ (x << 16)\n"
- " //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n"
- " //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n"
- " \n"
- " //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n"
- " //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n"
- " //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n"
- " \n"
- " //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)\n"
- " //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n"
- " //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n"
- " \n"
- " //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n"
- " //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n"
- " //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n"
- " \n"
- " //........ ........ ......11 11111111 //0x000003FF\n"
- " x &= 0x000003FF; //Clear all bits above bit 10\n"
- " \n"
- " x = (x ^ (x << 16)) & 0xFF0000FF;\n"
- " x = (x ^ (x << 8)) & 0x0300F00F;\n"
- " x = (x ^ (x << 4)) & 0x030C30C3;\n"
- " x = (x ^ (x << 2)) & 0x09249249;\n"
- " \n"
- " return x;\n"
- "}\n"
- "unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n"
- "{\n"
- " return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n"
- "}\n"
- "__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n"
- "{\n"
- " int separatedAabbIndex = get_global_id(0);\n"
- " if(separatedAabbIndex >= numAabbsToSeparate) return;\n"
- " int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n"
- " out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n"
- "}\n"
- "//Should replace with an optimized parallel reduction\n"
- "__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n"
- "{\n"
- " //Each time this kernel is added to the command queue, \n"
- " //the number of AABBs needing to be merged is halved\n"
- " //\n"
- " //Example with 159 AABBs:\n"
- " // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n"
- " // numMergedAabbs == 159 - 80 == 79\n"
- " //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n"
- " \n"
- " int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n"
- " int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n"
- " \n"
- " int aabbIndex = get_global_id(0);\n"
- " if(aabbIndex >= numMergedAabbs) return;\n"
- " \n"
- " int otherAabbIndex = aabbIndex + numRemainingAabbs;\n"
- " \n"
- " b3AabbCL aabb = out_mergedAabb[aabbIndex];\n"
- " b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n"
- " \n"
- " b3AabbCL mergedAabb;\n"
- " mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n"
- " mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n"
- " out_mergedAabb[aabbIndex] = mergedAabb;\n"
- "}\n"
- "__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n"
- " __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n"
- "{\n"
- " int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n"
- " if(leafNodeIndex >= numAabbs) return;\n"
- " \n"
- " b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n"
- " b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n"
- " b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n"
- " \n"
- " b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n"
- " b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n"
- " b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n"
- " \n"
- " //Quantize into integer coordinates\n"
- " //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n"
- " b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n"
- " \n"
- " int4 discretePosition;\n"
- " discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n"
- " discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n"
- " discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n"
- " \n"
- " //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n"
- " discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n"
- " discretePosition += 512;\n"
- " \n"
- " //Interleave bits(assign a morton code, also known as a z-curve)\n"
- " unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n"
- " \n"
- " //\n"
- " SortDataCL mortonCodeIndexPair;\n"
- " mortonCodeIndexPair.m_key = mortonCode;\n"
- " mortonCodeIndexPair.m_value = leafNodeIndex;\n"
- " \n"
- " out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n"
- "}\n"
- "#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n"
- "//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n"
- "//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n"
- "//In both cases, the bit should be cleared to access the actual node index.\n"
- "int isLeafNode(int index) { return (index >> 31 == 0); }\n"
- "int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n"
- "int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n"
- "//From sap.cl\n"
- "#define NEW_PAIR_MARKER -1\n"
- "bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n"
- "{\n"
- " bool overlap = true;\n"
- " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
- " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
- " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
- " return overlap;\n"
- "}\n"
- "//From sap.cl\n"
- "__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n"
- " __global int* rootNodeIndex, \n"
- " __global int2* internalNodeChildIndices, \n"
- " __global b3AabbCL* internalNodeAabbs,\n"
- " __global int2* internalNodeLeafIndexRanges,\n"
- " \n"
- " __global SortDataCL* mortonCodesAndAabbIndices,\n"
- " __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
- " int maxPairs, int numQueryAabbs)\n"
- "{\n"
- " //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n"
- " //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n"
- " int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
- " if(queryBvhNodeIndex >= numQueryAabbs) return;\n"
- " \n"
- " int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n"
- " b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n"
- " \n"
- " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
- " \n"
- " int stackSize = 1;\n"
- " stack[0] = *rootNodeIndex;\n"
- " \n"
- " while(stackSize)\n"
- " {\n"
- " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
- " --stackSize;\n"
- " \n"
- " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
- " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
- " \n"
- " //Optimization - if the BVH is structured as a binary radix tree, then\n"
- " //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n"
- " //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n"
- " {\n"
- " int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n"
- " if(highestLeafIndex <= queryBvhNodeIndex) continue;\n"
- " }\n"
- " \n"
- " //bvhRigidIndex is not used if internal node\n"
- " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
- " \n"
- " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
- " if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n"
- " {\n"
- " if(isLeaf)\n"
- " {\n"
- " int4 pair;\n"
- " pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n"
- " pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
- " pair.z = NEW_PAIR_MARKER;\n"
- " pair.w = NEW_PAIR_MARKER;\n"
- " \n"
- " int pairIndex = atomic_inc(out_numPairs);\n"
- " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
- " }\n"
- " \n"
- " if(!isLeaf) //Internal node\n"
- " {\n"
- " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
- " {\n"
- " //Error\n"
- " }\n"
- " else\n"
- " {\n"
- " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
- " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
- " }\n"
- " }\n"
- " }\n"
- " \n"
- " }\n"
- "}\n"
- "//From rayCastKernels.cl\n"
- "typedef struct\n"
- "{\n"
- " float4 m_from;\n"
- " float4 m_to;\n"
- "} b3RayInfo;\n"
- "//From rayCastKernels.cl\n"
- "b3Vector3 b3Vector3_normalize(b3Vector3 v)\n"
- "{\n"
- " b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n"
- " return normalize(normal); //OpenCL normalize == vector4 normalize\n"
- "}\n"
- "b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n"
- "b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n"
- "int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n"
- "{\n"
- " //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n"
- " //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n"
- " //\n"
- " //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n"
- " //and min.x will be the far plane; otherwise, it is reversed.\n"
- " //\n"
- " //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n"
- " //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n"
- " \n"
- " int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)\n"
- " \n"
- " //When using vector types, the select() function checks the most signficant bit, \n"
- " //but isless() sets the least significant bit.\n"
- " isNegative <<= 31;\n"
- " //select(b, a, condition) == condition ? a : b\n"
- " //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n"
- " b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n"
- " b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n"
- " \n"
- " b3Scalar t_min_final = 0.0f;\n"
- " b3Scalar t_max_final = rayLength;\n"
- " \n"
- " //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n"
- " //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n"
- " //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n"
- " t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n"
- " t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n"
- " \n"
- " return (t_min_final <= t_max_final);\n"
- "}\n"
- "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n"
- " __global int* rootNodeIndex, \n"
- " __global int2* internalNodeChildIndices, \n"
- " __global b3AabbCL* internalNodeAabbs,\n"
- " __global int2* internalNodeLeafIndexRanges,\n"
- " __global SortDataCL* mortonCodesAndAabbIndices,\n"
- " \n"
- " __global b3RayInfo* rays,\n"
- " \n"
- " __global int* out_numRayRigidPairs, \n"
- " __global int2* out_rayRigidPairs,\n"
- " int maxRayRigidPairs, int numRays)\n"
- "{\n"
- " int rayIndex = get_global_id(0);\n"
- " if(rayIndex >= numRays) return;\n"
- " \n"
- " //\n"
- " b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
- " b3Vector3 rayTo = rays[rayIndex].m_to;\n"
- " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
- " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
- " \n"
- " //\n"
- " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n"
- " \n"
- " int stackSize = 1;\n"
- " stack[0] = *rootNodeIndex;\n"
- " \n"
- " while(stackSize)\n"
- " {\n"
- " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n"
- " --stackSize;\n"
- " \n"
- " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n"
- " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n"
- " \n"
- " //bvhRigidIndex is not used if internal node\n"
- " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n"
- " \n"
- " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n"
- " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n"
- " {\n"
- " if(isLeaf)\n"
- " {\n"
- " int2 rayRigidPair;\n"
- " rayRigidPair.x = rayIndex;\n"
- " rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n"
- " \n"
- " int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
- " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
- " }\n"
- " \n"
- " if(!isLeaf) //Internal node\n"
- " {\n"
- " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n"
- " {\n"
- " //Error\n"
- " }\n"
- " else\n"
- " {\n"
- " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n"
- " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n"
- " }\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n"
- " __global int* out_numPairs, __global int4* out_overlappingPairs, \n"
- " int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n"
- "{\n"
- " int smallAabbIndex = get_global_id(0);\n"
- " if(smallAabbIndex >= numSmallAabbRigids) return;\n"
- " \n"
- " b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n"
- " for(int i = 0; i < numLargeAabbRigids; ++i)\n"
- " {\n"
- " b3AabbCL largeAabb = largeAabbs[i];\n"
- " if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n"
- " {\n"
- " int4 pair;\n"
- " pair.x = largeAabb.m_minIndices[3];\n"
- " pair.y = smallAabb.m_minIndices[3];\n"
- " pair.z = NEW_PAIR_MARKER;\n"
- " pair.w = NEW_PAIR_MARKER;\n"
- " \n"
- " int pairIndex = atomic_inc(out_numPairs);\n"
- " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n"
- " __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n"
- " int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n"
- "{\n"
- " int rayIndex = get_global_id(0);\n"
- " if(rayIndex >= numRays) return;\n"
- " \n"
- " b3Vector3 rayFrom = rays[rayIndex].m_from;\n"
- " b3Vector3 rayTo = rays[rayIndex].m_to;\n"
- " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n"
- " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n"
- " \n"
- " for(int i = 0; i < numLargeAabbRigids; ++i)\n"
- " {\n"
- " b3AabbCL rigidAabb = largeRigidAabbs[i];\n"
- " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n"
- " {\n"
- " int2 rayRigidPair;\n"
- " rayRigidPair.x = rayIndex;\n"
- " rayRigidPair.y = rigidAabb.m_minIndices[3];\n"
- " \n"
- " int pairIndex = atomic_inc(out_numRayRigidPairs);\n"
- " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n"
- " }\n"
- " }\n"
- "}\n"
- "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n"
- "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n"
- "//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n"
- "//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n"
- "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n"
- "#define B3_PLBVH_ROOT_NODE_MARKER -1\n"
- "#define b3Int64 long\n"
- "int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n"
- "b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n"
- "{\n"
- " //This function only needs to return (i & j) in order for the algorithm to work,\n"
- " //but it may help with debugging to mask out the lower bits.\n"
- " b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n"
- " b3Int64 sharedBits = i & j;\n"
- " b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n"
- " \n"
- " return sharedBits & bitmask;\n"
- "}\n"
- "//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n"
- "int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n"
- "{\n"
- " return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n"
- "}\n"
- "__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n"
- " __global b3Int64* out_commonPrefixes,\n"
- " __global int* out_commonPrefixLengths,\n"
- " int numInternalNodes)\n"
- "{\n"
- " int internalNodeIndex = get_global_id(0);\n"
- " if (internalNodeIndex >= numInternalNodes) return;\n"
- " \n"
- " //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n"
- " //and the number of internal nodes is always numLeafNodes - 1\n"
- " int leftLeafIndex = internalNodeIndex;\n"
- " int rightLeafIndex = internalNodeIndex + 1;\n"
- " \n"
- " int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n"
- " int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n"
- " \n"
- " //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n"
- " //Append the index of each leaf node to each morton code so that there are no duplicates.\n"
- " //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n"
- " //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n"
- " //\n"
- " //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n"
- " b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n"
- " b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n"
- " \n"
- " out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
- " out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n"
- "}\n"
- "__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n"
- " __global int2* out_childNodes, int numLeafNodes)\n"
- "{\n"
- " int leafNodeIndex = get_global_id(0);\n"
- " if (leafNodeIndex >= numLeafNodes) return;\n"
- " \n"
- " int numInternalNodes = numLeafNodes - 1;\n"
- " \n"
- " int leftSplitIndex = leafNodeIndex - 1;\n"
- " int rightSplitIndex = leafNodeIndex;\n"
- " \n"
- " int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
- " int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
- " \n"
- " //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n"
- " //Leaf nodes are considered as having the highest common prefix\n"
- " int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n"
- " \n"
- " //Handle cases for the edge nodes; the first and last node\n"
- " //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n"
- " if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n"
- " if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n"
- " \n"
- " int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n"
- " out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n"
- " \n"
- " int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n"
- " \n"
- " //out_childNodesAsInt[0] == int2.x == left child\n"
- " //out_childNodesAsInt[1] == int2.y == right child\n"
- " int isLeaf = 1;\n"
- " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
- " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n"
- "}\n"
- "__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n"
- " __global int2* out_childNodes,\n"
- " __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n"
- " int numInternalNodes)\n"
- "{\n"
- " int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n"
- " if(internalNodeIndex >= numInternalNodes) return;\n"
- " \n"
- " b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n"
- " int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n"
- " \n"
- "//#define USE_LINEAR_SEARCH\n"
- "#ifdef USE_LINEAR_SEARCH\n"
- " int leftIndex = -1;\n"
- " int rightIndex = -1;\n"
- " \n"
- " //Find nearest element to left with a lower common prefix\n"
- " for(int i = internalNodeIndex - 1; i >= 0; --i)\n"
- " {\n"
- " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
- " if(nodeLeftSharedPrefixLength < nodePrefixLength)\n"
- " {\n"
- " leftIndex = i;\n"
- " break;\n"
- " }\n"
- " }\n"
- " \n"
- " //Find nearest element to right with a lower common prefix\n"
- " for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n"
- " {\n"
- " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n"
- " if(nodeRightSharedPrefixLength < nodePrefixLength)\n"
- " {\n"
- " rightIndex = i;\n"
- " break;\n"
- " }\n"
- " }\n"
- " \n"
- "#else //Use binary search\n"
- " //Find nearest element to left with a lower common prefix\n"
- " int leftIndex = -1;\n"
- " {\n"
- " int lower = 0;\n"
- " int upper = internalNodeIndex - 1;\n"
- " \n"
- " while(lower <= upper)\n"
- " {\n"
- " int mid = (lower + upper) / 2;\n"
- " b3Int64 midPrefix = commonPrefixes[mid];\n"
- " int midPrefixLength = commonPrefixLengths[mid];\n"
- " \n"
- " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
- " if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
- " {\n"
- " int right = mid + 1;\n"
- " if(right < internalNodeIndex)\n"
- " {\n"
- " b3Int64 rightPrefix = commonPrefixes[right];\n"
- " int rightPrefixLength = commonPrefixLengths[right];\n"
- " \n"
- " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n"
- " if(nodeRightSharedPrefixLength < nodePrefixLength) \n"
- " {\n"
- " lower = right;\n"
- " leftIndex = right;\n"
- " }\n"
- " else \n"
- " {\n"
- " leftIndex = mid;\n"
- " break;\n"
- " }\n"
- " }\n"
- " else \n"
- " {\n"
- " leftIndex = mid;\n"
- " break;\n"
- " }\n"
- " }\n"
- " else upper = mid - 1;\n"
- " }\n"
- " }\n"
- " \n"
- " //Find nearest element to right with a lower common prefix\n"
- " int rightIndex = -1;\n"
- " {\n"
- " int lower = internalNodeIndex + 1;\n"
- " int upper = numInternalNodes - 1;\n"
- " \n"
- " while(lower <= upper)\n"
- " {\n"
- " int mid = (lower + upper) / 2;\n"
- " b3Int64 midPrefix = commonPrefixes[mid];\n"
- " int midPrefixLength = commonPrefixLengths[mid];\n"
- " \n"
- " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n"
- " if(nodeMidSharedPrefixLength < nodePrefixLength) \n"
- " {\n"
- " int left = mid - 1;\n"
- " if(left > internalNodeIndex)\n"
- " {\n"
- " b3Int64 leftPrefix = commonPrefixes[left];\n"
- " int leftPrefixLength = commonPrefixLengths[left];\n"
- " \n"
- " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n"
- " if(nodeLeftSharedPrefixLength < nodePrefixLength) \n"
- " {\n"
- " upper = left;\n"
- " rightIndex = left;\n"
- " }\n"
- " else \n"
- " {\n"
- " rightIndex = mid;\n"
- " break;\n"
- " }\n"
- " }\n"
- " else \n"
- " {\n"
- " rightIndex = mid;\n"
- " break;\n"
- " }\n"
- " }\n"
- " else lower = mid + 1;\n"
- " }\n"
- " }\n"
- "#endif\n"
- " \n"
- " //Select parent\n"
- " {\n"
- " int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
- " int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n"
- " \n"
- " int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n"
- " \n"
- " if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n"
- " else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n"
- " \n"
- " int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n"
- " \n"
- " int isRootNode = (leftIndex == -1 && rightIndex == -1);\n"
- " out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n"
- " \n"
- " int isLeaf = 0;\n"
- " if(!isRootNode)\n"
- " {\n"
- " int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n"
- " \n"
- " //out_childNodesAsInt[0] == int2.x == left child\n"
- " //out_childNodesAsInt[1] == int2.y == right child\n"
- " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n"
- " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
- " }\n"
- " else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n"
- " }\n"
- "}\n"
- "__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n"
- " __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n"
- "{\n"
- " if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n"
- " int internalNodeIndex = get_global_id(0);\n"
- " if(internalNodeIndex >= numInternalNodes) return;\n"
- " \n"
- " //\n"
- " int distanceFromRoot = 0;\n"
- " {\n"
- " int parentIndex = internalNodeParentNodes[internalNodeIndex];\n"
- " while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n"
- " {\n"
- " parentIndex = internalNodeParentNodes[parentIndex];\n"
- " ++distanceFromRoot;\n"
- " }\n"
- " }\n"
- " out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n"
- " \n"
- " //\n"
- " __local int localMaxDistanceFromRoot;\n"
- " if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n"
- "}\n"
- "__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n"
- " __global int2* childNodes,\n"
- " __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n"
- " int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n"
- "{\n"
- " int internalNodeIndex = get_global_id(0);\n"
- " if(internalNodeIndex >= numInternalNodes) return;\n"
- " \n"
- " int distance = distanceFromRoot[internalNodeIndex];\n"
- " \n"
- " if(distance == processedDistance)\n"
- " {\n"
- " int leftChildIndex = childNodes[internalNodeIndex].x;\n"
- " int rightChildIndex = childNodes[internalNodeIndex].y;\n"
- " \n"
- " int isLeftChildLeaf = isLeafNode(leftChildIndex);\n"
- " int isRightChildLeaf = isLeafNode(rightChildIndex);\n"
- " \n"
- " leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n"
- " rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n"
- " \n"
- " //leftRigidIndex/rightRigidIndex is not used if internal node\n"
- " int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n"
- " int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n"
- " \n"
- " b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n"
- " b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n"
- " \n"
- " b3AabbCL mergedAabb;\n"
- " mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n"
- " mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n"
- " internalNodeAabbs[internalNodeIndex] = mergedAabb;\n"
- " }\n"
- "}\n"
- "__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n"
- "{\n"
- " int internalNodeIndex = get_global_id(0);\n"
- " if(internalNodeIndex >= numInternalNodes) return;\n"
- " \n"
- " int numLeafNodes = numInternalNodes + 1;\n"
- " \n"
- " int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n"
- " \n"
- " int2 leafIndexRange; //x == min leaf index, y == max leaf index\n"
- " \n"
- " //Find lowest leaf index covered by this internal node\n"
- " {\n"
- " int lowestIndex = childNodes.x; //childNodes.x == Left child\n"
- " while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n"
- " leafIndexRange.x = lowestIndex;\n"
- " }\n"
- " \n"
- " //Find highest leaf index covered by this internal node\n"
- " {\n"
- " int highestIndex = childNodes.y; //childNodes.y == Right child\n"
- " while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n"
- " leafIndexRange.y = highestIndex;\n"
- " }\n"
- " \n"
- " //\n"
- " out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n"
- "}\n";
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl
deleted file mode 100644
index 93f77a6433..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl
+++ /dev/null
@@ -1,389 +0,0 @@
-/*
-Copyright (c) 2012 Advanced Micro Devices, Inc.
-
-This software is provided 'as-is', without any express or implied warranty.
-In no event will the authors be held liable for any damages arising from the use of this software.
-Permission is granted to anyone to use this software for any purpose,
-including commercial applications, and to alter it and redistribute it freely,
-subject to the following restrictions:
-
-1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
-2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
-3. This notice may not be removed or altered from any source distribution.
-*/
-//Originally written by Erwin Coumans
-
-#define NEW_PAIR_MARKER -1
-
-typedef struct
-{
- union
- {
- float4 m_min;
- float m_minElems[4];
- int m_minIndices[4];
- };
- union
- {
- float4 m_max;
- float m_maxElems[4];
- int m_maxIndices[4];
- };
-} btAabbCL;
-
-
-/// conservative test for overlap between two aabbs
-bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);
-bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)
-{
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
-}
-bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);
-bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)
-{
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
-}
-
-bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);
-bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)
-{
- bool overlap = true;
- overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;
- overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;
- overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;
- return overlap;
-}
-
-
-__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)
-{
- int i = get_global_id(0);
- if (i>=numUnsortedAabbs)
- return;
-
- int j = get_global_id(1);
- if (j>=numUnSortedAabbs2)
- return;
-
-
- __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];
- __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];
-
- if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))
- {
- int4 myPair;
-
- int xIndex = unsortedAabbPtr[0].m_minIndices[3];
- int yIndex = unsortedAabbPtr2[0].m_minIndices[3];
- if (xIndex>yIndex)
- {
- int tmp = xIndex;
- xIndex=yIndex;
- yIndex=tmp;
- }
-
- myPair.x = xIndex;
- myPair.y = yIndex;
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
-
-
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
-}
-
-
-
-__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
-{
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- for (int j=i+1;j<numObjects;j++)
- {
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
-
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
-}
-
-__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
-{
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- for (int j=i+1;j<numObjects;j++)
- {
- if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis]))
- {
- break;
- }
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
-
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
-}
-
-
-
-
-__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
-{
- int i = get_global_id(0);
- int localId = get_local_id(0);
-
- __local int numActiveWgItems[1];
- __local int breakRequest[1];
-
- if (localId==0)
- {
- numActiveWgItems[0] = 0;
- breakRequest[0] = 0;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
- atomic_inc(numActiveWgItems);
- barrier(CLK_LOCAL_MEM_FENCE);
- int localBreak = 0;
-
- int j=i+1;
- do
- {
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j<numObjects)
- {
- if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis]))
- {
- if (!localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j>=numObjects && !localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (!localBreak)
- {
- if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))
- {
- int4 myPair;
- myPair.x = aabbs[i].m_minIndices[3];
- myPair.y = aabbs[j].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
-
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
- j++;
-
- } while (breakRequest[0]<numActiveWgItems[0]);
-}
-
-
-__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)
-{
- int i = get_global_id(0);
- int localId = get_local_id(0);
-
- __local int numActiveWgItems[1];
- __local int breakRequest[1];
- __local btAabbCL localAabbs[128];// = aabbs[i];
-
- btAabbCL myAabb;
-
- myAabb = (i<numObjects)? aabbs[i]:aabbs[0];
- float testValue = myAabb.m_maxElems[axis];
-
- if (localId==0)
- {
- numActiveWgItems[0] = 0;
- breakRequest[0] = 0;
- }
- int localCount=0;
- int block=0;
- localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];
- localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];
-
- barrier(CLK_LOCAL_MEM_FENCE);
- atomic_inc(numActiveWgItems);
- barrier(CLK_LOCAL_MEM_FENCE);
- int localBreak = 0;
-
- int j=i+1;
- do
- {
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j<numObjects)
- {
- if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis]))
- {
- if (!localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (j>=numObjects && !localBreak)
- {
- atomic_inc(breakRequest);
- localBreak = 1;
- }
- barrier(CLK_LOCAL_MEM_FENCE);
-
- if (!localBreak)
- {
- if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))
- {
- int4 myPair;
- myPair.x = myAabb.m_minIndices[3];
- myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];
- myPair.z = NEW_PAIR_MARKER;
- myPair.w = NEW_PAIR_MARKER;
-
- int curPair = atomic_inc (pairCount);
- if (curPair<maxPairs)
- {
- pairsOut[curPair] = myPair; //flush to main memory
- }
- }
- }
-
- barrier(CLK_LOCAL_MEM_FENCE);
-
- localCount++;
- if (localCount==64)
- {
- localCount = 0;
- block+=64;
- localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];
- localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];
- }
- j++;
-
- } while (breakRequest[0]<numActiveWgItems[0]);
-
-}
-
-
-
-
-//http://stereopsis.com/radix.html
-unsigned int FloatFlip(float fl);
-unsigned int FloatFlip(float fl)
-{
- unsigned int f = *(unsigned int*)&fl;
- unsigned int mask = -(int)(f >> 31) | 0x80000000;
- return f ^ mask;
-}
-float IFloatFlip(unsigned int f);
-float IFloatFlip(unsigned int f)
-{
- unsigned int mask = ((f >> 31) - 1) | 0x80000000;
- unsigned int fl = f ^ mask;
- return *(float*)&fl;
-}
-
-
-
-
-__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)
-{
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
- int src = destAabbs[i].m_maxIndices[3];
- destAabbs[i] = allAabbs[src];
- destAabbs[i].m_maxIndices[3] = src;
-}
-
-
-__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)
-{
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
-
-
- sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);
- sortData[i].y = i;
-
-}
-
-
-__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)
-{
- int i = get_global_id(0);
- if (i>=numObjects)
- return;
-
- sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];
-}
-
-
-
-__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)
-{
- int i = get_global_id(0);
- if (i>=numAabbs)
- return;
-
- btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];
-
- float4 s;
- s = (smallAabb.m_max+smallAabb.m_min)*0.5f;
- sum[i]=s;
- sum2[i]=s*s;
-}
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
deleted file mode 100644
index d6999b94cb..0000000000
--- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h
+++ /dev/null
@@ -1,341 +0,0 @@
-//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
-static const char* sapCL =
- "/*\n"
- "Copyright (c) 2012 Advanced Micro Devices, Inc. \n"
- "This software is provided 'as-is', without any express or implied warranty.\n"
- "In no event will the authors be held liable for any damages arising from the use of this software.\n"
- "Permission is granted to anyone to use this software for any purpose, \n"
- "including commercial applications, and to alter it and redistribute it freely, \n"
- "subject to the following restrictions:\n"
- "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n"
- "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n"
- "3. This notice may not be removed or altered from any source distribution.\n"
- "*/\n"
- "//Originally written by Erwin Coumans\n"
- "#define NEW_PAIR_MARKER -1\n"
- "typedef struct \n"
- "{\n"
- " union\n"
- " {\n"
- " float4 m_min;\n"
- " float m_minElems[4];\n"
- " int m_minIndices[4];\n"
- " };\n"
- " union\n"
- " {\n"
- " float4 m_max;\n"
- " float m_maxElems[4];\n"
- " int m_maxIndices[4];\n"
- " };\n"
- "} btAabbCL;\n"
- "/// conservative test for overlap between two aabbs\n"
- "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n"
- "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n"
- "{\n"
- " bool overlap = true;\n"
- " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
- " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
- " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
- " return overlap;\n"
- "}\n"
- "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
- "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
- "{\n"
- " bool overlap = true;\n"
- " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
- " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
- " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
- " return overlap;\n"
- "}\n"
- "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n"
- "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n"
- "{\n"
- " bool overlap = true;\n"
- " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n"
- " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n"
- " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n"
- " return overlap;\n"
- "}\n"
- "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numUnsortedAabbs)\n"
- " return;\n"
- " int j = get_global_id(1);\n"
- " if (j>=numUnSortedAabbs2)\n"
- " return;\n"
- " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n"
- " __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n"
- " if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n"
- " {\n"
- " int4 myPair;\n"
- " \n"
- " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n"
- " int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n"
- " if (xIndex>yIndex)\n"
- " {\n"
- " int tmp = xIndex;\n"
- " xIndex=yIndex;\n"
- " yIndex=tmp;\n"
- " }\n"
- " \n"
- " myPair.x = xIndex;\n"
- " myPair.y = yIndex;\n"
- " myPair.z = NEW_PAIR_MARKER;\n"
- " myPair.w = NEW_PAIR_MARKER;\n"
- " int curPair = atomic_inc (pairCount);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " pairsOut[curPair] = myPair; //flush to main memory\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numObjects)\n"
- " return;\n"
- " for (int j=i+1;j<numObjects;j++)\n"
- " {\n"
- " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
- " {\n"
- " int4 myPair;\n"
- " myPair.x = aabbs[i].m_minIndices[3];\n"
- " myPair.y = aabbs[j].m_minIndices[3];\n"
- " myPair.z = NEW_PAIR_MARKER;\n"
- " myPair.w = NEW_PAIR_MARKER;\n"
- " int curPair = atomic_inc (pairCount);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " pairsOut[curPair] = myPair; //flush to main memory\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numObjects)\n"
- " return;\n"
- " for (int j=i+1;j<numObjects;j++)\n"
- " {\n"
- " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n"
- " {\n"
- " break;\n"
- " }\n"
- " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
- " {\n"
- " int4 myPair;\n"
- " myPair.x = aabbs[i].m_minIndices[3];\n"
- " myPair.y = aabbs[j].m_minIndices[3];\n"
- " myPair.z = NEW_PAIR_MARKER;\n"
- " myPair.w = NEW_PAIR_MARKER;\n"
- " int curPair = atomic_inc (pairCount);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " pairsOut[curPair] = myPair; //flush to main memory\n"
- " }\n"
- " }\n"
- " }\n"
- "}\n"
- "__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " int localId = get_local_id(0);\n"
- " __local int numActiveWgItems[1];\n"
- " __local int breakRequest[1];\n"
- " if (localId==0)\n"
- " {\n"
- " numActiveWgItems[0] = 0;\n"
- " breakRequest[0] = 0;\n"
- " }\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " atomic_inc(numActiveWgItems);\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " int localBreak = 0;\n"
- " int j=i+1;\n"
- " do\n"
- " {\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (j<numObjects)\n"
- " {\n"
- " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n"
- " {\n"
- " if (!localBreak)\n"
- " {\n"
- " atomic_inc(breakRequest);\n"
- " localBreak = 1;\n"
- " }\n"
- " }\n"
- " }\n"
- " \n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (j>=numObjects && !localBreak)\n"
- " {\n"
- " atomic_inc(breakRequest);\n"
- " localBreak = 1;\n"
- " }\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (!localBreak)\n"
- " {\n"
- " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n"
- " {\n"
- " int4 myPair;\n"
- " myPair.x = aabbs[i].m_minIndices[3];\n"
- " myPair.y = aabbs[j].m_minIndices[3];\n"
- " myPair.z = NEW_PAIR_MARKER;\n"
- " myPair.w = NEW_PAIR_MARKER;\n"
- " int curPair = atomic_inc (pairCount);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " pairsOut[curPair] = myPair; //flush to main memory\n"
- " }\n"
- " }\n"
- " }\n"
- " j++;\n"
- " } while (breakRequest[0]<numActiveWgItems[0]);\n"
- "}\n"
- "__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " int localId = get_local_id(0);\n"
- " __local int numActiveWgItems[1];\n"
- " __local int breakRequest[1];\n"
- " __local btAabbCL localAabbs[128];// = aabbs[i];\n"
- " \n"
- " btAabbCL myAabb;\n"
- " \n"
- " myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n"
- " float testValue = myAabb.m_maxElems[axis];\n"
- " \n"
- " if (localId==0)\n"
- " {\n"
- " numActiveWgItems[0] = 0;\n"
- " breakRequest[0] = 0;\n"
- " }\n"
- " int localCount=0;\n"
- " int block=0;\n"
- " localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n"
- " localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n"
- " \n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " atomic_inc(numActiveWgItems);\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " int localBreak = 0;\n"
- " \n"
- " int j=i+1;\n"
- " do\n"
- " {\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (j<numObjects)\n"
- " {\n"
- " if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n"
- " {\n"
- " if (!localBreak)\n"
- " {\n"
- " atomic_inc(breakRequest);\n"
- " localBreak = 1;\n"
- " }\n"
- " }\n"
- " }\n"
- " \n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (j>=numObjects && !localBreak)\n"
- " {\n"
- " atomic_inc(breakRequest);\n"
- " localBreak = 1;\n"
- " }\n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " \n"
- " if (!localBreak)\n"
- " {\n"
- " if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n"
- " {\n"
- " int4 myPair;\n"
- " myPair.x = myAabb.m_minIndices[3];\n"
- " myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n"
- " myPair.z = NEW_PAIR_MARKER;\n"
- " myPair.w = NEW_PAIR_MARKER;\n"
- " int curPair = atomic_inc (pairCount);\n"
- " if (curPair<maxPairs)\n"
- " {\n"
- " pairsOut[curPair] = myPair; //flush to main memory\n"
- " }\n"
- " }\n"
- " }\n"
- " \n"
- " barrier(CLK_LOCAL_MEM_FENCE);\n"
- " localCount++;\n"
- " if (localCount==64)\n"
- " {\n"
- " localCount = 0;\n"
- " block+=64; \n"
- " localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n"
- " localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n"
- " }\n"
- " j++;\n"
- " \n"
- " } while (breakRequest[0]<numActiveWgItems[0]);\n"
- " \n"
- "}\n"
- "//http://stereopsis.com/radix.html\n"
- "unsigned int FloatFlip(float fl);\n"
- "unsigned int FloatFlip(float fl)\n"
- "{\n"
- " unsigned int f = *(unsigned int*)&fl;\n"
- " unsigned int mask = -(int)(f >> 31) | 0x80000000;\n"
- " return f ^ mask;\n"
- "}\n"
- "float IFloatFlip(unsigned int f);\n"
- "float IFloatFlip(unsigned int f)\n"
- "{\n"
- " unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n"
- " unsigned int fl = f ^ mask;\n"
- " return *(float*)&fl;\n"
- "}\n"
- "__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numObjects)\n"
- " return;\n"
- " int src = destAabbs[i].m_maxIndices[3];\n"
- " destAabbs[i] = allAabbs[src];\n"
- " destAabbs[i].m_maxIndices[3] = src;\n"
- "}\n"
- "__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numObjects)\n"
- " return;\n"
- " \n"
- " \n"
- " sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n"
- " sortData[i].y = i;\n"
- " \n"
- "}\n"
- "__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numObjects)\n"
- " return;\n"
- " \n"
- " sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n"
- "}\n"
- "__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n"
- "{\n"
- " int i = get_global_id(0);\n"
- " if (i>=numAabbs)\n"
- " return;\n"
- " \n"
- " btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n"
- " \n"
- " float4 s;\n"
- " s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n"
- " sum[i]=s;\n"
- " sum2[i]=s*s; \n"
- "}\n";