diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels')
6 files changed, 0 insertions, 2639 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl deleted file mode 100644 index ded4796d33..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphase.cl +++ /dev/null @@ -1,216 +0,0 @@ - - -int getPosHash(int4 gridPos, __global float4* pParams) -{ - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x &= gridDim.x - 1; - gridPos.y &= gridDim.y - 1; - gridPos.z &= gridDim.z - 1; - int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x; - return hash; -} - -int4 getGridPos(float4 worldPos, __global float4* pParams) -{ - int4 gridPos; - int4 gridDim = *((__global int4*)(pParams + 1)); - gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1); - gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1); - gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1); - return gridPos; -} - - -// calculate grid hash value for each body using its AABB -__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams ) -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - float4 bbMin = allpAABB[smallAabbMapping[index]*2]; - float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - pos.w = 0.f; - // get address in grid - int4 gridPos = getGridPos(pos, pParams); - int gridHash = getPosHash(gridPos, pParams); - // store grid hash and body index - int2 hashVal; - hashVal.x = gridHash; - hashVal.y = index; - pHash[index] = hashVal; -} - -__kernel void kClearCellStart( int numCells, - __global int* pCellStart ) -{ - int index = get_global_id(0); - if(index >= numCells) - { - return; - } - pCellStart[index] = -1; -} - -__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart ) -{ - __local int sharedHash[513]; - int index = get_global_id(0); - int2 sortedData; - - if(index < numObjects) - { - sortedData = pHash[index]; - // Load hash data into shared memory so that we can look - // at neighboring body's hash value without loading - // two hash values per thread - sharedHash[get_local_id(0) + 1] = sortedData.x; - if((index > 0) && (get_local_id(0) == 0)) - { - // first thread in block must load neighbor body hash - sharedHash[0] = pHash[index-1].x; - } - } - barrier(CLK_LOCAL_MEM_FENCE); - if(index < numObjects) - { - if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)])) - { - cellStart[sortedData.x] = index; - } - } -} - -int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1) -{ - return (min0.x <= max1.x)&& (min1.x <= max0.x) && - (min0.y <= max1.y)&& (min1.y <= max0.y) && - (min0.z <= max1.z)&& (min1.z <= max0.z); -} - - - - -//search for AABB 'index' against other AABBs' in this cell -void findPairsInCell( int numObjects, - int4 gridPos, - int index, - __global int2* pHash, - __global int* pCellStart, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global float4* pParams, - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) -{ - int4 pGridDim = *((__global int4*)(pParams + 1)); - int maxBodiesPerCell = pGridDim.w; - int gridHash = getPosHash(gridPos, pParams); - // get start of bucket for this cell - int bucketStart = pCellStart[gridHash]; - if (bucketStart == -1) - { - return; // cell empty - } - // iterate over bodies in this cell - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - int handleIndex = as_int(min0.w); - - int bucketEnd = bucketStart + maxBodiesPerCell; - bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd; - for(int index2 = bucketStart; index2 < bucketEnd; index2++) - { - int2 cellData = pHash[index2]; - if (cellData.x != gridHash) - { - break; // no longer in same bucket - } - int unsorted_indx2 = cellData.y; - //if (unsorted_indx2 < unsorted_indx) // check not colliding with self - if (unsorted_indx2 != unsorted_indx) // check not colliding with self - { - float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0]; - float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1]; - if(testAABBOverlap(min0, max0, min1, max1)) - { - if (pairCount) - { - int handleIndex2 = as_int(min1.w); - if (handleIndex<handleIndex2) - { - int curPair = atomic_add(pairCount,1); - if (curPair<maxPairs) - { - int4 newpair; - newpair.x = handleIndex; - newpair.y = handleIndex2; - newpair.z = -1; - newpair.w = -1; - pPairBuff2[curPair] = newpair; - } - } - - } - } - } - } -} - -__kernel void kFindOverlappingPairs( int numObjects, - __global float4* allpAABB, - __global const int* smallAabbMapping, - __global int2* pHash, - __global int* pCellStart, - __global float4* pParams , - volatile __global int* pairCount, - __global int4* pPairBuff2, - int maxPairs - ) - -{ - int index = get_global_id(0); - if(index >= numObjects) - { - return; - } - int2 sortedData = pHash[index]; - int unsorted_indx = sortedData.y; - float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; - float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1]; - float4 pos; - pos.x = (bbMin.x + bbMax.x) * 0.5f; - pos.y = (bbMin.y + bbMax.y) * 0.5f; - pos.z = (bbMin.z + bbMax.z) * 0.5f; - // get address in grid - int4 gridPosA = getGridPos(pos, pParams); - int4 gridPosB; - // examine only neighbouring cells - for(int z=-1; z<=1; z++) - { - gridPosB.z = gridPosA.z + z; - for(int y=-1; y<=1; y++) - { - gridPosB.y = gridPosA.y + y; - for(int x=-1; x<=1; x++) - { - gridPosB.x = gridPosA.x + x; - findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs); - } - } - } -} - - - - - diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h deleted file mode 100644 index 0185417786..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/gridBroadphaseKernels.h +++ /dev/null @@ -1,198 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* gridBroadphaseCL = - "int getPosHash(int4 gridPos, __global float4* pParams)\n" - "{\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x &= gridDim.x - 1;\n" - " gridPos.y &= gridDim.y - 1;\n" - " gridPos.z &= gridDim.z - 1;\n" - " int hash = gridPos.z * gridDim.y * gridDim.x + gridPos.y * gridDim.x + gridPos.x;\n" - " return hash;\n" - "} \n" - "int4 getGridPos(float4 worldPos, __global float4* pParams)\n" - "{\n" - " int4 gridPos;\n" - " int4 gridDim = *((__global int4*)(pParams + 1));\n" - " gridPos.x = (int)floor(worldPos.x * pParams[0].x) & (gridDim.x - 1);\n" - " gridPos.y = (int)floor(worldPos.y * pParams[0].y) & (gridDim.y - 1);\n" - " gridPos.z = (int)floor(worldPos.z * pParams[0].z) & (gridDim.z - 1);\n" - " return gridPos;\n" - "}\n" - "// calculate grid hash value for each body using its AABB\n" - "__kernel void kCalcHashAABB(int numObjects, __global float4* allpAABB, __global const int* smallAabbMapping, __global int2* pHash, __global float4* pParams )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " float4 bbMin = allpAABB[smallAabbMapping[index]*2];\n" - " float4 bbMax = allpAABB[smallAabbMapping[index]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " pos.w = 0.f;\n" - " // get address in grid\n" - " int4 gridPos = getGridPos(pos, pParams);\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // store grid hash and body index\n" - " int2 hashVal;\n" - " hashVal.x = gridHash;\n" - " hashVal.y = index;\n" - " pHash[index] = hashVal;\n" - "}\n" - "__kernel void kClearCellStart( int numCells, \n" - " __global int* pCellStart )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numCells)\n" - " {\n" - " return;\n" - " }\n" - " pCellStart[index] = -1;\n" - "}\n" - "__kernel void kFindCellStart(int numObjects, __global int2* pHash, __global int* cellStart )\n" - "{\n" - " __local int sharedHash[513];\n" - " int index = get_global_id(0);\n" - " int2 sortedData;\n" - " if(index < numObjects)\n" - " {\n" - " sortedData = pHash[index];\n" - " // Load hash data into shared memory so that we can look \n" - " // at neighboring body's hash value without loading\n" - " // two hash values per thread\n" - " sharedHash[get_local_id(0) + 1] = sortedData.x;\n" - " if((index > 0) && (get_local_id(0) == 0))\n" - " {\n" - " // first thread in block must load neighbor body hash\n" - " sharedHash[0] = pHash[index-1].x;\n" - " }\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " if(index < numObjects)\n" - " {\n" - " if((index == 0) || (sortedData.x != sharedHash[get_local_id(0)]))\n" - " {\n" - " cellStart[sortedData.x] = index;\n" - " }\n" - " }\n" - "}\n" - "int testAABBOverlap(float4 min0, float4 max0, float4 min1, float4 max1)\n" - "{\n" - " return (min0.x <= max1.x)&& (min1.x <= max0.x) && \n" - " (min0.y <= max1.y)&& (min1.y <= max0.y) && \n" - " (min0.z <= max1.z)&& (min1.z <= max0.z); \n" - "}\n" - "//search for AABB 'index' against other AABBs' in this cell\n" - "void findPairsInCell( int numObjects,\n" - " int4 gridPos,\n" - " int index,\n" - " __global int2* pHash,\n" - " __global int* pCellStart,\n" - " __global float4* allpAABB, \n" - " __global const int* smallAabbMapping,\n" - " __global float4* pParams,\n" - " volatile __global int* pairCount,\n" - " __global int4* pPairBuff2,\n" - " int maxPairs\n" - " )\n" - "{\n" - " int4 pGridDim = *((__global int4*)(pParams + 1));\n" - " int maxBodiesPerCell = pGridDim.w;\n" - " int gridHash = getPosHash(gridPos, pParams);\n" - " // get start of bucket for this cell\n" - " int bucketStart = pCellStart[gridHash];\n" - " if (bucketStart == -1)\n" - " {\n" - " return; // cell empty\n" - " }\n" - " // iterate over bodies in this cell\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 min0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0]; \n" - " float4 max0 = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " int handleIndex = as_int(min0.w);\n" - " \n" - " int bucketEnd = bucketStart + maxBodiesPerCell;\n" - " bucketEnd = (bucketEnd > numObjects) ? numObjects : bucketEnd;\n" - " for(int index2 = bucketStart; index2 < bucketEnd; index2++) \n" - " {\n" - " int2 cellData = pHash[index2];\n" - " if (cellData.x != gridHash)\n" - " {\n" - " break; // no longer in same bucket\n" - " }\n" - " int unsorted_indx2 = cellData.y;\n" - " //if (unsorted_indx2 < unsorted_indx) // check not colliding with self\n" - " if (unsorted_indx2 != unsorted_indx) // check not colliding with self\n" - " { \n" - " float4 min1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 0];\n" - " float4 max1 = allpAABB[smallAabbMapping[unsorted_indx2]*2 + 1];\n" - " if(testAABBOverlap(min0, max0, min1, max1))\n" - " {\n" - " if (pairCount)\n" - " {\n" - " int handleIndex2 = as_int(min1.w);\n" - " if (handleIndex<handleIndex2)\n" - " {\n" - " int curPair = atomic_add(pairCount,1);\n" - " if (curPair<maxPairs)\n" - " {\n" - " int4 newpair;\n" - " newpair.x = handleIndex;\n" - " newpair.y = handleIndex2;\n" - " newpair.z = -1;\n" - " newpair.w = -1;\n" - " pPairBuff2[curPair] = newpair;\n" - " }\n" - " }\n" - " \n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void kFindOverlappingPairs( int numObjects,\n" - " __global float4* allpAABB, \n" - " __global const int* smallAabbMapping,\n" - " __global int2* pHash, \n" - " __global int* pCellStart, \n" - " __global float4* pParams ,\n" - " volatile __global int* pairCount,\n" - " __global int4* pPairBuff2,\n" - " int maxPairs\n" - " )\n" - "{\n" - " int index = get_global_id(0);\n" - " if(index >= numObjects)\n" - " {\n" - " return;\n" - " }\n" - " int2 sortedData = pHash[index];\n" - " int unsorted_indx = sortedData.y;\n" - " float4 bbMin = allpAABB[smallAabbMapping[unsorted_indx]*2 + 0];\n" - " float4 bbMax = allpAABB[smallAabbMapping[unsorted_indx]*2 + 1];\n" - " float4 pos;\n" - " pos.x = (bbMin.x + bbMax.x) * 0.5f;\n" - " pos.y = (bbMin.y + bbMax.y) * 0.5f;\n" - " pos.z = (bbMin.z + bbMax.z) * 0.5f;\n" - " // get address in grid\n" - " int4 gridPosA = getGridPos(pos, pParams);\n" - " int4 gridPosB; \n" - " // examine only neighbouring cells\n" - " for(int z=-1; z<=1; z++) \n" - " {\n" - " gridPosB.z = gridPosA.z + z;\n" - " for(int y=-1; y<=1; y++) \n" - " {\n" - " gridPosB.y = gridPosA.y + y;\n" - " for(int x=-1; x<=1; x++) \n" - " {\n" - " gridPosB.x = gridPosA.x + x;\n" - " findPairsInCell(numObjects, gridPosB, index, pHash, pCellStart, allpAABB,smallAabbMapping, pParams, pairCount,pPairBuff2, maxPairs);\n" - " }\n" - " }\n" - " }\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl deleted file mode 100644 index c375b9bf37..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvh.cl +++ /dev/null @@ -1,767 +0,0 @@ -/* -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Initial Author Jackson Lee, 2014 - -typedef float b3Scalar; -typedef float4 b3Vector3; -#define b3Max max -#define b3Min min -#define b3Sqrt sqrt - -typedef struct -{ - unsigned int m_key; - unsigned int m_value; -} SortDataCL; - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} b3AabbCL; - - -unsigned int interleaveBits(unsigned int x) -{ - //........ ........ ......12 3456789A //x - //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits - - //......12 3456789A ......12 3456789A //x ^ (x << 16) - //11111111 ........ ........ 11111111 //0x FF 00 00 FF - //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF; - - //......12 ........ 3456789A 3456789A //x ^ (x << 8) - //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F - //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F; - - //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4) - //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3 - //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3; - - //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2) - //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49 - //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249; - - //........ ........ ......11 11111111 //0x000003FF - x &= 0x000003FF; //Clear all bits above bit 10 - - x = (x ^ (x << 16)) & 0xFF0000FF; - x = (x ^ (x << 8)) & 0x0300F00F; - x = (x ^ (x << 4)) & 0x030C30C3; - x = (x ^ (x << 2)) & 0x09249249; - - return x; -} -unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z) -{ - return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2; -} - -__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate) -{ - int separatedAabbIndex = get_global_id(0); - if(separatedAabbIndex >= numAabbsToSeparate) return; - - int unseparatedAabbIndex = aabbIndices[separatedAabbIndex]; - out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex]; -} - -//Should replace with an optimized parallel reduction -__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge) -{ - //Each time this kernel is added to the command queue, - //the number of AABBs needing to be merged is halved - // - //Example with 159 AABBs: - // numRemainingAabbs == 159 / 2 + 159 % 2 == 80 - // numMergedAabbs == 159 - 80 == 79 - //So, indices [0, 78] are merged with [0 + 80, 78 + 80] - - int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2; - int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs; - - int aabbIndex = get_global_id(0); - if(aabbIndex >= numMergedAabbs) return; - - int otherAabbIndex = aabbIndex + numRemainingAabbs; - - b3AabbCL aabb = out_mergedAabb[aabbIndex]; - b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min); - mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max); - out_mergedAabb[aabbIndex] = mergedAabb; -} - -__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, - __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs) -{ - int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index - if(leafNodeIndex >= numAabbs) return; - - b3AabbCL mergedAabb = mergedAabbOfAllNodes[0]; - b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f; - b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024; - - b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex]; - b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f; - b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter; - - //Quantize into integer coordinates - //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size - b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize; - - int4 discretePosition; - discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) ); - discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) ); - discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) ); - - //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023] - discretePosition = b3Max( -512, b3Min(discretePosition, 511) ); - discretePosition += 512; - - //Interleave bits(assign a morton code, also known as a z-curve) - unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z); - - // - SortDataCL mortonCodeIndexPair; - mortonCodeIndexPair.m_key = mortonCode; - mortonCodeIndexPair.m_value = leafNodeIndex; - - out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair; -} - -#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128 - -//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes. -//If it is set, then the index is for an internal node; otherwise, it is a leaf node. -//In both cases, the bit should be cleared to access the actual node index. -int isLeafNode(int index) { return (index >> 31 == 0); } -int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); } -int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); } - -//From sap.cl -#define NEW_PAIR_MARKER -1 - -bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -//From sap.cl - -__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - - __global SortDataCL* mortonCodesAndAabbIndices, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numQueryAabbs) -{ - //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since - //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent) - int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(queryBvhNodeIndex >= numQueryAabbs) return; - - int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value; - b3AabbCL queryAabb = rigidAabbs[queryRigidIndex]; - - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //Optimization - if the BVH is structured as a binary radix tree, then - //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]). - //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself. - { - int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y; - if(highestLeafIndex <= queryBvhNodeIndex) continue; - } - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) ) - { - if(isLeaf) - { - int4 pair; - pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3]; - pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - - } -} - - -//From rayCastKernels.cl -typedef struct -{ - float4 m_from; - float4 m_to; -} b3RayInfo; -//From rayCastKernels.cl - -b3Vector3 b3Vector3_normalize(b3Vector3 v) -{ - b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f}; - return normalize(normal); //OpenCL normalize == vector4 normalize -} -b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; } -b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; } - -int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb) -{ - //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ). - //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane. - // - //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane - //and min.x will be the far plane; otherwise, it is reversed. - // - //In order for there to be a collision, the t_min and t_max of each pair must overlap. - //This can be tested for by selecting the highest t_min and lowest t_max and comparing them. - - int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y) - - //When using vector types, the select() function checks the most signficant bit, - //but isless() sets the least significant bit. - isNegative <<= 31; - - //select(b, a, condition) == condition ? a : b - //When using select() with vector types, (condition[i]) is true if its most significant bit is 1 - b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection; - b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection; - - b3Scalar t_min_final = 0.0f; - b3Scalar t_max_final = rayLength; - - //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. - //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4]) - //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN. - t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) ); - t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) ); - - return (t_min_final <= t_max_final); -} - -__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs, - - __global int* rootNodeIndex, - __global int2* internalNodeChildIndices, - __global b3AabbCL* internalNodeAabbs, - __global int2* internalNodeLeafIndexRanges, - __global SortDataCL* mortonCodesAndAabbIndices, - - __global b3RayInfo* rays, - - __global int* out_numRayRigidPairs, - __global int2* out_rayRigidPairs, - int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - // - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - // - int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE]; - - int stackSize = 1; - stack[0] = *rootNodeIndex; - - while(stackSize) - { - int internalOrLeafNodeIndex = stack[ stackSize - 1 ]; - --stackSize; - - int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false - int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex); - - //bvhRigidIndex is not used if internal node - int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1; - - b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) ) - { - if(isLeaf) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - - if(!isLeaf) //Internal node - { - if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE) - { - //Error - } - else - { - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x; - stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y; - } - } - } - } -} - -__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, - __global int* out_numPairs, __global int4* out_overlappingPairs, - int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids) -{ - int smallAabbIndex = get_global_id(0); - if(smallAabbIndex >= numSmallAabbRigids) return; - - b3AabbCL smallAabb = smallAabbs[smallAabbIndex]; - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL largeAabb = largeAabbs[i]; - if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) ) - { - int4 pair; - pair.x = largeAabb.m_minIndices[3]; - pair.y = smallAabb.m_minIndices[3]; - pair.z = NEW_PAIR_MARKER; - pair.w = NEW_PAIR_MARKER; - - int pairIndex = atomic_inc(out_numPairs); - if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair; - } - } -} -__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays, - __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs, - int numLargeAabbRigids, int maxRayRigidPairs, int numRays) -{ - int rayIndex = get_global_id(0); - if(rayIndex >= numRays) return; - - b3Vector3 rayFrom = rays[rayIndex].m_from; - b3Vector3 rayTo = rays[rayIndex].m_to; - b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom); - b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) ); - - for(int i = 0; i < numLargeAabbRigids; ++i) - { - b3AabbCL rigidAabb = largeRigidAabbs[i]; - if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) ) - { - int2 rayRigidPair; - rayRigidPair.x = rayIndex; - rayRigidPair.y = rigidAabb.m_minIndices[3]; - - int pairIndex = atomic_inc(out_numRayRigidPairs); - if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair; - } - } -} - - -//Set so that it is always greater than the actual common prefixes, and never selected as a parent node. -//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve. -//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node. -//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve). -#define B3_PLBVH_INVALID_COMMON_PREFIX 128 - -#define B3_PLBVH_ROOT_NODE_MARKER -1 - -#define b3Int64 long - -int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); } -b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) -{ - //This function only needs to return (i & j) in order for the algorithm to work, - //but it may help with debugging to mask out the lower bits. - - b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j); - - b3Int64 sharedBits = i & j; - b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0 - - return sharedBits & bitmask; -} - -//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths -int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB) -{ - return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) ); -} - -__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices, - __global b3Int64* out_commonPrefixes, - __global int* out_commonPrefixLengths, - int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if (internalNodeIndex >= numInternalNodes) return; - - //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index, - //and the number of internal nodes is always numLeafNodes - 1 - int leftLeafIndex = internalNodeIndex; - int rightLeafIndex = internalNodeIndex + 1; - - int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key; - int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key; - - //Binary radix tree construction algorithm does not work if there are duplicate morton codes. - //Append the index of each leaf node to each morton code so that there are no duplicates. - //The algorithm also requires that the morton codes are sorted in ascending order; this requirement - //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true. - // - //upsample(a, b) == ( ((b3Int64)a) << 32) | b - b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex); - b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex); - - out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); - out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode); -} - - -__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes, - __global int2* out_childNodes, int numLeafNodes) -{ - int leafNodeIndex = get_global_id(0); - if (leafNodeIndex >= numLeafNodes) return; - - int numInternalNodes = numLeafNodes - 1; - - int leftSplitIndex = leafNodeIndex - 1; - int rightSplitIndex = leafNodeIndex; - - int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - //Parent node is the highest adjacent common prefix that is lower than the node's common prefix - //Leaf nodes are considered as having the highest common prefix - int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix); - - //Handle cases for the edge nodes; the first and last node - //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX - if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false; - if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true; - - int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex; - out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex; - - int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - int isLeaf = 1; - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex); -} - -__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths, - __global int2* out_childNodes, - __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex, - int numInternalNodes) -{ - int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - b3Int64 nodePrefix = commonPrefixes[internalNodeIndex]; - int nodePrefixLength = commonPrefixLengths[internalNodeIndex]; - -//#define USE_LINEAR_SEARCH -#ifdef USE_LINEAR_SEARCH - int leftIndex = -1; - int rightIndex = -1; - - //Find nearest element to left with a lower common prefix - for(int i = internalNodeIndex - 1; i >= 0; --i) - { - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - leftIndex = i; - break; - } - } - - //Find nearest element to right with a lower common prefix - for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i) - { - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - rightIndex = i; - break; - } - } - -#else //Use binary search - - //Find nearest element to left with a lower common prefix - int leftIndex = -1; - { - int lower = 0; - int upper = internalNodeIndex - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int right = mid + 1; - if(right < internalNodeIndex) - { - b3Int64 rightPrefix = commonPrefixes[right]; - int rightPrefixLength = commonPrefixLengths[right]; - - int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength); - if(nodeRightSharedPrefixLength < nodePrefixLength) - { - lower = right; - leftIndex = right; - } - else - { - leftIndex = mid; - break; - } - } - else - { - leftIndex = mid; - break; - } - } - else upper = mid - 1; - } - } - - //Find nearest element to right with a lower common prefix - int rightIndex = -1; - { - int lower = internalNodeIndex + 1; - int upper = numInternalNodes - 1; - - while(lower <= upper) - { - int mid = (lower + upper) / 2; - b3Int64 midPrefix = commonPrefixes[mid]; - int midPrefixLength = commonPrefixLengths[mid]; - - int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength); - if(nodeMidSharedPrefixLength < nodePrefixLength) - { - int left = mid - 1; - if(left > internalNodeIndex) - { - b3Int64 leftPrefix = commonPrefixes[left]; - int leftPrefixLength = commonPrefixLengths[left]; - - int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength); - if(nodeLeftSharedPrefixLength < nodePrefixLength) - { - upper = left; - rightIndex = left; - } - else - { - rightIndex = mid; - break; - } - } - else - { - rightIndex = mid; - break; - } - } - else lower = mid + 1; - } - } -#endif - - //Select parent - { - int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX; - - int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength); - - if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false; - else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true; - - int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex; - - int isRootNode = (leftIndex == -1 && rightIndex == -1); - out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER; - - int isLeaf = 0; - if(!isRootNode) - { - int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa - - //out_childNodesAsInt[0] == int2.x == left child - //out_childNodesAsInt[1] == int2.y == right child - __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]); - out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } - else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex); - } -} - -__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes, - __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes) -{ - if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0); - - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - // - int distanceFromRoot = 0; - { - int parentIndex = internalNodeParentNodes[internalNodeIndex]; - while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER) - { - parentIndex = internalNodeParentNodes[parentIndex]; - ++distanceFromRoot; - } - } - out_distanceFromRoot[internalNodeIndex] = distanceFromRoot; - - // - __local int localMaxDistanceFromRoot; - if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0; - barrier(CLK_LOCAL_MEM_FENCE); - - atomic_max(&localMaxDistanceFromRoot, distanceFromRoot); - barrier(CLK_LOCAL_MEM_FENCE); - - if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot); -} - -__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices, - __global int2* childNodes, - __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs, - int maxDistanceFromRoot, int processedDistance, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int distance = distanceFromRoot[internalNodeIndex]; - - if(distance == processedDistance) - { - int leftChildIndex = childNodes[internalNodeIndex].x; - int rightChildIndex = childNodes[internalNodeIndex].y; - - int isLeftChildLeaf = isLeafNode(leftChildIndex); - int isRightChildLeaf = isLeafNode(rightChildIndex); - - leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex); - rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex); - - //leftRigidIndex/rightRigidIndex is not used if internal node - int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1; - int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1; - - b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex]; - b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex]; - - b3AabbCL mergedAabb; - mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min); - mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max); - internalNodeAabbs[internalNodeIndex] = mergedAabb; - } -} - -__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes) -{ - int internalNodeIndex = get_global_id(0); - if(internalNodeIndex >= numInternalNodes) return; - - int numLeafNodes = numInternalNodes + 1; - - int2 childNodes = internalNodeChildNodes[internalNodeIndex]; - - int2 leafIndexRange; //x == min leaf index, y == max leaf index - - //Find lowest leaf index covered by this internal node - { - int lowestIndex = childNodes.x; //childNodes.x == Left child - while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x; - leafIndexRange.x = lowestIndex; - } - - //Find highest leaf index covered by this internal node - { - int highestIndex = childNodes.y; //childNodes.y == Right child - while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y; - leafIndexRange.y = highestIndex; - } - - // - out_leafIndexRanges[internalNodeIndex] = leafIndexRange; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h deleted file mode 100644 index c02877dde9..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/parallelLinearBvhKernels.h +++ /dev/null @@ -1,728 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* parallelLinearBvhCL = - "/*\n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose,\n" - "including commercial applications, and to alter it and redistribute it freely,\n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Initial Author Jackson Lee, 2014\n" - "typedef float b3Scalar;\n" - "typedef float4 b3Vector3;\n" - "#define b3Max max\n" - "#define b3Min min\n" - "#define b3Sqrt sqrt\n" - "typedef struct\n" - "{\n" - " unsigned int m_key;\n" - " unsigned int m_value;\n" - "} SortDataCL;\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} b3AabbCL;\n" - "unsigned int interleaveBits(unsigned int x)\n" - "{\n" - " //........ ........ ......12 3456789A //x\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x after interleaving bits\n" - " \n" - " //......12 3456789A ......12 3456789A //x ^ (x << 16)\n" - " //11111111 ........ ........ 11111111 //0x FF 00 00 FF\n" - " //......12 ........ ........ 3456789A //x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " \n" - " //......12 ........ 3456789A 3456789A //x ^ (x << 8)\n" - " //......11 ........ 1111.... ....1111 //0x 03 00 F0 0F\n" - " //......12 ........ 3456.... ....789A //x = (x ^ (x << 8)) & 0x0300F00F;\n" - " \n" - " //..12..12 ....3456 3456.... 789A789A //x ^ (x << 4)\n" - " //......11 ....11.. ..11.... 11....11 //0x 03 0C 30 C3\n" - " //......12 ....34.. ..56.... 78....9A //x = (x ^ (x << 4)) & 0x030C30C3;\n" - " \n" - " //....1212 ..3434.. 5656..78 78..9A9A //x ^ (x << 2)\n" - " //....1..1 ..1..1.. 1..1..1. .1..1..1 //0x 09 24 92 49\n" - " //....1..2 ..3..4.. 5..6..7. .8..9..A //x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " //........ ........ ......11 11111111 //0x000003FF\n" - " x &= 0x000003FF; //Clear all bits above bit 10\n" - " \n" - " x = (x ^ (x << 16)) & 0xFF0000FF;\n" - " x = (x ^ (x << 8)) & 0x0300F00F;\n" - " x = (x ^ (x << 4)) & 0x030C30C3;\n" - " x = (x ^ (x << 2)) & 0x09249249;\n" - " \n" - " return x;\n" - "}\n" - "unsigned int getMortonCode(unsigned int x, unsigned int y, unsigned int z)\n" - "{\n" - " return interleaveBits(x) << 0 | interleaveBits(y) << 1 | interleaveBits(z) << 2;\n" - "}\n" - "__kernel void separateAabbs(__global b3AabbCL* unseparatedAabbs, __global int* aabbIndices, __global b3AabbCL* out_aabbs, int numAabbsToSeparate)\n" - "{\n" - " int separatedAabbIndex = get_global_id(0);\n" - " if(separatedAabbIndex >= numAabbsToSeparate) return;\n" - " int unseparatedAabbIndex = aabbIndices[separatedAabbIndex];\n" - " out_aabbs[separatedAabbIndex] = unseparatedAabbs[unseparatedAabbIndex];\n" - "}\n" - "//Should replace with an optimized parallel reduction\n" - "__kernel void findAllNodesMergedAabb(__global b3AabbCL* out_mergedAabb, int numAabbsNeedingMerge)\n" - "{\n" - " //Each time this kernel is added to the command queue, \n" - " //the number of AABBs needing to be merged is halved\n" - " //\n" - " //Example with 159 AABBs:\n" - " // numRemainingAabbs == 159 / 2 + 159 % 2 == 80\n" - " // numMergedAabbs == 159 - 80 == 79\n" - " //So, indices [0, 78] are merged with [0 + 80, 78 + 80]\n" - " \n" - " int numRemainingAabbs = numAabbsNeedingMerge / 2 + numAabbsNeedingMerge % 2;\n" - " int numMergedAabbs = numAabbsNeedingMerge - numRemainingAabbs;\n" - " \n" - " int aabbIndex = get_global_id(0);\n" - " if(aabbIndex >= numMergedAabbs) return;\n" - " \n" - " int otherAabbIndex = aabbIndex + numRemainingAabbs;\n" - " \n" - " b3AabbCL aabb = out_mergedAabb[aabbIndex];\n" - " b3AabbCL otherAabb = out_mergedAabb[otherAabbIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(aabb.m_min, otherAabb.m_min);\n" - " mergedAabb.m_max = b3Max(aabb.m_max, otherAabb.m_max);\n" - " out_mergedAabb[aabbIndex] = mergedAabb;\n" - "}\n" - "__kernel void assignMortonCodesAndAabbIndicies(__global b3AabbCL* worldSpaceAabbs, __global b3AabbCL* mergedAabbOfAllNodes, \n" - " __global SortDataCL* out_mortonCodesAndAabbIndices, int numAabbs)\n" - "{\n" - " int leafNodeIndex = get_global_id(0); //Leaf node index == AABB index\n" - " if(leafNodeIndex >= numAabbs) return;\n" - " \n" - " b3AabbCL mergedAabb = mergedAabbOfAllNodes[0];\n" - " b3Vector3 gridCenter = (mergedAabb.m_min + mergedAabb.m_max) * 0.5f;\n" - " b3Vector3 gridCellSize = (mergedAabb.m_max - mergedAabb.m_min) / (float)1024;\n" - " \n" - " b3AabbCL aabb = worldSpaceAabbs[leafNodeIndex];\n" - " b3Vector3 aabbCenter = (aabb.m_min + aabb.m_max) * 0.5f;\n" - " b3Vector3 aabbCenterRelativeToGrid = aabbCenter - gridCenter;\n" - " \n" - " //Quantize into integer coordinates\n" - " //floor() is needed to prevent the center cell, at (0,0,0) from being twice the size\n" - " b3Vector3 gridPosition = aabbCenterRelativeToGrid / gridCellSize;\n" - " \n" - " int4 discretePosition;\n" - " discretePosition.x = (int)( (gridPosition.x >= 0.0f) ? gridPosition.x : floor(gridPosition.x) );\n" - " discretePosition.y = (int)( (gridPosition.y >= 0.0f) ? gridPosition.y : floor(gridPosition.y) );\n" - " discretePosition.z = (int)( (gridPosition.z >= 0.0f) ? gridPosition.z : floor(gridPosition.z) );\n" - " \n" - " //Clamp coordinates into [-512, 511], then convert range from [-512, 511] to [0, 1023]\n" - " discretePosition = b3Max( -512, b3Min(discretePosition, 511) );\n" - " discretePosition += 512;\n" - " \n" - " //Interleave bits(assign a morton code, also known as a z-curve)\n" - " unsigned int mortonCode = getMortonCode(discretePosition.x, discretePosition.y, discretePosition.z);\n" - " \n" - " //\n" - " SortDataCL mortonCodeIndexPair;\n" - " mortonCodeIndexPair.m_key = mortonCode;\n" - " mortonCodeIndexPair.m_value = leafNodeIndex;\n" - " \n" - " out_mortonCodesAndAabbIndices[leafNodeIndex] = mortonCodeIndexPair;\n" - "}\n" - "#define B3_PLVBH_TRAVERSE_MAX_STACK_SIZE 128\n" - "//The most significant bit(0x80000000) of a int32 is used to distinguish between leaf and internal nodes.\n" - "//If it is set, then the index is for an internal node; otherwise, it is a leaf node. \n" - "//In both cases, the bit should be cleared to access the actual node index.\n" - "int isLeafNode(int index) { return (index >> 31 == 0); }\n" - "int getIndexWithInternalNodeMarkerRemoved(int index) { return index & (~0x80000000); }\n" - "int getIndexWithInternalNodeMarkerSet(int isLeaf, int index) { return (isLeaf) ? index : (index | 0x80000000); }\n" - "//From sap.cl\n" - "#define NEW_PAIR_MARKER -1\n" - "bool TestAabbAgainstAabb2(const b3AabbCL* aabb1, const b3AabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "//From sap.cl\n" - "__kernel void plbvhCalculateOverlappingPairs(__global b3AabbCL* rigidAabbs, \n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " \n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numQueryAabbs)\n" - "{\n" - " //Using get_group_id()/get_local_id() is Faster than get_global_id(0) since\n" - " //mortonCodesAndAabbIndices[] contains rigid body indices sorted along the z-curve (more spatially coherent)\n" - " int queryBvhNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(queryBvhNodeIndex >= numQueryAabbs) return;\n" - " \n" - " int queryRigidIndex = mortonCodesAndAabbIndices[queryBvhNodeIndex].m_value;\n" - " b3AabbCL queryAabb = rigidAabbs[queryRigidIndex];\n" - " \n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //Optimization - if the BVH is structured as a binary radix tree, then\n" - " //each internal node corresponds to a contiguous range of leaf nodes(internalNodeLeafIndexRanges[]).\n" - " //This can be used to avoid testing each AABB-AABB pair twice, including preventing each node from colliding with itself.\n" - " {\n" - " int highestLeafIndex = (isLeaf) ? bvhNodeIndex : internalNodeLeafIndexRanges[bvhNodeIndex].y;\n" - " if(highestLeafIndex <= queryBvhNodeIndex) continue;\n" - " }\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( TestAabbAgainstAabb2(&queryAabb, &bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int4 pair;\n" - " pair.x = rigidAabbs[queryRigidIndex].m_minIndices[3];\n" - " pair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " \n" - " }\n" - "}\n" - "//From rayCastKernels.cl\n" - "typedef struct\n" - "{\n" - " float4 m_from;\n" - " float4 m_to;\n" - "} b3RayInfo;\n" - "//From rayCastKernels.cl\n" - "b3Vector3 b3Vector3_normalize(b3Vector3 v)\n" - "{\n" - " b3Vector3 normal = (b3Vector3){v.x, v.y, v.z, 0.f};\n" - " return normalize(normal); //OpenCL normalize == vector4 normalize\n" - "}\n" - "b3Scalar b3Vector3_length2(b3Vector3 v) { return v.x*v.x + v.y*v.y + v.z*v.z; }\n" - "b3Scalar b3Vector3_dot(b3Vector3 a, b3Vector3 b) { return a.x*b.x + a.y*b.y + a.z*b.z; }\n" - "int rayIntersectsAabb(b3Vector3 rayOrigin, b3Scalar rayLength, b3Vector3 rayNormalizedDirection, b3AabbCL aabb)\n" - "{\n" - " //AABB is considered as 3 pairs of 2 planes( {x_min, x_max}, {y_min, y_max}, {z_min, z_max} ).\n" - " //t_min is the point of intersection with the closer plane, t_max is the point of intersection with the farther plane.\n" - " //\n" - " //if (rayNormalizedDirection.x < 0.0f), then max.x will be the near plane \n" - " //and min.x will be the far plane; otherwise, it is reversed.\n" - " //\n" - " //In order for there to be a collision, the t_min and t_max of each pair must overlap.\n" - " //This can be tested for by selecting the highest t_min and lowest t_max and comparing them.\n" - " \n" - " int4 isNegative = isless( rayNormalizedDirection, ((b3Vector3){0.0f, 0.0f, 0.0f, 0.0f}) ); //isless(x,y) returns (x < y)\n" - " \n" - " //When using vector types, the select() function checks the most signficant bit, \n" - " //but isless() sets the least significant bit.\n" - " isNegative <<= 31;\n" - " //select(b, a, condition) == condition ? a : b\n" - " //When using select() with vector types, (condition[i]) is true if its most significant bit is 1\n" - " b3Vector3 t_min = ( select(aabb.m_min, aabb.m_max, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " b3Vector3 t_max = ( select(aabb.m_max, aabb.m_min, isNegative) - rayOrigin ) / rayNormalizedDirection;\n" - " \n" - " b3Scalar t_min_final = 0.0f;\n" - " b3Scalar t_max_final = rayLength;\n" - " \n" - " //Must use fmin()/fmax(); if one of the parameters is NaN, then the parameter that is not NaN is returned. \n" - " //Behavior of min()/max() with NaNs is undefined. (See OpenCL Specification 1.2 [6.12.2] and [6.12.4])\n" - " //Since the innermost fmin()/fmax() is always not NaN, this should never return NaN.\n" - " t_min_final = fmax( t_min.z, fmax(t_min.y, fmax(t_min.x, t_min_final)) );\n" - " t_max_final = fmin( t_max.z, fmin(t_max.y, fmin(t_max.x, t_max_final)) );\n" - " \n" - " return (t_min_final <= t_max_final);\n" - "}\n" - "__kernel void plbvhRayTraverse(__global b3AabbCL* rigidAabbs,\n" - " __global int* rootNodeIndex, \n" - " __global int2* internalNodeChildIndices, \n" - " __global b3AabbCL* internalNodeAabbs,\n" - " __global int2* internalNodeLeafIndexRanges,\n" - " __global SortDataCL* mortonCodesAndAabbIndices,\n" - " \n" - " __global b3RayInfo* rays,\n" - " \n" - " __global int* out_numRayRigidPairs, \n" - " __global int2* out_rayRigidPairs,\n" - " int maxRayRigidPairs, int numRays)\n" - "{\n" - " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " //\n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " //\n" - " int stack[B3_PLVBH_TRAVERSE_MAX_STACK_SIZE];\n" - " \n" - " int stackSize = 1;\n" - " stack[0] = *rootNodeIndex;\n" - " \n" - " while(stackSize)\n" - " {\n" - " int internalOrLeafNodeIndex = stack[ stackSize - 1 ];\n" - " --stackSize;\n" - " \n" - " int isLeaf = isLeafNode(internalOrLeafNodeIndex); //Internal node if false\n" - " int bvhNodeIndex = getIndexWithInternalNodeMarkerRemoved(internalOrLeafNodeIndex);\n" - " \n" - " //bvhRigidIndex is not used if internal node\n" - " int bvhRigidIndex = (isLeaf) ? mortonCodesAndAabbIndices[bvhNodeIndex].m_value : -1;\n" - " \n" - " b3AabbCL bvhNodeAabb = (isLeaf) ? rigidAabbs[bvhRigidIndex] : internalNodeAabbs[bvhNodeIndex];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, bvhNodeAabb) )\n" - " {\n" - " if(isLeaf)\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabbs[bvhRigidIndex].m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " \n" - " if(!isLeaf) //Internal node\n" - " {\n" - " if(stackSize + 2 > B3_PLVBH_TRAVERSE_MAX_STACK_SIZE)\n" - " {\n" - " //Error\n" - " }\n" - " else\n" - " {\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].x;\n" - " stack[ stackSize++ ] = internalNodeChildIndices[bvhNodeIndex].y;\n" - " }\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbAabbTest(__global b3AabbCL* smallAabbs, __global b3AabbCL* largeAabbs, \n" - " __global int* out_numPairs, __global int4* out_overlappingPairs, \n" - " int maxPairs, int numLargeAabbRigids, int numSmallAabbRigids)\n" - "{\n" - " int smallAabbIndex = get_global_id(0);\n" - " if(smallAabbIndex >= numSmallAabbRigids) return;\n" - " \n" - " b3AabbCL smallAabb = smallAabbs[smallAabbIndex];\n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL largeAabb = largeAabbs[i];\n" - " if( TestAabbAgainstAabb2(&smallAabb, &largeAabb) )\n" - " {\n" - " int4 pair;\n" - " pair.x = largeAabb.m_minIndices[3];\n" - " pair.y = smallAabb.m_minIndices[3];\n" - " pair.z = NEW_PAIR_MARKER;\n" - " pair.w = NEW_PAIR_MARKER;\n" - " \n" - " int pairIndex = atomic_inc(out_numPairs);\n" - " if(pairIndex < maxPairs) out_overlappingPairs[pairIndex] = pair;\n" - " }\n" - " }\n" - "}\n" - "__kernel void plbvhLargeAabbRayTest(__global b3AabbCL* largeRigidAabbs, __global b3RayInfo* rays,\n" - " __global int* out_numRayRigidPairs, __global int2* out_rayRigidPairs,\n" - " int numLargeAabbRigids, int maxRayRigidPairs, int numRays)\n" - "{\n" - " int rayIndex = get_global_id(0);\n" - " if(rayIndex >= numRays) return;\n" - " \n" - " b3Vector3 rayFrom = rays[rayIndex].m_from;\n" - " b3Vector3 rayTo = rays[rayIndex].m_to;\n" - " b3Vector3 rayNormalizedDirection = b3Vector3_normalize(rayTo - rayFrom);\n" - " b3Scalar rayLength = b3Sqrt( b3Vector3_length2(rayTo - rayFrom) );\n" - " \n" - " for(int i = 0; i < numLargeAabbRigids; ++i)\n" - " {\n" - " b3AabbCL rigidAabb = largeRigidAabbs[i];\n" - " if( rayIntersectsAabb(rayFrom, rayLength, rayNormalizedDirection, rigidAabb) )\n" - " {\n" - " int2 rayRigidPair;\n" - " rayRigidPair.x = rayIndex;\n" - " rayRigidPair.y = rigidAabb.m_minIndices[3];\n" - " \n" - " int pairIndex = atomic_inc(out_numRayRigidPairs);\n" - " if(pairIndex < maxRayRigidPairs) out_rayRigidPairs[pairIndex] = rayRigidPair;\n" - " }\n" - " }\n" - "}\n" - "//Set so that it is always greater than the actual common prefixes, and never selected as a parent node.\n" - "//If there are no duplicates, then the highest common prefix is 32 or 64, depending on the number of bits used for the z-curve.\n" - "//Duplicate common prefixes increase the highest common prefix at most by the number of bits used to index the leaf node.\n" - "//Since 32 bit ints are used to index leaf nodes, the max prefix is 64(32 + 32 bit z-curve) or 96(32 + 64 bit z-curve).\n" - "#define B3_PLBVH_INVALID_COMMON_PREFIX 128\n" - "#define B3_PLBVH_ROOT_NODE_MARKER -1\n" - "#define b3Int64 long\n" - "int computeCommonPrefixLength(b3Int64 i, b3Int64 j) { return (int)clz(i ^ j); }\n" - "b3Int64 computeCommonPrefix(b3Int64 i, b3Int64 j) \n" - "{\n" - " //This function only needs to return (i & j) in order for the algorithm to work,\n" - " //but it may help with debugging to mask out the lower bits.\n" - " b3Int64 commonPrefixLength = (b3Int64)computeCommonPrefixLength(i, j);\n" - " b3Int64 sharedBits = i & j;\n" - " b3Int64 bitmask = ((b3Int64)(~0)) << (64 - commonPrefixLength); //Set all bits after the common prefix to 0\n" - " \n" - " return sharedBits & bitmask;\n" - "}\n" - "//Same as computeCommonPrefixLength(), but allows for prefixes with different lengths\n" - "int getSharedPrefixLength(b3Int64 prefixA, int prefixLengthA, b3Int64 prefixB, int prefixLengthB)\n" - "{\n" - " return b3Min( computeCommonPrefixLength(prefixA, prefixB), b3Min(prefixLengthA, prefixLengthB) );\n" - "}\n" - "__kernel void computeAdjacentPairCommonPrefix(__global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global b3Int64* out_commonPrefixes,\n" - " __global int* out_commonPrefixLengths,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if (internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //Here, (internalNodeIndex + 1) is never out of bounds since it is a leaf node index,\n" - " //and the number of internal nodes is always numLeafNodes - 1\n" - " int leftLeafIndex = internalNodeIndex;\n" - " int rightLeafIndex = internalNodeIndex + 1;\n" - " \n" - " int leftLeafMortonCode = mortonCodesAndAabbIndices[leftLeafIndex].m_key;\n" - " int rightLeafMortonCode = mortonCodesAndAabbIndices[rightLeafIndex].m_key;\n" - " \n" - " //Binary radix tree construction algorithm does not work if there are duplicate morton codes.\n" - " //Append the index of each leaf node to each morton code so that there are no duplicates.\n" - " //The algorithm also requires that the morton codes are sorted in ascending order; this requirement\n" - " //is also satisfied with this method, as (leftLeafIndex < rightLeafIndex) is always true.\n" - " //\n" - " //upsample(a, b) == ( ((b3Int64)a) << 32) | b\n" - " b3Int64 nonduplicateLeftMortonCode = upsample(leftLeafMortonCode, leftLeafIndex);\n" - " b3Int64 nonduplicateRightMortonCode = upsample(rightLeafMortonCode, rightLeafIndex);\n" - " \n" - " out_commonPrefixes[internalNodeIndex] = computeCommonPrefix(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - " out_commonPrefixLengths[internalNodeIndex] = computeCommonPrefixLength(nonduplicateLeftMortonCode, nonduplicateRightMortonCode);\n" - "}\n" - "__kernel void buildBinaryRadixTreeLeafNodes(__global int* commonPrefixLengths, __global int* out_leafNodeParentNodes,\n" - " __global int2* out_childNodes, int numLeafNodes)\n" - "{\n" - " int leafNodeIndex = get_global_id(0);\n" - " if (leafNodeIndex >= numLeafNodes) return;\n" - " \n" - " int numInternalNodes = numLeafNodes - 1;\n" - " \n" - " int leftSplitIndex = leafNodeIndex - 1;\n" - " int rightSplitIndex = leafNodeIndex;\n" - " \n" - " int leftCommonPrefix = (leftSplitIndex >= 0) ? commonPrefixLengths[leftSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightCommonPrefix = (rightSplitIndex < numInternalNodes) ? commonPrefixLengths[rightSplitIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " //Parent node is the highest adjacent common prefix that is lower than the node's common prefix\n" - " //Leaf nodes are considered as having the highest common prefix\n" - " int isLeftHigherCommonPrefix = (leftCommonPrefix > rightCommonPrefix);\n" - " \n" - " //Handle cases for the edge nodes; the first and last node\n" - " //For leaf nodes, leftCommonPrefix and rightCommonPrefix should never both be B3_PLBVH_INVALID_COMMON_PREFIX\n" - " if(leftCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = false;\n" - " if(rightCommonPrefix == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherCommonPrefix = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherCommonPrefix) ? leftSplitIndex : rightSplitIndex;\n" - " out_leafNodeParentNodes[leafNodeIndex] = parentNodeIndex;\n" - " \n" - " int isRightChild = (isLeftHigherCommonPrefix); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " int isLeaf = 1;\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, leafNodeIndex);\n" - "}\n" - "__kernel void buildBinaryRadixTreeInternalNodes(__global b3Int64* commonPrefixes, __global int* commonPrefixLengths,\n" - " __global int2* out_childNodes,\n" - " __global int* out_internalNodeParentNodes, __global int* out_rootNodeIndex,\n" - " int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_group_id(0) * get_local_size(0) + get_local_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " b3Int64 nodePrefix = commonPrefixes[internalNodeIndex];\n" - " int nodePrefixLength = commonPrefixLengths[internalNodeIndex];\n" - " \n" - "//#define USE_LINEAR_SEARCH\n" - "#ifdef USE_LINEAR_SEARCH\n" - " int leftIndex = -1;\n" - " int rightIndex = -1;\n" - " \n" - " //Find nearest element to left with a lower common prefix\n" - " for(int i = internalNodeIndex - 1; i >= 0; --i)\n" - " {\n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " leftIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " for(int i = internalNodeIndex + 1; i < numInternalNodes; ++i)\n" - " {\n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, commonPrefixes[i], commonPrefixLengths[i]);\n" - " if(nodeRightSharedPrefixLength < nodePrefixLength)\n" - " {\n" - " rightIndex = i;\n" - " break;\n" - " }\n" - " }\n" - " \n" - "#else //Use binary search\n" - " //Find nearest element to left with a lower common prefix\n" - " int leftIndex = -1;\n" - " {\n" - " int lower = 0;\n" - " int upper = internalNodeIndex - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int right = mid + 1;\n" - " if(right < internalNodeIndex)\n" - " {\n" - " b3Int64 rightPrefix = commonPrefixes[right];\n" - " int rightPrefixLength = commonPrefixLengths[right];\n" - " \n" - " int nodeRightSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, rightPrefix, rightPrefixLength);\n" - " if(nodeRightSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " lower = right;\n" - " leftIndex = right;\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " leftIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else upper = mid - 1;\n" - " }\n" - " }\n" - " \n" - " //Find nearest element to right with a lower common prefix\n" - " int rightIndex = -1;\n" - " {\n" - " int lower = internalNodeIndex + 1;\n" - " int upper = numInternalNodes - 1;\n" - " \n" - " while(lower <= upper)\n" - " {\n" - " int mid = (lower + upper) / 2;\n" - " b3Int64 midPrefix = commonPrefixes[mid];\n" - " int midPrefixLength = commonPrefixLengths[mid];\n" - " \n" - " int nodeMidSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, midPrefix, midPrefixLength);\n" - " if(nodeMidSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " int left = mid - 1;\n" - " if(left > internalNodeIndex)\n" - " {\n" - " b3Int64 leftPrefix = commonPrefixes[left];\n" - " int leftPrefixLength = commonPrefixLengths[left];\n" - " \n" - " int nodeLeftSharedPrefixLength = getSharedPrefixLength(nodePrefix, nodePrefixLength, leftPrefix, leftPrefixLength);\n" - " if(nodeLeftSharedPrefixLength < nodePrefixLength) \n" - " {\n" - " upper = left;\n" - " rightIndex = left;\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else \n" - " {\n" - " rightIndex = mid;\n" - " break;\n" - " }\n" - " }\n" - " else lower = mid + 1;\n" - " }\n" - " }\n" - "#endif\n" - " \n" - " //Select parent\n" - " {\n" - " int leftPrefixLength = (leftIndex != -1) ? commonPrefixLengths[leftIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " int rightPrefixLength = (rightIndex != -1) ? commonPrefixLengths[rightIndex] : B3_PLBVH_INVALID_COMMON_PREFIX;\n" - " \n" - " int isLeftHigherPrefixLength = (leftPrefixLength > rightPrefixLength);\n" - " \n" - " if(leftPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = false;\n" - " else if(rightPrefixLength == B3_PLBVH_INVALID_COMMON_PREFIX) isLeftHigherPrefixLength = true;\n" - " \n" - " int parentNodeIndex = (isLeftHigherPrefixLength) ? leftIndex : rightIndex;\n" - " \n" - " int isRootNode = (leftIndex == -1 && rightIndex == -1);\n" - " out_internalNodeParentNodes[internalNodeIndex] = (!isRootNode) ? parentNodeIndex : B3_PLBVH_ROOT_NODE_MARKER;\n" - " \n" - " int isLeaf = 0;\n" - " if(!isRootNode)\n" - " {\n" - " int isRightChild = (isLeftHigherPrefixLength); //If the left node is the parent, then this node is its right child and vice versa\n" - " \n" - " //out_childNodesAsInt[0] == int2.x == left child\n" - " //out_childNodesAsInt[1] == int2.y == right child\n" - " __global int* out_childNodesAsInt = (__global int*)(&out_childNodes[parentNodeIndex]);\n" - " out_childNodesAsInt[isRightChild] = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - " else *out_rootNodeIndex = getIndexWithInternalNodeMarkerSet(isLeaf, internalNodeIndex);\n" - " }\n" - "}\n" - "__kernel void findDistanceFromRoot(__global int* rootNodeIndex, __global int* internalNodeParentNodes,\n" - " __global int* out_maxDistanceFromRoot, __global int* out_distanceFromRoot, int numInternalNodes)\n" - "{\n" - " if( get_global_id(0) == 0 ) atomic_xchg(out_maxDistanceFromRoot, 0);\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " //\n" - " int distanceFromRoot = 0;\n" - " {\n" - " int parentIndex = internalNodeParentNodes[internalNodeIndex];\n" - " while(parentIndex != B3_PLBVH_ROOT_NODE_MARKER)\n" - " {\n" - " parentIndex = internalNodeParentNodes[parentIndex];\n" - " ++distanceFromRoot;\n" - " }\n" - " }\n" - " out_distanceFromRoot[internalNodeIndex] = distanceFromRoot;\n" - " \n" - " //\n" - " __local int localMaxDistanceFromRoot;\n" - " if( get_local_id(0) == 0 ) localMaxDistanceFromRoot = 0;\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " atomic_max(&localMaxDistanceFromRoot, distanceFromRoot);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if( get_local_id(0) == 0 ) atomic_max(out_maxDistanceFromRoot, localMaxDistanceFromRoot);\n" - "}\n" - "__kernel void buildBinaryRadixTreeAabbsRecursive(__global int* distanceFromRoot, __global SortDataCL* mortonCodesAndAabbIndices,\n" - " __global int2* childNodes,\n" - " __global b3AabbCL* leafNodeAabbs, __global b3AabbCL* internalNodeAabbs,\n" - " int maxDistanceFromRoot, int processedDistance, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int distance = distanceFromRoot[internalNodeIndex];\n" - " \n" - " if(distance == processedDistance)\n" - " {\n" - " int leftChildIndex = childNodes[internalNodeIndex].x;\n" - " int rightChildIndex = childNodes[internalNodeIndex].y;\n" - " \n" - " int isLeftChildLeaf = isLeafNode(leftChildIndex);\n" - " int isRightChildLeaf = isLeafNode(rightChildIndex);\n" - " \n" - " leftChildIndex = getIndexWithInternalNodeMarkerRemoved(leftChildIndex);\n" - " rightChildIndex = getIndexWithInternalNodeMarkerRemoved(rightChildIndex);\n" - " \n" - " //leftRigidIndex/rightRigidIndex is not used if internal node\n" - " int leftRigidIndex = (isLeftChildLeaf) ? mortonCodesAndAabbIndices[leftChildIndex].m_value : -1;\n" - " int rightRigidIndex = (isRightChildLeaf) ? mortonCodesAndAabbIndices[rightChildIndex].m_value : -1;\n" - " \n" - " b3AabbCL leftChildAabb = (isLeftChildLeaf) ? leafNodeAabbs[leftRigidIndex] : internalNodeAabbs[leftChildIndex];\n" - " b3AabbCL rightChildAabb = (isRightChildLeaf) ? leafNodeAabbs[rightRigidIndex] : internalNodeAabbs[rightChildIndex];\n" - " \n" - " b3AabbCL mergedAabb;\n" - " mergedAabb.m_min = b3Min(leftChildAabb.m_min, rightChildAabb.m_min);\n" - " mergedAabb.m_max = b3Max(leftChildAabb.m_max, rightChildAabb.m_max);\n" - " internalNodeAabbs[internalNodeIndex] = mergedAabb;\n" - " }\n" - "}\n" - "__kernel void findLeafIndexRanges(__global int2* internalNodeChildNodes, __global int2* out_leafIndexRanges, int numInternalNodes)\n" - "{\n" - " int internalNodeIndex = get_global_id(0);\n" - " if(internalNodeIndex >= numInternalNodes) return;\n" - " \n" - " int numLeafNodes = numInternalNodes + 1;\n" - " \n" - " int2 childNodes = internalNodeChildNodes[internalNodeIndex];\n" - " \n" - " int2 leafIndexRange; //x == min leaf index, y == max leaf index\n" - " \n" - " //Find lowest leaf index covered by this internal node\n" - " {\n" - " int lowestIndex = childNodes.x; //childNodes.x == Left child\n" - " while( !isLeafNode(lowestIndex) ) lowestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(lowestIndex) ].x;\n" - " leafIndexRange.x = lowestIndex;\n" - " }\n" - " \n" - " //Find highest leaf index covered by this internal node\n" - " {\n" - " int highestIndex = childNodes.y; //childNodes.y == Right child\n" - " while( !isLeafNode(highestIndex) ) highestIndex = internalNodeChildNodes[ getIndexWithInternalNodeMarkerRemoved(highestIndex) ].y;\n" - " leafIndexRange.y = highestIndex;\n" - " }\n" - " \n" - " //\n" - " out_leafIndexRanges[internalNodeIndex] = leafIndexRange;\n" - "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl deleted file mode 100644 index 93f77a6433..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sap.cl +++ /dev/null @@ -1,389 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. -*/ -//Originally written by Erwin Coumans - -#define NEW_PAIR_MARKER -1 - -typedef struct -{ - union - { - float4 m_min; - float m_minElems[4]; - int m_minIndices[4]; - }; - union - { - float4 m_max; - float m_maxElems[4]; - int m_maxIndices[4]; - }; -} btAabbCL; - - -/// conservative test for overlap between two aabbs -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2); -bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2); -bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2) -{ - bool overlap = true; - overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap; - overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap; - overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap; - return overlap; -} - - -__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numUnsortedAabbs) - return; - - int j = get_global_id(1); - if (j>=numUnSortedAabbs2) - return; - - - __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]]; - __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]]; - - if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2)) - { - int4 myPair; - - int xIndex = unsortedAabbPtr[0].m_minIndices[3]; - int yIndex = unsortedAabbPtr2[0].m_minIndices[3]; - if (xIndex>yIndex) - { - int tmp = xIndex; - xIndex=yIndex; - yIndex=tmp; - } - - myPair.x = xIndex; - myPair.y = yIndex; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } -} - - - -__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - -__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - for (int j=i+1;j<numObjects;j++) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - break; - } - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } -} - - - - -__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j])) - { - int4 myPair; - myPair.x = aabbs[i].m_minIndices[3]; - myPair.y = aabbs[j].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); -} - - -__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs) -{ - int i = get_global_id(0); - int localId = get_local_id(0); - - __local int numActiveWgItems[1]; - __local int breakRequest[1]; - __local btAabbCL localAabbs[128];// = aabbs[i]; - - btAabbCL myAabb; - - myAabb = (i<numObjects)? aabbs[i]:aabbs[0]; - float testValue = myAabb.m_maxElems[axis]; - - if (localId==0) - { - numActiveWgItems[0] = 0; - breakRequest[0] = 0; - } - int localCount=0; - int block=0; - localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0]; - - barrier(CLK_LOCAL_MEM_FENCE); - atomic_inc(numActiveWgItems); - barrier(CLK_LOCAL_MEM_FENCE); - int localBreak = 0; - - int j=i+1; - do - { - barrier(CLK_LOCAL_MEM_FENCE); - - if (j<numObjects) - { - if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) - { - if (!localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - if (j>=numObjects && !localBreak) - { - atomic_inc(breakRequest); - localBreak = 1; - } - barrier(CLK_LOCAL_MEM_FENCE); - - if (!localBreak) - { - if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1])) - { - int4 myPair; - myPair.x = myAabb.m_minIndices[3]; - myPair.y = localAabbs[localCount+localId+1].m_minIndices[3]; - myPair.z = NEW_PAIR_MARKER; - myPair.w = NEW_PAIR_MARKER; - - int curPair = atomic_inc (pairCount); - if (curPair<maxPairs) - { - pairsOut[curPair] = myPair; //flush to main memory - } - } - } - - barrier(CLK_LOCAL_MEM_FENCE); - - localCount++; - if (localCount==64) - { - localCount = 0; - block+=64; - localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0]; - localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0]; - } - j++; - - } while (breakRequest[0]<numActiveWgItems[0]); - -} - - - - -//http://stereopsis.com/radix.html -unsigned int FloatFlip(float fl); -unsigned int FloatFlip(float fl) -{ - unsigned int f = *(unsigned int*)&fl; - unsigned int mask = -(int)(f >> 31) | 0x80000000; - return f ^ mask; -} -float IFloatFlip(unsigned int f); -float IFloatFlip(unsigned int f) -{ - unsigned int mask = ((f >> 31) - 1) | 0x80000000; - unsigned int fl = f ^ mask; - return *(float*)&fl; -} - - - - -__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - int src = destAabbs[i].m_maxIndices[3]; - destAabbs[i] = allAabbs[src]; - destAabbs[i].m_maxIndices[3] = src; -} - - -__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - - sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]); - sortData[i].y = i; - -} - - -__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects) -{ - int i = get_global_id(0); - if (i>=numObjects) - return; - - sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]]; -} - - - -__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs) -{ - int i = get_global_id(0); - if (i>=numAabbs) - return; - - btAabbCL smallAabb = allAabbs[smallAabbMapping[i]]; - - float4 s; - s = (smallAabb.m_max+smallAabb.m_min)*0.5f; - sum[i]=s; - sum2[i]=s*s; -} diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h deleted file mode 100644 index d6999b94cb..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ /dev/null @@ -1,341 +0,0 @@ -//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* sapCL = - "/*\n" - "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" - "This software is provided 'as-is', without any express or implied warranty.\n" - "In no event will the authors be held liable for any damages arising from the use of this software.\n" - "Permission is granted to anyone to use this software for any purpose, \n" - "including commercial applications, and to alter it and redistribute it freely, \n" - "subject to the following restrictions:\n" - "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" - "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" - "3. This notice may not be removed or altered from any source distribution.\n" - "*/\n" - "//Originally written by Erwin Coumans\n" - "#define NEW_PAIR_MARKER -1\n" - "typedef struct \n" - "{\n" - " union\n" - " {\n" - " float4 m_min;\n" - " float m_minElems[4];\n" - " int m_minIndices[4];\n" - " };\n" - " union\n" - " {\n" - " float4 m_max;\n" - " float m_maxElems[4];\n" - " int m_maxIndices[4];\n" - " };\n" - "} btAabbCL;\n" - "/// conservative test for overlap between two aabbs\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" - "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" - "{\n" - " bool overlap = true;\n" - " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" - " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" - " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" - " return overlap;\n" - "}\n" - "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numUnsortedAabbs)\n" - " return;\n" - " int j = get_global_id(1);\n" - " if (j>=numUnSortedAabbs2)\n" - " return;\n" - " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" - " __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" - " if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" - " {\n" - " int4 myPair;\n" - " \n" - " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" - " int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" - " if (xIndex>yIndex)\n" - " {\n" - " int tmp = xIndex;\n" - " xIndex=yIndex;\n" - " yIndex=tmp;\n" - " }\n" - " \n" - " myPair.x = xIndex;\n" - " myPair.y = yIndex;\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " for (int j=i+1;j<numObjects;j++)\n" - " {\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " for (int j=i+1;j<numObjects;j++)\n" - " {\n" - " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" - " {\n" - " break;\n" - " }\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - "}\n" - "__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int localId = get_local_id(0);\n" - " __local int numActiveWgItems[1];\n" - " __local int breakRequest[1];\n" - " if (localId==0)\n" - " {\n" - " numActiveWgItems[0] = 0;\n" - " breakRequest[0] = 0;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " atomic_inc(numActiveWgItems);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " int localBreak = 0;\n" - " int j=i+1;\n" - " do\n" - " {\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j<numObjects)\n" - " {\n" - " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" - " {\n" - " if (!localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j>=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = aabbs[i].m_minIndices[3];\n" - " myPair.y = aabbs[j].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - " j++;\n" - " } while (breakRequest[0]<numActiveWgItems[0]);\n" - "}\n" - "__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" - "{\n" - " int i = get_global_id(0);\n" - " int localId = get_local_id(0);\n" - " __local int numActiveWgItems[1];\n" - " __local int breakRequest[1];\n" - " __local btAabbCL localAabbs[128];// = aabbs[i];\n" - " \n" - " btAabbCL myAabb;\n" - " \n" - " myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n" - " float testValue = myAabb.m_maxElems[axis];\n" - " \n" - " if (localId==0)\n" - " {\n" - " numActiveWgItems[0] = 0;\n" - " breakRequest[0] = 0;\n" - " }\n" - " int localCount=0;\n" - " int block=0;\n" - " localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n" - " localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " atomic_inc(numActiveWgItems);\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " int localBreak = 0;\n" - " \n" - " int j=i+1;\n" - " do\n" - " {\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j<numObjects)\n" - " {\n" - " if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n" - " {\n" - " if (!localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (j>=numObjects && !localBreak)\n" - " {\n" - " atomic_inc(breakRequest);\n" - " localBreak = 1;\n" - " }\n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " \n" - " if (!localBreak)\n" - " {\n" - " if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" - " {\n" - " int4 myPair;\n" - " myPair.x = myAabb.m_minIndices[3];\n" - " myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" - " myPair.z = NEW_PAIR_MARKER;\n" - " myPair.w = NEW_PAIR_MARKER;\n" - " int curPair = atomic_inc (pairCount);\n" - " if (curPair<maxPairs)\n" - " {\n" - " pairsOut[curPair] = myPair; //flush to main memory\n" - " }\n" - " }\n" - " }\n" - " \n" - " barrier(CLK_LOCAL_MEM_FENCE);\n" - " localCount++;\n" - " if (localCount==64)\n" - " {\n" - " localCount = 0;\n" - " block+=64; \n" - " localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n" - " localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n" - " }\n" - " j++;\n" - " \n" - " } while (breakRequest[0]<numActiveWgItems[0]);\n" - " \n" - "}\n" - "//http://stereopsis.com/radix.html\n" - "unsigned int FloatFlip(float fl);\n" - "unsigned int FloatFlip(float fl)\n" - "{\n" - " unsigned int f = *(unsigned int*)&fl;\n" - " unsigned int mask = -(int)(f >> 31) | 0x80000000;\n" - " return f ^ mask;\n" - "}\n" - "float IFloatFlip(unsigned int f);\n" - "float IFloatFlip(unsigned int f)\n" - "{\n" - " unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" - " unsigned int fl = f ^ mask;\n" - " return *(float*)&fl;\n" - "}\n" - "__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " int src = destAabbs[i].m_maxIndices[3];\n" - " destAabbs[i] = allAabbs[src];\n" - " destAabbs[i].m_maxIndices[3] = src;\n" - "}\n" - "__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " \n" - " sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" - " sortData[i].y = i;\n" - " \n" - "}\n" - "__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numObjects)\n" - " return;\n" - " \n" - " sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" - "}\n" - "__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" - "{\n" - " int i = get_global_id(0);\n" - " if (i>=numAabbs)\n" - " return;\n" - " \n" - " btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n" - " \n" - " float4 s;\n" - " s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" - " sum[i]=s;\n" - " sum2[i]=s*s; \n" - "}\n"; |