diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h | 681 |
1 files changed, 340 insertions, 341 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h index 04d40fcf26..d6999b94cb 100644 --- a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/kernels/sapKernels.h @@ -1,342 +1,341 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* sapCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#define NEW_PAIR_MARKER -1\n" -"typedef struct \n" -"{\n" -" union\n" -" {\n" -" float4 m_min;\n" -" float m_minElems[4];\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float4 m_max;\n" -" float m_maxElems[4];\n" -" int m_maxIndices[4];\n" -" };\n" -"} btAabbCL;\n" -"/// conservative test for overlap between two aabbs\n" -"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" -"bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" -" overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" -" overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" -" return overlap;\n" -"}\n" -"__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numUnsortedAabbs)\n" -" return;\n" -" int j = get_global_id(1);\n" -" if (j>=numUnSortedAabbs2)\n" -" return;\n" -" __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" -" __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" -" if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" -" {\n" -" int4 myPair;\n" -" \n" -" int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" -" int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" -" if (xIndex>yIndex)\n" -" {\n" -" int tmp = xIndex;\n" -" xIndex=yIndex;\n" -" yIndex=tmp;\n" -" }\n" -" \n" -" myPair.x = xIndex;\n" -" myPair.y = yIndex;\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" for (int j=i+1;j<numObjects;j++)\n" -" {\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" for (int j=i+1;j<numObjects;j++)\n" -" {\n" -" if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" -" {\n" -" break;\n" -" }\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int localId = get_local_id(0);\n" -" __local int numActiveWgItems[1];\n" -" __local int breakRequest[1];\n" -" if (localId==0)\n" -" {\n" -" numActiveWgItems[0] = 0;\n" -" breakRequest[0] = 0;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" atomic_inc(numActiveWgItems);\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" int localBreak = 0;\n" -" int j=i+1;\n" -" do\n" -" {\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j<numObjects)\n" -" {\n" -" if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" -" {\n" -" if (!localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j>=numObjects && !localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (!localBreak)\n" -" {\n" -" if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = aabbs[i].m_minIndices[3];\n" -" myPair.y = aabbs[j].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -" j++;\n" -" } while (breakRequest[0]<numActiveWgItems[0]);\n" -"}\n" -"__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" -"{\n" -" int i = get_global_id(0);\n" -" int localId = get_local_id(0);\n" -" __local int numActiveWgItems[1];\n" -" __local int breakRequest[1];\n" -" __local btAabbCL localAabbs[128];// = aabbs[i];\n" -" \n" -" btAabbCL myAabb;\n" -" \n" -" myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n" -" float testValue = myAabb.m_maxElems[axis];\n" -" \n" -" if (localId==0)\n" -" {\n" -" numActiveWgItems[0] = 0;\n" -" breakRequest[0] = 0;\n" -" }\n" -" int localCount=0;\n" -" int block=0;\n" -" localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n" -" localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" atomic_inc(numActiveWgItems);\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" int localBreak = 0;\n" -" \n" -" int j=i+1;\n" -" do\n" -" {\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j<numObjects)\n" -" {\n" -" if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n" -" {\n" -" if (!localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (j>=numObjects && !localBreak)\n" -" {\n" -" atomic_inc(breakRequest);\n" -" localBreak = 1;\n" -" }\n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" \n" -" if (!localBreak)\n" -" {\n" -" if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" -" {\n" -" int4 myPair;\n" -" myPair.x = myAabb.m_minIndices[3];\n" -" myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" -" myPair.z = NEW_PAIR_MARKER;\n" -" myPair.w = NEW_PAIR_MARKER;\n" -" int curPair = atomic_inc (pairCount);\n" -" if (curPair<maxPairs)\n" -" {\n" -" pairsOut[curPair] = myPair; //flush to main memory\n" -" }\n" -" }\n" -" }\n" -" \n" -" barrier(CLK_LOCAL_MEM_FENCE);\n" -" localCount++;\n" -" if (localCount==64)\n" -" {\n" -" localCount = 0;\n" -" block+=64; \n" -" localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n" -" localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n" -" }\n" -" j++;\n" -" \n" -" } while (breakRequest[0]<numActiveWgItems[0]);\n" -" \n" -"}\n" -"//http://stereopsis.com/radix.html\n" -"unsigned int FloatFlip(float fl);\n" -"unsigned int FloatFlip(float fl)\n" -"{\n" -" unsigned int f = *(unsigned int*)&fl;\n" -" unsigned int mask = -(int)(f >> 31) | 0x80000000;\n" -" return f ^ mask;\n" -"}\n" -"float IFloatFlip(unsigned int f);\n" -"float IFloatFlip(unsigned int f)\n" -"{\n" -" unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" -" unsigned int fl = f ^ mask;\n" -" return *(float*)&fl;\n" -"}\n" -"__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" int src = destAabbs[i].m_maxIndices[3];\n" -" destAabbs[i] = allAabbs[src];\n" -" destAabbs[i].m_maxIndices[3] = src;\n" -"}\n" -"__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" \n" -" \n" -" sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" -" sortData[i].y = i;\n" -" \n" -"}\n" -"__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numObjects)\n" -" return;\n" -" \n" -" sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" -"}\n" -"__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numAabbs)\n" -" return;\n" -" \n" -" btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n" -" \n" -" float4 s;\n" -" s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" -" sum[i]=s;\n" -" sum2[i]=s*s; \n" -"}\n" -; +static const char* sapCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Erwin Coumans\n" + "#define NEW_PAIR_MARKER -1\n" + "typedef struct \n" + "{\n" + " union\n" + " {\n" + " float4 m_min;\n" + " float m_minElems[4];\n" + " int m_minIndices[4];\n" + " };\n" + " union\n" + " {\n" + " float4 m_max;\n" + " float m_maxElems[4];\n" + " int m_maxIndices[4];\n" + " };\n" + "} btAabbCL;\n" + "/// conservative test for overlap between two aabbs\n" + "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2);\n" + "bool TestAabbAgainstAabb2(const btAabbCL* aabb1, __local const btAabbCL* aabb2)\n" + "{\n" + " bool overlap = true;\n" + " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" + " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" + " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" + " return overlap;\n" + "}\n" + "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" + "bool TestAabbAgainstAabb2GlobalGlobal(__global const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" + "{\n" + " bool overlap = true;\n" + " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" + " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" + " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" + " return overlap;\n" + "}\n" + "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2);\n" + "bool TestAabbAgainstAabb2Global(const btAabbCL* aabb1, __global const btAabbCL* aabb2)\n" + "{\n" + " bool overlap = true;\n" + " overlap = (aabb1->m_min.x > aabb2->m_max.x || aabb1->m_max.x < aabb2->m_min.x) ? false : overlap;\n" + " overlap = (aabb1->m_min.z > aabb2->m_max.z || aabb1->m_max.z < aabb2->m_min.z) ? false : overlap;\n" + " overlap = (aabb1->m_min.y > aabb2->m_max.y || aabb1->m_max.y < aabb2->m_min.y) ? false : overlap;\n" + " return overlap;\n" + "}\n" + "__kernel void computePairsKernelTwoArrays( __global const btAabbCL* unsortedAabbs, __global const int* unsortedAabbMapping, __global const int* unsortedAabbMapping2, volatile __global int4* pairsOut,volatile __global int* pairCount, int numUnsortedAabbs, int numUnSortedAabbs2, int axis, int maxPairs)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numUnsortedAabbs)\n" + " return;\n" + " int j = get_global_id(1);\n" + " if (j>=numUnSortedAabbs2)\n" + " return;\n" + " __global const btAabbCL* unsortedAabbPtr = &unsortedAabbs[unsortedAabbMapping[i]];\n" + " __global const btAabbCL* unsortedAabbPtr2 = &unsortedAabbs[unsortedAabbMapping2[j]];\n" + " if (TestAabbAgainstAabb2GlobalGlobal(unsortedAabbPtr,unsortedAabbPtr2))\n" + " {\n" + " int4 myPair;\n" + " \n" + " int xIndex = unsortedAabbPtr[0].m_minIndices[3];\n" + " int yIndex = unsortedAabbPtr2[0].m_minIndices[3];\n" + " if (xIndex>yIndex)\n" + " {\n" + " int tmp = xIndex;\n" + " xIndex=yIndex;\n" + " yIndex=tmp;\n" + " }\n" + " \n" + " myPair.x = xIndex;\n" + " myPair.y = yIndex;\n" + " myPair.z = NEW_PAIR_MARKER;\n" + " myPair.w = NEW_PAIR_MARKER;\n" + " int curPair = atomic_inc (pairCount);\n" + " if (curPair<maxPairs)\n" + " {\n" + " pairsOut[curPair] = myPair; //flush to main memory\n" + " }\n" + " }\n" + "}\n" + "__kernel void computePairsKernelBruteForce( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numObjects)\n" + " return;\n" + " for (int j=i+1;j<numObjects;j++)\n" + " {\n" + " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" + " {\n" + " int4 myPair;\n" + " myPair.x = aabbs[i].m_minIndices[3];\n" + " myPair.y = aabbs[j].m_minIndices[3];\n" + " myPair.z = NEW_PAIR_MARKER;\n" + " myPair.w = NEW_PAIR_MARKER;\n" + " int curPair = atomic_inc (pairCount);\n" + " if (curPair<maxPairs)\n" + " {\n" + " pairsOut[curPair] = myPair; //flush to main memory\n" + " }\n" + " }\n" + " }\n" + "}\n" + "__kernel void computePairsKernelOriginal( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numObjects)\n" + " return;\n" + " for (int j=i+1;j<numObjects;j++)\n" + " {\n" + " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" + " {\n" + " break;\n" + " }\n" + " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" + " {\n" + " int4 myPair;\n" + " myPair.x = aabbs[i].m_minIndices[3];\n" + " myPair.y = aabbs[j].m_minIndices[3];\n" + " myPair.z = NEW_PAIR_MARKER;\n" + " myPair.w = NEW_PAIR_MARKER;\n" + " int curPair = atomic_inc (pairCount);\n" + " if (curPair<maxPairs)\n" + " {\n" + " pairsOut[curPair] = myPair; //flush to main memory\n" + " }\n" + " }\n" + " }\n" + "}\n" + "__kernel void computePairsKernelBarrier( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" + "{\n" + " int i = get_global_id(0);\n" + " int localId = get_local_id(0);\n" + " __local int numActiveWgItems[1];\n" + " __local int breakRequest[1];\n" + " if (localId==0)\n" + " {\n" + " numActiveWgItems[0] = 0;\n" + " breakRequest[0] = 0;\n" + " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " atomic_inc(numActiveWgItems);\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " int localBreak = 0;\n" + " int j=i+1;\n" + " do\n" + " {\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (j<numObjects)\n" + " {\n" + " if(aabbs[i].m_maxElems[axis] < (aabbs[j].m_minElems[axis])) \n" + " {\n" + " if (!localBreak)\n" + " {\n" + " atomic_inc(breakRequest);\n" + " localBreak = 1;\n" + " }\n" + " }\n" + " }\n" + " \n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (j>=numObjects && !localBreak)\n" + " {\n" + " atomic_inc(breakRequest);\n" + " localBreak = 1;\n" + " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (!localBreak)\n" + " {\n" + " if (TestAabbAgainstAabb2GlobalGlobal(&aabbs[i],&aabbs[j]))\n" + " {\n" + " int4 myPair;\n" + " myPair.x = aabbs[i].m_minIndices[3];\n" + " myPair.y = aabbs[j].m_minIndices[3];\n" + " myPair.z = NEW_PAIR_MARKER;\n" + " myPair.w = NEW_PAIR_MARKER;\n" + " int curPair = atomic_inc (pairCount);\n" + " if (curPair<maxPairs)\n" + " {\n" + " pairsOut[curPair] = myPair; //flush to main memory\n" + " }\n" + " }\n" + " }\n" + " j++;\n" + " } while (breakRequest[0]<numActiveWgItems[0]);\n" + "}\n" + "__kernel void computePairsKernelLocalSharedMemory( __global const btAabbCL* aabbs, volatile __global int4* pairsOut,volatile __global int* pairCount, int numObjects, int axis, int maxPairs)\n" + "{\n" + " int i = get_global_id(0);\n" + " int localId = get_local_id(0);\n" + " __local int numActiveWgItems[1];\n" + " __local int breakRequest[1];\n" + " __local btAabbCL localAabbs[128];// = aabbs[i];\n" + " \n" + " btAabbCL myAabb;\n" + " \n" + " myAabb = (i<numObjects)? aabbs[i]:aabbs[0];\n" + " float testValue = myAabb.m_maxElems[axis];\n" + " \n" + " if (localId==0)\n" + " {\n" + " numActiveWgItems[0] = 0;\n" + " breakRequest[0] = 0;\n" + " }\n" + " int localCount=0;\n" + " int block=0;\n" + " localAabbs[localId] = (i+block)<numObjects? aabbs[i+block] : aabbs[0];\n" + " localAabbs[localId+64] = (i+block+64)<numObjects? aabbs[i+block+64]: aabbs[0];\n" + " \n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " atomic_inc(numActiveWgItems);\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " int localBreak = 0;\n" + " \n" + " int j=i+1;\n" + " do\n" + " {\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (j<numObjects)\n" + " {\n" + " if(testValue < (localAabbs[localCount+localId+1].m_minElems[axis])) \n" + " {\n" + " if (!localBreak)\n" + " {\n" + " atomic_inc(breakRequest);\n" + " localBreak = 1;\n" + " }\n" + " }\n" + " }\n" + " \n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (j>=numObjects && !localBreak)\n" + " {\n" + " atomic_inc(breakRequest);\n" + " localBreak = 1;\n" + " }\n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " \n" + " if (!localBreak)\n" + " {\n" + " if (TestAabbAgainstAabb2(&myAabb,&localAabbs[localCount+localId+1]))\n" + " {\n" + " int4 myPair;\n" + " myPair.x = myAabb.m_minIndices[3];\n" + " myPair.y = localAabbs[localCount+localId+1].m_minIndices[3];\n" + " myPair.z = NEW_PAIR_MARKER;\n" + " myPair.w = NEW_PAIR_MARKER;\n" + " int curPair = atomic_inc (pairCount);\n" + " if (curPair<maxPairs)\n" + " {\n" + " pairsOut[curPair] = myPair; //flush to main memory\n" + " }\n" + " }\n" + " }\n" + " \n" + " barrier(CLK_LOCAL_MEM_FENCE);\n" + " localCount++;\n" + " if (localCount==64)\n" + " {\n" + " localCount = 0;\n" + " block+=64; \n" + " localAabbs[localId] = ((i+block)<numObjects) ? aabbs[i+block] : aabbs[0];\n" + " localAabbs[localId+64] = ((i+64+block)<numObjects) ? aabbs[i+block+64] : aabbs[0];\n" + " }\n" + " j++;\n" + " \n" + " } while (breakRequest[0]<numActiveWgItems[0]);\n" + " \n" + "}\n" + "//http://stereopsis.com/radix.html\n" + "unsigned int FloatFlip(float fl);\n" + "unsigned int FloatFlip(float fl)\n" + "{\n" + " unsigned int f = *(unsigned int*)&fl;\n" + " unsigned int mask = -(int)(f >> 31) | 0x80000000;\n" + " return f ^ mask;\n" + "}\n" + "float IFloatFlip(unsigned int f);\n" + "float IFloatFlip(unsigned int f)\n" + "{\n" + " unsigned int mask = ((f >> 31) - 1) | 0x80000000;\n" + " unsigned int fl = f ^ mask;\n" + " return *(float*)&fl;\n" + "}\n" + "__kernel void copyAabbsKernel( __global const btAabbCL* allAabbs, __global btAabbCL* destAabbs, int numObjects)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numObjects)\n" + " return;\n" + " int src = destAabbs[i].m_maxIndices[3];\n" + " destAabbs[i] = allAabbs[src];\n" + " destAabbs[i].m_maxIndices[3] = src;\n" + "}\n" + "__kernel void flipFloatKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global int2* sortData, int numObjects, int axis)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numObjects)\n" + " return;\n" + " \n" + " \n" + " sortData[i].x = FloatFlip(allAabbs[smallAabbMapping[i]].m_minElems[axis]);\n" + " sortData[i].y = i;\n" + " \n" + "}\n" + "__kernel void scatterKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, volatile __global const int2* sortData, __global btAabbCL* sortedAabbs, int numObjects)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numObjects)\n" + " return;\n" + " \n" + " sortedAabbs[i] = allAabbs[smallAabbMapping[sortData[i].y]];\n" + "}\n" + "__kernel void prepareSumVarianceKernel( __global const btAabbCL* allAabbs, __global const int* smallAabbMapping, __global float4* sum, __global float4* sum2,int numAabbs)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numAabbs)\n" + " return;\n" + " \n" + " btAabbCL smallAabb = allAabbs[smallAabbMapping[i]];\n" + " \n" + " float4 s;\n" + " s = (smallAabb.m_max+smallAabb.m_min)*0.5f;\n" + " sum[i]=s;\n" + " sum2[i]=s*s; \n" + "}\n"; |