summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp213
1 files changed, 213 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp
new file mode 100644
index 0000000000..a4980f71e1
--- /dev/null
+++ b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp
@@ -0,0 +1,213 @@
+/*
+Copyright (c) 2012 Advanced Micro Devices, Inc.
+
+This software is provided 'as-is', without any express or implied warranty.
+In no event will the authors be held liable for any damages arising from the use of this software.
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it freely,
+subject to the following restrictions:
+
+1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
+2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
+3. This notice may not be removed or altered from any source distribution.
+*/
+//Originally written by Takahiro Harada
+//Host-code rewritten by Erwin Coumans
+
+#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl"
+#define KERNEL0 "SearchSortDataLowerKernel"
+#define KERNEL1 "SearchSortDataUpperKernel"
+#define KERNEL2 "SubtractKernel"
+
+
+#include "b3BoundSearchCL.h"
+#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
+#include "b3LauncherCL.h"
+#include "kernels/BoundSearchKernelsCL.h"
+
+b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize)
+ :m_context(ctx),
+ m_device(device),
+ m_queue(queue)
+{
+
+ const char* additionalMacros = "";
+ //const char* srcFileNameForCaching="";
+
+ cl_int pErrNum;
+ const char* kernelSource = boundSearchKernelsCL;
+
+ cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH);
+ b3Assert(boundSearchProg);
+
+ m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros );
+ b3Assert(m_lowerSortDataKernel );
+
+ m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros );
+ b3Assert(m_upperSortDataKernel);
+
+ m_subtractKernel = 0;
+
+ if( maxSize )
+ {
+ m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros );
+ b3Assert(m_subtractKernel);
+ }
+
+ //m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST );
+
+ m_lower = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue,maxSize );
+ m_upper = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue, maxSize );
+
+ m_filler = new b3FillCL(ctx,device,queue);
+}
+
+b3BoundSearchCL::~b3BoundSearchCL()
+{
+
+ delete m_lower;
+ delete m_upper;
+ delete m_filler;
+
+ clReleaseKernel(m_lowerSortDataKernel);
+ clReleaseKernel(m_upperSortDataKernel);
+ clReleaseKernel(m_subtractKernel);
+
+
+}
+
+
+void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option )
+{
+ b3Int4 constBuffer;
+ constBuffer.x = nSrc;
+ constBuffer.y = nDst;
+
+ if( option == BOUND_LOWER )
+ {
+ b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) };
+
+ b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" );
+ launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setConst( nSrc );
+ launcher.setConst( nDst );
+
+ launcher.launch1D( nSrc, 64 );
+ }
+ else if( option == BOUND_UPPER )
+ {
+ b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
+
+ b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" );
+ launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setConst( nSrc );
+ launcher.setConst( nDst );
+
+ launcher.launch1D( nSrc, 64 );
+ }
+ else if( option == COUNT )
+ {
+ b3Assert( m_lower );
+ b3Assert( m_upper );
+ b3Assert( m_lower->capacity() <= (int)nDst );
+ b3Assert( m_upper->capacity() <= (int)nDst );
+
+ int zero = 0;
+ m_filler->execute( *m_lower, zero, nDst );
+ m_filler->execute( *m_upper, zero, nDst );
+
+ execute( src, nSrc, *m_lower, nDst, BOUND_LOWER );
+ execute( src, nSrc, *m_upper, nDst, BOUND_UPPER );
+
+ {
+ b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) };
+
+ b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel");
+ launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setConst( nSrc );
+ launcher.setConst( nDst );
+
+ launcher.launch1D( nDst, 64 );
+ }
+ }
+ else
+ {
+ b3Assert( 0 );
+ }
+
+}
+
+
+void b3BoundSearchCL::executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc,
+ b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option )
+{
+
+
+ for(int i=0; i<nSrc-1; i++)
+ b3Assert( src[i].m_key <= src[i+1].m_key );
+
+ b3SortData minData,zeroData,maxData;
+ minData.m_key = -1;
+ minData.m_value = -1;
+ zeroData.m_key=0;
+ zeroData.m_value=0;
+ maxData.m_key = nDst;
+ maxData.m_value = nDst;
+
+ if( option == BOUND_LOWER )
+ {
+ for(int i=0; i<nSrc; i++)
+ {
+ b3SortData& iData = (i==0)? minData: src[i-1];
+ b3SortData& jData = (i==nSrc)? maxData: src[i];
+
+ if( iData.m_key != jData.m_key )
+ {
+ int k = jData.m_key;
+ {
+ dst[k] = i;
+ }
+ }
+ }
+ }
+ else if( option == BOUND_UPPER )
+ {
+ for(int i=1; i<nSrc+1; i++)
+ {
+ b3SortData& iData = src[i-1];
+ b3SortData& jData = (i==nSrc)? maxData: src[i];
+
+ if( iData.m_key != jData.m_key )
+ {
+ int k = iData.m_key;
+ {
+ dst[k] = i;
+ }
+ }
+ }
+ }
+ else if( option == COUNT )
+ {
+ b3AlignedObjectArray<unsigned int> lower;
+ lower.resize(nDst );
+ b3AlignedObjectArray<unsigned int> upper;
+ upper.resize(nDst );
+
+ for(int i=0; i<nDst; i++)
+ {
+ lower[i] = upper[i] = 0;
+ }
+
+ executeHost( src, nSrc, lower, nDst, BOUND_LOWER );
+ executeHost( src, nSrc, upper, nDst, BOUND_UPPER );
+
+ for( int i=0; i<nDst; i++)
+ {
+ dst[i] = upper[i] - lower[i];
+ }
+ }
+ else
+ {
+ b3Assert( 0 );
+ }
+}