diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp | 213 |
1 files changed, 213 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp new file mode 100644 index 0000000000..a4980f71e1 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3BoundSearchCL.cpp @@ -0,0 +1,213 @@ +/* +Copyright (c) 2012 Advanced Micro Devices, Inc. + +This software is provided 'as-is', without any express or implied warranty. +In no event will the authors be held liable for any damages arising from the use of this software. +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it freely, +subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. +2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. +3. This notice may not be removed or altered from any source distribution. +*/ +//Originally written by Takahiro Harada +//Host-code rewritten by Erwin Coumans + +#define BOUNDSEARCH_PATH "src/Bullet3OpenCL/ParallelPrimitives/kernels/BoundSearchKernels.cl" +#define KERNEL0 "SearchSortDataLowerKernel" +#define KERNEL1 "SearchSortDataUpperKernel" +#define KERNEL2 "SubtractKernel" + + +#include "b3BoundSearchCL.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "b3LauncherCL.h" +#include "kernels/BoundSearchKernelsCL.h" + +b3BoundSearchCL::b3BoundSearchCL(cl_context ctx, cl_device_id device, cl_command_queue queue, int maxSize) + :m_context(ctx), + m_device(device), + m_queue(queue) +{ + + const char* additionalMacros = ""; + //const char* srcFileNameForCaching=""; + + cl_int pErrNum; + const char* kernelSource = boundSearchKernelsCL; + + cl_program boundSearchProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, kernelSource, &pErrNum,additionalMacros, BOUNDSEARCH_PATH); + b3Assert(boundSearchProg); + + m_lowerSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataLowerKernel", &pErrNum, boundSearchProg,additionalMacros ); + b3Assert(m_lowerSortDataKernel ); + + m_upperSortDataKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SearchSortDataUpperKernel", &pErrNum, boundSearchProg,additionalMacros ); + b3Assert(m_upperSortDataKernel); + + m_subtractKernel = 0; + + if( maxSize ) + { + m_subtractKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, kernelSource, "SubtractKernel", &pErrNum, boundSearchProg,additionalMacros ); + b3Assert(m_subtractKernel); + } + + //m_constBuffer = new b3OpenCLArray<b3Int4>( device, 1, BufferBase::BUFFER_CONST ); + + m_lower = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue,maxSize ); + m_upper = (maxSize == 0)? 0: new b3OpenCLArray<unsigned int>(ctx,queue, maxSize ); + + m_filler = new b3FillCL(ctx,device,queue); +} + +b3BoundSearchCL::~b3BoundSearchCL() +{ + + delete m_lower; + delete m_upper; + delete m_filler; + + clReleaseKernel(m_lowerSortDataKernel); + clReleaseKernel(m_upperSortDataKernel); + clReleaseKernel(m_subtractKernel); + + +} + + +void b3BoundSearchCL::execute(b3OpenCLArray<b3SortData>& src, int nSrc, b3OpenCLArray<unsigned int>& dst, int nDst, Option option ) +{ + b3Int4 constBuffer; + constBuffer.x = nSrc; + constBuffer.y = nDst; + + if( option == BOUND_LOWER ) + { + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL()) }; + + b3LauncherCL launcher( m_queue, m_lowerSortDataKernel,"m_lowerSortDataKernel" ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nSrc ); + launcher.setConst( nDst ); + + launcher.launch1D( nSrc, 64 ); + } + else if( option == BOUND_UPPER ) + { + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( src.getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; + + b3LauncherCL launcher(m_queue, m_upperSortDataKernel,"m_upperSortDataKernel" ); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nSrc ); + launcher.setConst( nDst ); + + launcher.launch1D( nSrc, 64 ); + } + else if( option == COUNT ) + { + b3Assert( m_lower ); + b3Assert( m_upper ); + b3Assert( m_lower->capacity() <= (int)nDst ); + b3Assert( m_upper->capacity() <= (int)nDst ); + + int zero = 0; + m_filler->execute( *m_lower, zero, nDst ); + m_filler->execute( *m_upper, zero, nDst ); + + execute( src, nSrc, *m_lower, nDst, BOUND_LOWER ); + execute( src, nSrc, *m_upper, nDst, BOUND_UPPER ); + + { + b3BufferInfoCL bInfo[] = { b3BufferInfoCL( m_upper->getBufferCL(), true ), b3BufferInfoCL( m_lower->getBufferCL(), true ), b3BufferInfoCL( dst.getBufferCL() ) }; + + b3LauncherCL launcher( m_queue, m_subtractKernel ,"m_subtractKernel"); + launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst( nSrc ); + launcher.setConst( nDst ); + + launcher.launch1D( nDst, 64 ); + } + } + else + { + b3Assert( 0 ); + } + +} + + +void b3BoundSearchCL::executeHost( b3AlignedObjectArray<b3SortData>& src, int nSrc, + b3AlignedObjectArray<unsigned int>& dst, int nDst, Option option ) +{ + + + for(int i=0; i<nSrc-1; i++) + b3Assert( src[i].m_key <= src[i+1].m_key ); + + b3SortData minData,zeroData,maxData; + minData.m_key = -1; + minData.m_value = -1; + zeroData.m_key=0; + zeroData.m_value=0; + maxData.m_key = nDst; + maxData.m_value = nDst; + + if( option == BOUND_LOWER ) + { + for(int i=0; i<nSrc; i++) + { + b3SortData& iData = (i==0)? minData: src[i-1]; + b3SortData& jData = (i==nSrc)? maxData: src[i]; + + if( iData.m_key != jData.m_key ) + { + int k = jData.m_key; + { + dst[k] = i; + } + } + } + } + else if( option == BOUND_UPPER ) + { + for(int i=1; i<nSrc+1; i++) + { + b3SortData& iData = src[i-1]; + b3SortData& jData = (i==nSrc)? maxData: src[i]; + + if( iData.m_key != jData.m_key ) + { + int k = iData.m_key; + { + dst[k] = i; + } + } + } + } + else if( option == COUNT ) + { + b3AlignedObjectArray<unsigned int> lower; + lower.resize(nDst ); + b3AlignedObjectArray<unsigned int> upper; + upper.resize(nDst ); + + for(int i=0; i<nDst; i++) + { + lower[i] = upper[i] = 0; + } + + executeHost( src, nSrc, lower, nDst, BOUND_LOWER ); + executeHost( src, nSrc, upper, nDst, BOUND_UPPER ); + + for( int i=0; i<nDst; i++) + { + dst[i] = upper[i] - lower[i]; + } + } + else + { + b3Assert( 0 ); + } +} |