#ifndef B3_LAUNCHER_CL_H #define B3_LAUNCHER_CL_H #include "b3BufferInfoCL.h" #include "Bullet3Common/b3MinMax.h" #include "b3OpenCLArray.h" #include <stdio.h> #define B3_DEBUG_SERIALIZE_CL #ifdef _WIN32 #pragma warning(disable :4996) #endif #define B3_CL_MAX_ARG_SIZE 16 B3_ATTRIBUTE_ALIGNED16(struct) b3KernelArgData { int m_isBuffer; int m_argIndex; int m_argSizeInBytes; int m_unusedPadding; union { cl_mem m_clBuffer; unsigned char m_argData[B3_CL_MAX_ARG_SIZE]; }; }; class b3LauncherCL { cl_command_queue m_commandQueue; cl_kernel m_kernel; int m_idx; b3AlignedObjectArray<b3KernelArgData> m_kernelArguments; int m_serializationSizeInBytes; bool m_enableSerialization; const char* m_name; public: b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays; b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name); virtual ~b3LauncherCL(); void setBuffer( cl_mem clBuffer); void setBuffers( b3BufferInfoCL* buffInfo, int n ); int getSerializationBufferSize() const { return m_serializationSizeInBytes; } int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx); inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx); int serializeArguments(unsigned char* destBuffer, int destBufferCapacity); int getNumArguments() const { return m_kernelArguments.size(); } b3KernelArgData getArgument(int index) { return m_kernelArguments[index]; } void serializeToFile(const char* fileName, int numWorkItems); template<typename T> inline void setConst( const T& consts ) { int sz=sizeof(T); b3Assert(sz<=B3_CL_MAX_ARG_SIZE); if (m_enableSerialization) { b3KernelArgData kernelArg; kernelArg.m_argIndex = m_idx; kernelArg.m_isBuffer = 0; T* destArg = (T*)kernelArg.m_argData; *destArg = consts; kernelArg.m_argSizeInBytes = sizeof(T); m_kernelArguments.push_back(kernelArg); m_serializationSizeInBytes+=sizeof(b3KernelArgData); } cl_int status = clSetKernelArg( m_kernel, m_idx++, sz, &consts ); b3Assert( status == CL_SUCCESS ); } inline void launch1D( int numThreads, int localSize = 64) { launch2D( numThreads, 1, localSize, 1 ); } inline void launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY ) { size_t gRange[3] = {1,1,1}; size_t lRange[3] = {1,1,1}; lRange[0] = localSizeX; lRange[1] = localSizeY; gRange[0] = b3Max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1)); gRange[0] *= lRange[0]; gRange[1] = b3Max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1)); gRange[1] *= lRange[1]; cl_int status = clEnqueueNDRangeKernel( m_commandQueue, m_kernel, 2, NULL, gRange, lRange, 0,0,0 ); if (status != CL_SUCCESS) { printf("Error: OpenCL status = %d\n",status); } b3Assert( status == CL_SUCCESS ); } void enableSerialization(bool serialize) { m_enableSerialization = serialize; } }; #endif //B3_LAUNCHER_CL_H