#ifndef B3_OPENCL_ARRAY_H #define B3_OPENCL_ARRAY_H #include "Bullet3Common/b3AlignedObjectArray.h" #include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" template <typename T> class b3OpenCLArray { size_t m_size; size_t m_capacity; cl_mem m_clBuffer; cl_context m_clContext; cl_command_queue m_commandQueue; bool m_ownsMemory; bool m_allowGrowingCapacity; void deallocate() { if (m_clBuffer && m_ownsMemory) { clReleaseMemObject(m_clBuffer); } m_clBuffer = 0; m_capacity=0; } b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src); B3_FORCE_INLINE size_t allocSize(size_t size) { return (size ? size*2 : 1); } public: b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true) :m_size(0), m_capacity(0),m_clBuffer(0), m_clContext(ctx),m_commandQueue(queue), m_ownsMemory(true),m_allowGrowingCapacity(true) { if (initialCapacity) { reserve(initialCapacity); } m_allowGrowingCapacity = allowGrowingCapacity; } ///this is an error-prone method with no error checking, be careful! void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements) { deallocate(); m_ownsMemory = false; m_allowGrowingCapacity = false; m_clBuffer = buffer; m_size = sizeInElements; m_capacity = sizeInElements; } // we could enable this assignment, but need to make sure to avoid accidental deep copies // b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src) // { // copyFromArray(src); // return *this; // } cl_mem getBufferCL() const { return m_clBuffer; } virtual ~b3OpenCLArray() { deallocate(); m_size=0; m_capacity=0; } B3_FORCE_INLINE bool push_back(const T& _Val,bool waitForCompletion=true) { bool result = true; size_t sz = size(); if( sz == capacity() ) { result = reserve( allocSize(size()) ); } copyFromHostPointer(&_Val, 1, sz, waitForCompletion); m_size++; return result; } B3_FORCE_INLINE T forcedAt(size_t n) const { b3Assert(n>=0); b3Assert(n<capacity()); T elem; copyToHostPointer(&elem,1,n,true); return elem; } B3_FORCE_INLINE T at(size_t n) const { b3Assert(n>=0); b3Assert(n<size()); T elem; copyToHostPointer(&elem,1,n,true); return elem; } B3_FORCE_INLINE bool resize(size_t newsize, bool copyOldContents=true) { bool result = true; size_t curSize = size(); if (newsize < curSize) { //leave the OpenCL memory for now } else { if (newsize > size()) { result = reserve(newsize,copyOldContents); } //leave new data uninitialized (init in debug mode?) //for (size_t i=curSize;i<newsize;i++) ... } if (result) { m_size = newsize; } else { m_size = 0; } return result; } B3_FORCE_INLINE size_t size() const { return m_size; } B3_FORCE_INLINE size_t capacity() const { return m_capacity; } B3_FORCE_INLINE bool reserve(size_t _Count, bool copyOldContents=true) { bool result=true; // determine new minimum length of allocated storage if (capacity() < _Count) { // not enough room, reallocate if (m_allowGrowingCapacity) { cl_int ciErrNum; //create a new OpenCL buffer size_t memSizeInBytes = sizeof(T)*_Count; cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum); if (ciErrNum!=CL_SUCCESS) { b3Error("OpenCL out-of-memory\n"); _Count = 0; result = false; } //#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS #ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS unsigned char* src = (unsigned char*)malloc(memSizeInBytes); for (size_t i=0;i<memSizeInBytes;i++) src[i] = 0xbb; ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 ); b3Assert(ciErrNum==CL_SUCCESS); clFinish(m_commandQueue); free(src); #endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS if (result) { if (copyOldContents) copyToCL(buf, size()); } //deallocate the old buffer deallocate(); m_clBuffer = buf; m_capacity = _Count; } else { //fail: assert and b3Assert(0); deallocate(); result=false; } } return result; } void copyToCL(cl_mem destination, size_t numElements, size_t firstElem=0, size_t dstOffsetInElems=0) const { if (numElements<=0) return; b3Assert(m_clBuffer); b3Assert(destination); //likely some error, destination is same as source b3Assert(m_clBuffer != destination); b3Assert((firstElem+numElements)<=m_size); cl_int status = 0; b3Assert(numElements>0); b3Assert(numElements<=m_size); size_t srcOffsetBytes = sizeof(T)*firstElem; size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems; status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination, srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 ); b3Assert( status == CL_SUCCESS ); } void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true) { size_t newSize = srcArray.size(); bool copyOldContents = false; resize (newSize,copyOldContents); if (newSize) copyFromHostPointer(&srcArray[0],newSize,0,waitForCompletion); } void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true) { b3Assert(numElems+destFirstElem <= capacity()); if (numElems+destFirstElem) { cl_int status = 0; size_t sizeInBytes=sizeof(T)*numElems; status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes, src, 0,0,0 ); b3Assert(status == CL_SUCCESS ); if (waitForCompletion) clFinish(m_commandQueue); } else { b3Error("copyFromHostPointer invalid range\n"); } } void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion=true) const { destArray.resize(this->size()); if (size()) copyToHostPointer(&destArray[0], size(),0,waitForCompletion); } void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const { b3Assert(numElem+srcFirstElem <= capacity()); if(numElem+srcFirstElem <= capacity()) { cl_int status = 0; status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem, destPtr, 0,0,0 ); b3Assert( status==CL_SUCCESS ); if (waitForCompletion) clFinish(m_commandQueue); } else { b3Error("copyToHostPointer invalid range\n"); } } void copyFromOpenCLArray(const b3OpenCLArray& src) { size_t newSize = src.size(); resize(newSize); if (size()) { src.copyToCL(m_clBuffer,size()); } } }; #endif //B3_OPENCL_ARRAY_H