#ifndef B3_OPENCL_ARRAY_H
#define B3_OPENCL_ARRAY_H

#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h"

template <typename T>
class b3OpenCLArray
{
	size_t	m_size;
	size_t	m_capacity;
	cl_mem	m_clBuffer;

	cl_context		 m_clContext;
	cl_command_queue m_commandQueue;

	bool	m_ownsMemory;

	bool	m_allowGrowingCapacity;

	void deallocate()
	{
		if (m_clBuffer && m_ownsMemory)
		{
			clReleaseMemObject(m_clBuffer);
		}
		m_clBuffer = 0;
		m_capacity=0;
	}

	b3OpenCLArray<T>& operator=(const b3OpenCLArray<T>& src);

	B3_FORCE_INLINE	size_t	allocSize(size_t size)
		{
			return (size ? size*2 : 1);
		}

public:

	b3OpenCLArray(cl_context ctx, cl_command_queue queue, size_t initialCapacity=0, bool allowGrowingCapacity=true)
	:m_size(0),  m_capacity(0),m_clBuffer(0),
	m_clContext(ctx),m_commandQueue(queue),
	m_ownsMemory(true),m_allowGrowingCapacity(true)
	{
		if (initialCapacity)
		{
			reserve(initialCapacity);
		}
		m_allowGrowingCapacity = allowGrowingCapacity;
	}

	///this is an error-prone method with no error checking, be careful!
	void setFromOpenCLBuffer(cl_mem buffer, size_t sizeInElements)
	{
		deallocate();
		m_ownsMemory = false;
		m_allowGrowingCapacity = false;
		m_clBuffer = buffer;
		m_size = sizeInElements;
		m_capacity = sizeInElements;
	}

// we could enable this assignment, but need to make sure to avoid accidental deep copies
//	b3OpenCLArray<T>& operator=(const b3AlignedObjectArray<T>& src)
//	{
//		copyFromArray(src);
//		return *this;
//	}


	cl_mem	getBufferCL() const
	{
		return m_clBuffer;
	}


	virtual ~b3OpenCLArray()
	{
		deallocate();
		m_size=0;
		m_capacity=0;
	}

	B3_FORCE_INLINE	bool push_back(const T& _Val,bool waitForCompletion=true)
	{
		bool result = true;
		size_t sz = size();
		if( sz == capacity() )
		{
			result = reserve( allocSize(size()) );
		}
		copyFromHostPointer(&_Val, 1, sz, waitForCompletion);
		m_size++;
		return result;
	}

	B3_FORCE_INLINE T forcedAt(size_t n) const
	{
		b3Assert(n>=0);
		b3Assert(n<capacity());
		T elem;
		copyToHostPointer(&elem,1,n,true);
		return elem;
	}

	B3_FORCE_INLINE T at(size_t n) const
	{
		b3Assert(n>=0);
		b3Assert(n<size());
		T elem;
		copyToHostPointer(&elem,1,n,true);
		return elem;
	}

	B3_FORCE_INLINE	bool resize(size_t newsize, bool copyOldContents=true)
	{
		bool result = true;
		size_t curSize = size();

		if (newsize < curSize)
		{
			//leave the OpenCL memory for now
		} else
		{
			if (newsize > size())
			{
				result = reserve(newsize,copyOldContents);
			}

			//leave new data uninitialized (init in debug mode?)
			//for (size_t i=curSize;i<newsize;i++) ...
		}

		if (result)
		{
			m_size = newsize;
		} else
		{
			m_size = 0;
		}
		return result;
	}

	B3_FORCE_INLINE size_t size() const
	{
		return m_size;
	}

	B3_FORCE_INLINE	size_t capacity() const
	{
		return m_capacity;
	}

	B3_FORCE_INLINE	bool reserve(size_t _Count, bool copyOldContents=true)
	{
		bool result=true;
		// determine new minimum length of allocated storage
		if (capacity() < _Count)
		{	// not enough room, reallocate

			if (m_allowGrowingCapacity)
			{
				cl_int ciErrNum;
				//create a new OpenCL buffer
				size_t memSizeInBytes = sizeof(T)*_Count;
				cl_mem buf = clCreateBuffer(m_clContext, CL_MEM_READ_WRITE, memSizeInBytes, NULL, &ciErrNum);
				if (ciErrNum!=CL_SUCCESS)
				{
					b3Error("OpenCL out-of-memory\n");
					_Count = 0;
					result = false;
				}
//#define B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
#ifdef B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS
				unsigned char* src = (unsigned char*)malloc(memSizeInBytes);
				for (size_t i=0;i<memSizeInBytes;i++)
					src[i] = 0xbb;
				ciErrNum = clEnqueueWriteBuffer( m_commandQueue, buf, CL_TRUE, 0, memSizeInBytes, src, 0,0,0 );
				b3Assert(ciErrNum==CL_SUCCESS);
				clFinish(m_commandQueue);
				free(src);
#endif //B3_ALWAYS_INITIALIZE_OPENCL_BUFFERS

				if (result)
				{
					if (copyOldContents)
						copyToCL(buf, size());
				}

				//deallocate the old buffer
				deallocate();

				m_clBuffer = buf;

				m_capacity = _Count;
			} else
			{
				//fail: assert and
				b3Assert(0);
				deallocate();
				result=false;
			}
		}
		return result;
	}


	void copyToCL(cl_mem destination, size_t numElements, size_t firstElem=0, size_t dstOffsetInElems=0) const
	{
		if (numElements<=0)
			return;

		b3Assert(m_clBuffer);
		b3Assert(destination);

		//likely some error, destination is same as source
		b3Assert(m_clBuffer != destination);

		b3Assert((firstElem+numElements)<=m_size);

		cl_int status = 0;


		b3Assert(numElements>0);
		b3Assert(numElements<=m_size);

		size_t srcOffsetBytes = sizeof(T)*firstElem;
		size_t dstOffsetInBytes = sizeof(T)*dstOffsetInElems;

		status = clEnqueueCopyBuffer( m_commandQueue, m_clBuffer, destination,
			srcOffsetBytes, dstOffsetInBytes, sizeof(T)*numElements, 0, 0, 0 );

		b3Assert( status == CL_SUCCESS );
	}

	void copyFromHost(const b3AlignedObjectArray<T>& srcArray, bool waitForCompletion=true)
	{
		size_t newSize = srcArray.size();

		bool copyOldContents = false;
		resize (newSize,copyOldContents);
		if (newSize)
			copyFromHostPointer(&srcArray[0],newSize,0,waitForCompletion);

	}

	void copyFromHostPointer(const T* src, size_t numElems, size_t destFirstElem= 0, bool waitForCompletion=true)
	{
		b3Assert(numElems+destFirstElem <= capacity());

		if (numElems+destFirstElem)
		{
			cl_int status = 0;
			size_t sizeInBytes=sizeof(T)*numElems;
			status = clEnqueueWriteBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*destFirstElem, sizeInBytes,
			src, 0,0,0 );
			b3Assert(status == CL_SUCCESS );
			if (waitForCompletion)
				clFinish(m_commandQueue);
		} else
		{
			b3Error("copyFromHostPointer invalid range\n");
		}
	}


	void copyToHost(b3AlignedObjectArray<T>& destArray, bool waitForCompletion=true) const
	{
		destArray.resize(this->size());
		if (size())
			copyToHostPointer(&destArray[0], size(),0,waitForCompletion);
	}

	void copyToHostPointer(T* destPtr, size_t numElem, size_t srcFirstElem=0, bool waitForCompletion=true) const
	{
		b3Assert(numElem+srcFirstElem <= capacity());

		if(numElem+srcFirstElem <= capacity())
		{
			cl_int status = 0;
			status = clEnqueueReadBuffer( m_commandQueue, m_clBuffer, 0, sizeof(T)*srcFirstElem, sizeof(T)*numElem,
			destPtr, 0,0,0 );
			b3Assert( status==CL_SUCCESS );

			if (waitForCompletion)
				clFinish(m_commandQueue);
		} else
		{
			b3Error("copyToHostPointer invalid range\n");
		}
	}

	void copyFromOpenCLArray(const b3OpenCLArray& src)
	{
		size_t newSize = src.size();
		resize(newSize);
		if (size())
		{
			src.copyToCL(m_clBuffer,size());
		}
	}

};


#endif //B3_OPENCL_ARRAY_H