summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/src/Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h
blob: 1b267b31ef9765cfd17cddf5812312bdd824da8b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135

#ifndef B3_LAUNCHER_CL_H
#define B3_LAUNCHER_CL_H

#include "b3BufferInfoCL.h"
#include "Bullet3Common/b3MinMax.h"
#include "b3OpenCLArray.h"
#include <stdio.h>

#define B3_DEBUG_SERIALIZE_CL


#ifdef _WIN32
#pragma warning(disable :4996)
#endif
#define B3_CL_MAX_ARG_SIZE 16
B3_ATTRIBUTE_ALIGNED16(struct) b3KernelArgData
{
    int m_isBuffer;
    int m_argIndex;
    int m_argSizeInBytes;
	int m_unusedPadding;
    union
    {
        cl_mem m_clBuffer;
        unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
    };
    
};

class b3LauncherCL
{

	cl_command_queue m_commandQueue;
	cl_kernel m_kernel;
	int m_idx;
	
    b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
    int m_serializationSizeInBytes;
	bool	m_enableSerialization;

	const char* m_name;
	public:

     b3AlignedObjectArray<b3OpenCLArray<unsigned char>* > m_arrays;
    
		b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
    
    virtual ~b3LauncherCL();
    
		void setBuffer( cl_mem clBuffer);

		void setBuffers( b3BufferInfoCL* buffInfo, int n );
    
    int getSerializationBufferSize() const 
    {
        return m_serializationSizeInBytes;
    }
    
    int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);

	inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);

    int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
    
	int getNumArguments() const
	{
		return m_kernelArguments.size();
	}

	b3KernelArgData getArgument(int index)
	{
		return m_kernelArguments[index];
	}

	void serializeToFile(const char* fileName, int numWorkItems);

	template<typename T>
		inline void setConst( const T& consts )
		{
			int sz=sizeof(T);
			b3Assert(sz<=B3_CL_MAX_ARG_SIZE);

			if (m_enableSerialization)
			{
				b3KernelArgData kernelArg;
				kernelArg.m_argIndex = m_idx;
				kernelArg.m_isBuffer = 0;
				T* destArg = (T*)kernelArg.m_argData;
				*destArg = consts;
				kernelArg.m_argSizeInBytes = sizeof(T);
				m_kernelArguments.push_back(kernelArg);
				m_serializationSizeInBytes+=sizeof(b3KernelArgData);
			}
            
			cl_int status = clSetKernelArg( m_kernel, m_idx++, sz, &consts );
			b3Assert( status == CL_SUCCESS );
		}

		inline void launch1D( int numThreads, int localSize = 64)
		{
			launch2D( numThreads, 1, localSize, 1 );
		}

		inline void launch2D( int numThreadsX, int numThreadsY, int localSizeX, int localSizeY )
		{
			size_t gRange[3] = {1,1,1};
			size_t lRange[3] = {1,1,1};
			lRange[0] = localSizeX;
			lRange[1] = localSizeY;
			gRange[0] = b3Max((size_t)1, (numThreadsX/lRange[0])+(!(numThreadsX%lRange[0])?0:1));
			gRange[0] *= lRange[0];
			gRange[1] = b3Max((size_t)1, (numThreadsY/lRange[1])+(!(numThreadsY%lRange[1])?0:1));
			gRange[1] *= lRange[1];

			cl_int status = clEnqueueNDRangeKernel( m_commandQueue, 
				m_kernel, 2, NULL, gRange, lRange, 0,0,0 );
            if (status != CL_SUCCESS)
            {
                printf("Error: OpenCL status = %d\n",status);
            }
			b3Assert( status == CL_SUCCESS );

		}
	
		void	enableSerialization(bool serialize)
		{
			m_enableSerialization = serialize;
		}
		
};



#endif //B3_LAUNCHER_CL_H