1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#ifndef B3_LAUNCHER_CL_H
#define B3_LAUNCHER_CL_H
#include "b3BufferInfoCL.h"
#include "Bullet3Common/b3MinMax.h"
#include "b3OpenCLArray.h"
#include <stdio.h>
#define B3_DEBUG_SERIALIZE_CL
#ifdef _WIN32
#pragma warning(disable : 4996)
#endif
#define B3_CL_MAX_ARG_SIZE 16
B3_ATTRIBUTE_ALIGNED16(struct)
b3KernelArgData
{
int m_isBuffer;
int m_argIndex;
int m_argSizeInBytes;
int m_unusedPadding;
union {
cl_mem m_clBuffer;
unsigned char m_argData[B3_CL_MAX_ARG_SIZE];
};
};
class b3LauncherCL
{
cl_command_queue m_commandQueue;
cl_kernel m_kernel;
int m_idx;
b3AlignedObjectArray<b3KernelArgData> m_kernelArguments;
int m_serializationSizeInBytes;
bool m_enableSerialization;
const char* m_name;
public:
b3AlignedObjectArray<b3OpenCLArray<unsigned char>*> m_arrays;
b3LauncherCL(cl_command_queue queue, cl_kernel kernel, const char* name);
virtual ~b3LauncherCL();
void setBuffer(cl_mem clBuffer);
void setBuffers(b3BufferInfoCL* buffInfo, int n);
int getSerializationBufferSize() const
{
return m_serializationSizeInBytes;
}
int deserializeArgs(unsigned char* buf, int bufSize, cl_context ctx);
inline int validateResults(unsigned char* goldBuffer, int goldBufferCapacity, cl_context ctx);
int serializeArguments(unsigned char* destBuffer, int destBufferCapacity);
int getNumArguments() const
{
return m_kernelArguments.size();
}
b3KernelArgData getArgument(int index)
{
return m_kernelArguments[index];
}
void serializeToFile(const char* fileName, int numWorkItems);
template <typename T>
inline void setConst(const T& consts)
{
int sz = sizeof(T);
b3Assert(sz <= B3_CL_MAX_ARG_SIZE);
if (m_enableSerialization)
{
b3KernelArgData kernelArg;
kernelArg.m_argIndex = m_idx;
kernelArg.m_isBuffer = 0;
T* destArg = (T*)kernelArg.m_argData;
*destArg = consts;
kernelArg.m_argSizeInBytes = sizeof(T);
m_kernelArguments.push_back(kernelArg);
m_serializationSizeInBytes += sizeof(b3KernelArgData);
}
cl_int status = clSetKernelArg(m_kernel, m_idx++, sz, &consts);
b3Assert(status == CL_SUCCESS);
}
inline void launch1D(int numThreads, int localSize = 64)
{
launch2D(numThreads, 1, localSize, 1);
}
inline void launch2D(int numThreadsX, int numThreadsY, int localSizeX, int localSizeY)
{
size_t gRange[3] = {1, 1, 1};
size_t lRange[3] = {1, 1, 1};
lRange[0] = localSizeX;
lRange[1] = localSizeY;
gRange[0] = b3Max((size_t)1, (numThreadsX / lRange[0]) + (!(numThreadsX % lRange[0]) ? 0 : 1));
gRange[0] *= lRange[0];
gRange[1] = b3Max((size_t)1, (numThreadsY / lRange[1]) + (!(numThreadsY % lRange[1]) ? 0 : 1));
gRange[1] *= lRange[1];
cl_int status = clEnqueueNDRangeKernel(m_commandQueue,
m_kernel, 2, NULL, gRange, lRange, 0, 0, 0);
if (status != CL_SUCCESS)
{
printf("Error: OpenCL status = %d\n", status);
}
b3Assert(status == CL_SUCCESS);
}
void enableSerialization(bool serialize)
{
m_enableSerialization = serialize;
}
};
#endif //B3_LAUNCHER_CL_H
|