summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h103
1 files changed, 46 insertions, 57 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h
index 975bd80e53..69caf182d7 100644
--- a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h
+++ b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h
@@ -6,90 +6,79 @@
struct b3SortData
{
- union
- {
+ union {
unsigned int m_key;
unsigned int x;
};
- union
- {
+ union {
unsigned int m_value;
unsigned int y;
-
};
};
#include "b3BufferInfoCL.h"
-class b3RadixSort32CL
+class b3RadixSort32CL
{
+ b3OpenCLArray<unsigned int>* m_workBuffer1;
+ b3OpenCLArray<unsigned int>* m_workBuffer2;
- b3OpenCLArray<unsigned int>* m_workBuffer1;
- b3OpenCLArray<unsigned int>* m_workBuffer2;
-
- b3OpenCLArray<b3SortData>* m_workBuffer3;
- b3OpenCLArray<b3SortData>* m_workBuffer4;
-
- b3OpenCLArray<unsigned int>* m_workBuffer3a;
- b3OpenCLArray<unsigned int>* m_workBuffer4a;
+ b3OpenCLArray<b3SortData>* m_workBuffer3;
+ b3OpenCLArray<b3SortData>* m_workBuffer4;
- cl_command_queue m_commandQueue;
+ b3OpenCLArray<unsigned int>* m_workBuffer3a;
+ b3OpenCLArray<unsigned int>* m_workBuffer4a;
- cl_kernel m_streamCountSortDataKernel;
- cl_kernel m_streamCountKernel;
+ cl_command_queue m_commandQueue;
- cl_kernel m_prefixScanKernel;
- cl_kernel m_sortAndScatterSortDataKernel;
- cl_kernel m_sortAndScatterKernel;
+ cl_kernel m_streamCountSortDataKernel;
+ cl_kernel m_streamCountKernel;
+ cl_kernel m_prefixScanKernel;
+ cl_kernel m_sortAndScatterSortDataKernel;
+ cl_kernel m_sortAndScatterKernel;
- bool m_deviceCPU;
+ bool m_deviceCPU;
- class b3PrefixScanCL* m_scan;
- class b3FillCL* m_fill;
+ class b3PrefixScanCL* m_scan;
+ class b3FillCL* m_fill;
public:
struct b3ConstData
- {
- int m_n;
- int m_nWGs;
- int m_startBit;
- int m_nBlocksPerWG;
- };
+ {
+ int m_n;
+ int m_nWGs;
+ int m_startBit;
+ int m_nBlocksPerWG;
+ };
enum
- {
- DATA_ALIGNMENT = 256,
- WG_SIZE = 64,
- BLOCK_SIZE = 256,
- ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE),
- BITS_PER_PASS = 4,
- NUM_BUCKET=(1<<BITS_PER_PASS),
- // if you change this, change nPerWI in kernel as well
- NUM_WGS = 20*6, // cypress
-// NUM_WGS = 24*6, // cayman
-// NUM_WGS = 32*4, // nv
- };
-
+ {
+ DATA_ALIGNMENT = 256,
+ WG_SIZE = 64,
+ BLOCK_SIZE = 256,
+ ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE / WG_SIZE),
+ BITS_PER_PASS = 4,
+ NUM_BUCKET = (1 << BITS_PER_PASS),
+ // if you change this, change nPerWI in kernel as well
+ NUM_WGS = 20 * 6, // cypress
+ // NUM_WGS = 24*6, // cayman
+ // NUM_WGS = 32*4, // nv
+ };
private:
-
-
public:
+ b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity = 0);
- b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0);
+ virtual ~b3RadixSort32CL();
- virtual ~b3RadixSort32CL();
+ void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
+ b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
- void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn,
- b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32);
-
- ///keys only
- void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 );
-
- void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 );
- void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
- void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
+ ///keys only
+ void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32);
+ void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
+ void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32);
+ void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32);
};
-#endif //B3_RADIXSORT32_H
-
+#endif //B3_RADIXSORT32_H