diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h | 95 |
1 files changed, 95 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h new file mode 100644 index 0000000000..975bd80e53 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h @@ -0,0 +1,95 @@ + +#ifndef B3_RADIXSORT32_H +#define B3_RADIXSORT32_H + +#include "b3OpenCLArray.h" + +struct b3SortData +{ + union + { + unsigned int m_key; + unsigned int x; + }; + + union + { + unsigned int m_value; + unsigned int y; + + }; +}; +#include "b3BufferInfoCL.h" + +class b3RadixSort32CL +{ + + b3OpenCLArray<unsigned int>* m_workBuffer1; + b3OpenCLArray<unsigned int>* m_workBuffer2; + + b3OpenCLArray<b3SortData>* m_workBuffer3; + b3OpenCLArray<b3SortData>* m_workBuffer4; + + b3OpenCLArray<unsigned int>* m_workBuffer3a; + b3OpenCLArray<unsigned int>* m_workBuffer4a; + + cl_command_queue m_commandQueue; + + cl_kernel m_streamCountSortDataKernel; + cl_kernel m_streamCountKernel; + + cl_kernel m_prefixScanKernel; + cl_kernel m_sortAndScatterSortDataKernel; + cl_kernel m_sortAndScatterKernel; + + + bool m_deviceCPU; + + class b3PrefixScanCL* m_scan; + class b3FillCL* m_fill; + +public: + struct b3ConstData + { + int m_n; + int m_nWGs; + int m_startBit; + int m_nBlocksPerWG; + }; + enum + { + DATA_ALIGNMENT = 256, + WG_SIZE = 64, + BLOCK_SIZE = 256, + ELEMENTS_PER_WORK_ITEM = (BLOCK_SIZE/WG_SIZE), + BITS_PER_PASS = 4, + NUM_BUCKET=(1<<BITS_PER_PASS), + // if you change this, change nPerWI in kernel as well + NUM_WGS = 20*6, // cypress +// NUM_WGS = 24*6, // cayman +// NUM_WGS = 32*4, // nv + }; + + +private: + + +public: + + b3RadixSort32CL(cl_context ctx, cl_device_id device, cl_command_queue queue, int initialCapacity =0); + + virtual ~b3RadixSort32CL(); + + void execute(b3OpenCLArray<unsigned int>& keysIn, b3OpenCLArray<unsigned int>& keysOut, b3OpenCLArray<unsigned int>& valuesIn, + b3OpenCLArray<unsigned int>& valuesOut, int n, int sortBits = 32); + + ///keys only + void execute(b3OpenCLArray<unsigned int>& keysInOut, int sortBits = 32 ); + + void execute(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32 ); + void executeHost(b3OpenCLArray<b3SortData>& keyValuesInOut, int sortBits = 32); + void executeHost(b3AlignedObjectArray<b3SortData>& keyValuesInOut, int sortBits = 32); + +}; +#endif //B3_RADIXSORT32_H + |