summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h151
1 files changed, 151 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
new file mode 100644
index 0000000000..8d36ac78f2
--- /dev/null
+++ b/thirdparty/bullet/Bullet3OpenCL/BroadphaseCollision/b3GpuSapBroadphase.h
@@ -0,0 +1,151 @@
+#ifndef B3_GPU_SAP_BROADPHASE_H
+#define B3_GPU_SAP_BROADPHASE_H
+
+#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
+#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" //b3Int2
+class b3Vector3;
+#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h"
+
+#include "b3SapAabb.h"
+#include "Bullet3Common/shared/b3Int2.h"
+
+#include "b3GpuBroadphaseInterface.h"
+
+
+class b3GpuSapBroadphase : public b3GpuBroadphaseInterface
+{
+
+ cl_context m_context;
+ cl_device_id m_device;
+ cl_command_queue m_queue;
+ cl_kernel m_flipFloatKernel;
+ cl_kernel m_scatterKernel ;
+ cl_kernel m_copyAabbsKernel;
+ cl_kernel m_sapKernel;
+ cl_kernel m_sap2Kernel;
+ cl_kernel m_prepareSumVarianceKernel;
+
+
+ class b3RadixSort32CL* m_sorter;
+
+ ///test for 3d SAP
+ b3AlignedObjectArray<b3SortData> m_sortedAxisCPU[3][2];
+ b3AlignedObjectArray<b3UnsignedInt2> m_objectMinMaxIndexCPU[3][2];
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0;
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1;
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2;
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis0prev;
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis1prev;
+ b3OpenCLArray<b3UnsignedInt2> m_objectMinMaxIndexGPUaxis2prev;
+
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU0;
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU1;
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU2;
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU0prev;
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU1prev;
+ b3OpenCLArray<b3SortData> m_sortedAxisGPU2prev;
+
+
+ b3OpenCLArray<b3Int4> m_addedHostPairsGPU;
+ b3OpenCLArray<b3Int4> m_removedHostPairsGPU;
+ b3OpenCLArray<int> m_addedCountGPU;
+ b3OpenCLArray<int> m_removedCountGPU;
+
+ int m_currentBuffer;
+
+public:
+
+ b3OpenCLArray<int> m_pairCount;
+
+
+ b3OpenCLArray<b3SapAabb> m_allAabbsGPU;
+ b3AlignedObjectArray<b3SapAabb> m_allAabbsCPU;
+
+ virtual b3OpenCLArray<b3SapAabb>& getAllAabbsGPU()
+ {
+ return m_allAabbsGPU;
+ }
+ virtual b3AlignedObjectArray<b3SapAabb>& getAllAabbsCPU()
+ {
+ return m_allAabbsCPU;
+ }
+
+ b3OpenCLArray<b3Vector3> m_sum;
+ b3OpenCLArray<b3Vector3> m_sum2;
+ b3OpenCLArray<b3Vector3> m_dst;
+
+ b3OpenCLArray<int> m_smallAabbsMappingGPU;
+ b3AlignedObjectArray<int> m_smallAabbsMappingCPU;
+
+ b3OpenCLArray<int> m_largeAabbsMappingGPU;
+ b3AlignedObjectArray<int> m_largeAabbsMappingCPU;
+
+
+ b3OpenCLArray<b3Int4> m_overlappingPairs;
+
+ //temporary gpu work memory
+ b3OpenCLArray<b3SortData> m_gpuSmallSortData;
+ b3OpenCLArray<b3SapAabb> m_gpuSmallSortedAabbs;
+
+ class b3PrefixScanFloat4CL* m_prefixScanFloat4;
+
+ enum b3GpuSapKernelType
+ {
+ B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU=1,
+ B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU,
+ B3_GPU_SAP_KERNEL_ORIGINAL,
+ B3_GPU_SAP_KERNEL_BARRIER,
+ B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY
+ };
+
+ b3GpuSapBroadphase(cl_context ctx,cl_device_id device, cl_command_queue q , b3GpuSapKernelType kernelType=B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
+ virtual ~b3GpuSapBroadphase();
+
+ static b3GpuBroadphaseInterface* CreateFuncBruteForceCpu(cl_context ctx,cl_device_id device, cl_command_queue q)
+ {
+ return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_CPU);
+ }
+
+ static b3GpuBroadphaseInterface* CreateFuncBruteForceGpu(cl_context ctx,cl_device_id device, cl_command_queue q)
+ {
+ return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BRUTE_FORCE_GPU);
+ }
+
+ static b3GpuBroadphaseInterface* CreateFuncOriginal(cl_context ctx,cl_device_id device, cl_command_queue q)
+ {
+ return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_ORIGINAL);
+ }
+ static b3GpuBroadphaseInterface* CreateFuncBarrier(cl_context ctx,cl_device_id device, cl_command_queue q)
+ {
+ return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_BARRIER);
+ }
+ static b3GpuBroadphaseInterface* CreateFuncLocalMemory(cl_context ctx,cl_device_id device, cl_command_queue q)
+ {
+ return new b3GpuSapBroadphase(ctx,device,q,B3_GPU_SAP_KERNEL_LOCAL_SHARED_MEMORY);
+ }
+
+
+ virtual void calculateOverlappingPairs(int maxPairs);
+ virtual void calculateOverlappingPairsHost(int maxPairs);
+
+ void reset();
+
+ void init3dSap();
+ virtual void calculateOverlappingPairsHostIncremental3Sap();
+
+ virtual void createProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask);
+ virtual void createLargeProxy(const b3Vector3& aabbMin, const b3Vector3& aabbMax, int userPtr , int collisionFilterGroup, int collisionFilterMask);
+
+ //call writeAabbsToGpu after done making all changes (createProxy etc)
+ virtual void writeAabbsToGpu();
+
+ virtual cl_mem getAabbBufferWS();
+ virtual int getNumOverlap();
+ virtual cl_mem getOverlappingPairBuffer();
+
+ virtual b3OpenCLArray<b3Int4>& getOverlappingPairsGPU();
+ virtual b3OpenCLArray<int>& getSmallAabbIndicesGPU();
+ virtual b3OpenCLArray<int>& getLargeAabbIndicesGPU();
+};
+
+#endif //B3_GPU_SAP_BROADPHASE_H \ No newline at end of file