diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/Raycast')
4 files changed, 1243 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp new file mode 100644 index 0000000000..161e304f09 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp @@ -0,0 +1,391 @@ + +#include "b3GpuRaycast.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" +#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h" + + +#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3FillCL.h" +#include "Bullet3OpenCL/ParallelPrimitives/b3RadixSort32CL.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuBroadphaseInterface.h" +#include "Bullet3OpenCL/BroadphaseCollision/b3GpuParallelLinearBvh.h" + +#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h" + + +#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl" + + + +struct b3GpuRaycastInternalData +{ + cl_context m_context; + cl_device_id m_device; + cl_command_queue m_q; + cl_kernel m_raytraceKernel; + cl_kernel m_raytracePairsKernel; + cl_kernel m_findRayRigidPairIndexRanges; + + b3GpuParallelLinearBvh* m_plbvh; + b3RadixSort32CL* m_radixSorter; + b3FillCL* m_fill; + + //1 element per ray + b3OpenCLArray<b3RayInfo>* m_gpuRays; + b3OpenCLArray<b3RayHit>* m_gpuHitResults; + b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay; + b3OpenCLArray<int>* m_numRayRigidPairsPerRay; + + //1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB + b3OpenCLArray<int>* m_gpuNumRayRigidPairs; + b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index + + int m_test; +}; + +b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q) +{ + m_data = new b3GpuRaycastInternalData; + m_data->m_context = ctx; + m_data->m_device = device; + m_data->m_q = q; + m_data->m_raytraceKernel = 0; + m_data->m_raytracePairsKernel = 0; + m_data->m_findRayRigidPairIndexRanges = 0; + + m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q); + m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q); + m_data->m_fill = new b3FillCL(ctx, device, q); + + m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q); + m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q); + m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q); + m_data->m_numRayRigidPairsPerRay = new b3OpenCLArray<int>(ctx, q); + m_data->m_gpuNumRayRigidPairs = new b3OpenCLArray<int>(ctx, q); + m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q); + + { + cl_int errNum=0; + cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH); + b3Assert(errNum==CL_SUCCESS); + m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); + m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); + m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog); + b3Assert(errNum==CL_SUCCESS); + clReleaseProgram(prog); + } + + +} + +b3GpuRaycast::~b3GpuRaycast() +{ + clReleaseKernel(m_data->m_raytraceKernel); + clReleaseKernel(m_data->m_raytracePairsKernel); + clReleaseKernel(m_data->m_findRayRigidPairIndexRanges); + + delete m_data->m_plbvh; + delete m_data->m_radixSorter; + delete m_data->m_fill; + + delete m_data->m_gpuRays; + delete m_data->m_gpuHitResults; + delete m_data->m_firstRayRigidPairIndexPerRay; + delete m_data->m_numRayRigidPairsPerRay; + delete m_data->m_gpuNumRayRigidPairs; + delete m_data->m_gpuRayRigidPairs; + + delete m_data; +} + +bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction) +{ + b3Vector3 rs = rayFrom - spherePos; + b3Vector3 rayDir = rayTo-rayFrom; + + float A = b3Dot(rayDir,rayDir); + float B = b3Dot(rs, rayDir); + float C = b3Dot(rs, rs) - (radius * radius); + + float D = B * B - A*C; + + if (D > 0.0) + { + float t = (-B - sqrt(D))/A; + + if ( (t >= 0.0f) && (t < hitFraction) ) + { + hitFraction = t; + return true; + } + } + return false; +} + +bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly, + const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal) +{ + float exitFraction = hitFraction; + float enterFraction = -0.1f; + b3Vector3 curHitNormal=b3MakeVector3(0,0,0); + for (int i=0;i<poly.m_numFaces;i++) + { + const b3GpuFace& face = faces[poly.m_faceOffset+i]; + float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w; + float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w; + if (fromPlaneDist<0.f) + { + if (toPlaneDist >= 0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (exitFraction>fraction) + { + exitFraction = fraction; + } + } + } else + { + if (toPlaneDist<0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (enterFraction <= fraction) + { + enterFraction = fraction; + curHitNormal = face.m_plane; + curHitNormal.w = 0.f; + } + } else + { + return false; + } + } + if (exitFraction <= enterFraction) + return false; + } + + if (enterFraction < 0.f) + return false; + + hitFraction = enterFraction; + hitNormal = curHitNormal; + return true; +} + +void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, + int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData) +{ + +// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables); + + B3_PROFILE("castRaysHost"); + for (int r=0;r<rays.size();r++) + { + b3Vector3 rayFrom = rays[r].m_from; + b3Vector3 rayTo = rays[r].m_to; + float hitFraction = hitResults[r].m_hitFraction; + + int hitBodyIndex= -1; + b3Vector3 hitNormal; + + for (int b=0;b<numBodies;b++) + { + + const b3Vector3& pos = bodies[b].m_pos; + //const b3Quaternion& orn = bodies[b].m_quat; + + switch (collidables[bodies[b].m_collidableIdx].m_shapeType) + { + case SHAPE_SPHERE: + { + b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius; + if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction)) + { + hitBodyIndex = b; + b3Vector3 hitPoint; + hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); + hitNormal = (hitPoint-bodies[b].m_pos).normalize(); + } + } + case SHAPE_CONVEX_HULL: + { + + b3Transform convexWorldTransform; + convexWorldTransform.setIdentity(); + convexWorldTransform.setOrigin(bodies[b].m_pos); + convexWorldTransform.setRotation(bodies[b].m_quat); + b3Transform convexWorld2Local = convexWorldTransform.inverse(); + + b3Vector3 rayFromLocal = convexWorld2Local(rayFrom); + b3Vector3 rayToLocal = convexWorld2Local(rayTo); + + + int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex; + const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex]; + if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal)) + { + hitBodyIndex = b; + } + + + break; + } + default: + { + static bool once=true; + if (once) + { + once=false; + b3Warning("Raytest: unsupported shape type\n"); + } + } + } + } + if (hitBodyIndex>=0) + { + + hitResults[r].m_hitFraction = hitFraction; + hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction); + hitResults[r].m_hitNormal = hitNormal; + hitResults[r].m_hitBody = hitBodyIndex; + } + + } +} +///todo: add some acceleration structure (AABBs, tree etc) +void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, + int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase) +{ + //castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData); + + B3_PROFILE("castRaysGPU"); + + { + B3_PROFILE("raycast copyFromHost"); + m_data->m_gpuRays->copyFromHost(rays); + m_data->m_gpuHitResults->copyFromHost(hitResults); + + } + + int numRays = hitResults.size(); + { + m_data->m_firstRayRigidPairIndexPerRay->resize(numRays); + m_data->m_numRayRigidPairsPerRay->resize(numRays); + + m_data->m_gpuNumRayRigidPairs->resize(1); + m_data->m_gpuRayRigidPairs->resize(numRays * 16); + } + + //run kernel + const bool USE_BRUTE_FORCE_RAYCAST = false; + if(USE_BRUTE_FORCE_RAYCAST) + { + B3_PROFILE("raycast launch1D"); + + b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel"); + int numRays = rays.size(); + launcher.setConst(numRays); + + launcher.setBuffer(m_data->m_gpuRays->getBufferCL()); + launcher.setBuffer(m_data->m_gpuHitResults->getBufferCL()); + + launcher.setConst(numBodies); + launcher.setBuffer(narrowphaseData->m_bodyBufferGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL()); + launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()); + + launcher.launch1D(numRays); + clFinish(m_data->m_q); + } + else + { + m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() ); + + m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs); + + int numRayRigidPairs = -1; + m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1); + if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() ) + { + numRayRigidPairs = m_data->m_gpuRayRigidPairs->size(); + m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1); + } + + m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct + + //Sort ray-rigid pairs by ray index + { + B3_PROFILE("sort ray-rigid pairs"); + m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) ); + } + + //detect start,count of each ray pair + { + B3_PROFILE("detect ray-rigid pair index ranges"); + + { + B3_PROFILE("reset ray-rigid pair index ranges"); + + m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index + m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays); + clFinish(m_data->m_q); + } + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ), + + b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), + b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ) + }; + + b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numRayRigidPairs); + + launcher.launch1D(numRayRigidPairs); + clFinish(m_data->m_q); + } + + { + B3_PROFILE("ray-rigid intersection"); + + b3BufferInfoCL bufferInfo[] = + { + b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ), + b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ), + b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ), + b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ), + + b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ), + b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ), + + b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ) + }; + + b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel"); + launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) ); + launcher.setConst(numRays); + + launcher.launch1D(numRays); + clFinish(m_data->m_q); + } + } + + + + //copy results + { + B3_PROFILE("raycast copyToHost"); + m_data->m_gpuHitResults->copyToHost(hitResults); + } + +}
\ No newline at end of file diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h new file mode 100644 index 0000000000..3a5cf44b79 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h @@ -0,0 +1,32 @@ +#ifndef B3_GPU_RAYCAST_H +#define B3_GPU_RAYCAST_H + +#include "Bullet3Common/b3Vector3.h" +#include "Bullet3OpenCL/Initialize/b3OpenCLInclude.h" + +#include "Bullet3Common/b3AlignedObjectArray.h" +#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h" + + + +class b3GpuRaycast +{ +protected: + struct b3GpuRaycastInternalData* m_data; +public: + b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q); + virtual ~b3GpuRaycast(); + + void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults, + int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData); + + void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults, + int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, + const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase); + + + +}; + +#endif //B3_GPU_RAYCAST_H diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl new file mode 100644 index 0000000000..e72d96876b --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl @@ -0,0 +1,439 @@ + +#define SHAPE_CONVEX_HULL 3 +#define SHAPE_PLANE 4 +#define SHAPE_CONCAVE_TRIMESH 5 +#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6 +#define SHAPE_SPHERE 7 + + +typedef struct +{ + float4 m_from; + float4 m_to; +} b3RayInfo; + +typedef struct +{ + float m_hitFraction; + int m_hitResult0; + int m_hitResult1; + int m_hitResult2; + float4 m_hitPoint; + float4 m_hitNormal; +} b3RayHit; + +typedef struct +{ + float4 m_pos; + float4 m_quat; + float4 m_linVel; + float4 m_angVel; + + unsigned int m_collidableIdx; + float m_invMass; + float m_restituitionCoeff; + float m_frictionCoeff; +} Body; + +typedef struct Collidable +{ + union { + int m_numChildShapes; + int m_bvhIndex; + }; + float m_radius; + int m_shapeType; + int m_shapeIndex; +} Collidable; + + +typedef struct +{ + float4 m_localCenter; + float4 m_extents; + float4 mC; + float4 mE; + + float m_radius; + int m_faceOffset; + int m_numFaces; + int m_numVertices; + + int m_vertexOffset; + int m_uniqueEdgesOffset; + int m_numUniqueEdges; + int m_unused; + +} ConvexPolyhedronCL; + +typedef struct +{ + float4 m_plane; + int m_indexOffset; + int m_numIndices; +} b3GpuFace; + + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline + Quaternion qtMul(Quaternion a, Quaternion b); + +__inline + Quaternion qtNormalize(Quaternion in); + + +__inline + Quaternion qtInvert(Quaternion q); + + +__inline + float dot3F4(float4 a, float4 b) +{ + float4 a1 = (float4)(a.xyz,0.f); + float4 b1 = (float4)(b.xyz,0.f); + return dot(a1, b1); +} + + +__inline + Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross( a, b ); + ans += a.w*b+b.w*a; + // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline + Quaternion qtNormalize(Quaternion in) +{ + return fast_normalize(in); + // in /= length( in ); + // return in; +} +__inline + float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(q,vcpy); + out = qtMul(out,qInv); + return out; +} + +__inline + Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline + float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + + + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + + + + + +bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset, + __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal) +{ + rayFromLocal.w = 0.f; + rayToLocal.w = 0.f; + bool result = true; + + float exitFraction = hitFraction[0]; + float enterFraction = -0.3f; + float4 curHitNormal = (float4)(0,0,0,0); + for (int i=0;i<numFaces && result;i++) + { + b3GpuFace face = faces[faceOffset+i]; + float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w; + float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w; + if (fromPlaneDist<0.f) + { + if (toPlaneDist >= 0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (exitFraction>fraction) + { + exitFraction = fraction; + } + } + } else + { + if (toPlaneDist<0.f) + { + float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist); + if (enterFraction <= fraction) + { + enterFraction = fraction; + curHitNormal = face.m_plane; + curHitNormal.w = 0.f; + } + } else + { + result = false; + } + } + if (exitFraction <= enterFraction) + result = false; + } + + if (enterFraction < 0.f) + { + result = false; + } + + if (result) + { + hitFraction[0] = enterFraction; + hitNormal[0] = curHitNormal; + } + return result; +} + + + + + + +bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction) +{ + float4 rs = rayFrom - spherePos; + rs.w = 0.f; + float4 rayDir = rayTo-rayFrom; + rayDir.w = 0.f; + float A = dot(rayDir,rayDir); + float B = dot(rs, rayDir); + float C = dot(rs, rs) - (radius * radius); + + float D = B * B - A*C; + + if (D > 0.0f) + { + float t = (-B - sqrt(D))/A; + + if ( (t >= 0.0f) && (t < (*hitFraction)) ) + { + *hitFraction = t; + return true; + } + } + return false; +} + +float4 setInterpolate3(float4 from, float4 to, float t) +{ + float s = 1.0f - t; + float4 result; + result = s * from + t * to; + result.w = 0.f; + return result; +} + +__kernel void rayCastKernel( + int numRays, + const __global b3RayInfo* rays, + __global b3RayHit* hitResults, + const int numBodies, + __global Body* bodies, + __global Collidable* collidables, + __global const b3GpuFace* faces, + __global const ConvexPolyhedronCL* convexShapes ) +{ + + int i = get_global_id(0); + if (i>=numRays) + return; + + hitResults[i].m_hitFraction = 1.f; + + float4 rayFrom = rays[i].m_from; + float4 rayTo = rays[i].m_to; + float hitFraction = 1.f; + float4 hitPoint; + float4 hitNormal; + int hitBodyIndex= -1; + + int cachedCollidableIndex = -1; + Collidable cachedCollidable; + + for (int b=0;b<numBodies;b++) + { + if (hitResults[i].m_hitResult2==b) + continue; + Body body = bodies[b]; + float4 pos = body.m_pos; + float4 orn = body.m_quat; + if (cachedCollidableIndex != body.m_collidableIdx) + { + cachedCollidableIndex = body.m_collidableIdx; + cachedCollidable = collidables[cachedCollidableIndex]; + } + if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL) + { + + float4 invPos = (float4)(0,0,0,0); + float4 invOrn = (float4)(0,0,0,0); + float4 rayFromLocal = (float4)(0,0,0,0); + float4 rayToLocal = (float4)(0,0,0,0); + invOrn = qtInvert(orn); + invPos = qtRotate(invOrn, -pos); + rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; + rayToLocal = qtRotate( invOrn, rayTo) + invPos; + rayFromLocal.w = 0.f; + rayToLocal.w = 0.f; + int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces; + int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset; + if (numFaces) + { + if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) + { + hitBodyIndex = b; + + } + } + } + if (cachedCollidable.m_shapeType == SHAPE_SPHERE) + { + float radius = cachedCollidable.m_radius; + + if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) + { + hitBodyIndex = b; + hitNormal = (float4) (hitPoint-bodies[b].m_pos); + } + } + } + + if (hitBodyIndex>=0) + { + hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction); + hitResults[i].m_hitFraction = hitFraction; + hitResults[i].m_hitPoint = hitPoint; + hitResults[i].m_hitNormal = normalize(hitNormal); + hitResults[i].m_hitResult0 = hitBodyIndex; + } + +} + + +__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, + __global int* out_firstRayRigidPairIndexPerRay, + __global int* out_numRayRigidPairsPerRay, + int numRayRigidPairs) +{ + int rayRigidPairIndex = get_global_id(0); + if (rayRigidPairIndex >= numRayRigidPairs) return; + + int rayIndex = rayRigidPairs[rayRigidPairIndex].x; + + atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex); + atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]); +} + +__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, + __global b3RayHit* hitResults, + __global int* firstRayRigidPairIndexPerRay, + __global int* numRayRigidPairsPerRay, + + __global Body* bodies, + __global Collidable* collidables, + __global const b3GpuFace* faces, + __global const ConvexPolyhedronCL* convexShapes, + + __global int2* rayRigidPairs, + int numRays) +{ + int i = get_global_id(0); + if (i >= numRays) return; + + float4 rayFrom = rays[i].m_from; + float4 rayTo = rays[i].m_to; + + hitResults[i].m_hitFraction = 1.f; + + float hitFraction = 1.f; + float4 hitPoint; + float4 hitNormal; + int hitBodyIndex = -1; + + // + for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair) + { + int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i]; + int b = rayRigidPairs[rayRigidPairIndex].y; + + if (hitResults[i].m_hitResult2 == b) continue; + + Body body = bodies[b]; + Collidable rigidCollidable = collidables[body.m_collidableIdx]; + + float4 pos = body.m_pos; + float4 orn = body.m_quat; + + if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL) + { + float4 invPos = (float4)(0,0,0,0); + float4 invOrn = (float4)(0,0,0,0); + float4 rayFromLocal = (float4)(0,0,0,0); + float4 rayToLocal = (float4)(0,0,0,0); + invOrn = qtInvert(orn); + invPos = qtRotate(invOrn, -pos); + rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos; + rayToLocal = qtRotate( invOrn, rayTo) + invPos; + rayFromLocal.w = 0.f; + rayToLocal.w = 0.f; + int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces; + int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset; + + if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal)) + { + hitBodyIndex = b; + hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); + } + } + + if (rigidCollidable.m_shapeType == SHAPE_SPHERE) + { + float radius = rigidCollidable.m_radius; + + if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction)) + { + hitBodyIndex = b; + hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction); + hitNormal = (float4) (hitPoint - bodies[b].m_pos); + } + } + } + + if (hitBodyIndex >= 0) + { + hitResults[i].m_hitFraction = hitFraction; + hitResults[i].m_hitPoint = hitPoint; + hitResults[i].m_hitNormal = normalize(hitNormal); + hitResults[i].m_hitResult0 = hitBodyIndex; + } + +} diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h new file mode 100644 index 0000000000..6257909a4d --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h @@ -0,0 +1,381 @@ +//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project +static const char* rayCastKernelCL= \ +"#define SHAPE_CONVEX_HULL 3\n" +"#define SHAPE_PLANE 4\n" +"#define SHAPE_CONCAVE_TRIMESH 5\n" +"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n" +"#define SHAPE_SPHERE 7\n" +"typedef struct\n" +"{\n" +" float4 m_from;\n" +" float4 m_to;\n" +"} b3RayInfo;\n" +"typedef struct\n" +"{\n" +" float m_hitFraction;\n" +" int m_hitResult0;\n" +" int m_hitResult1;\n" +" int m_hitResult2;\n" +" float4 m_hitPoint;\n" +" float4 m_hitNormal;\n" +"} b3RayHit;\n" +"typedef struct\n" +"{\n" +" float4 m_pos;\n" +" float4 m_quat;\n" +" float4 m_linVel;\n" +" float4 m_angVel;\n" +" unsigned int m_collidableIdx;\n" +" float m_invMass;\n" +" float m_restituitionCoeff;\n" +" float m_frictionCoeff;\n" +"} Body;\n" +"typedef struct Collidable\n" +"{\n" +" union {\n" +" int m_numChildShapes;\n" +" int m_bvhIndex;\n" +" };\n" +" float m_radius;\n" +" int m_shapeType;\n" +" int m_shapeIndex;\n" +"} Collidable;\n" +"typedef struct \n" +"{\n" +" float4 m_localCenter;\n" +" float4 m_extents;\n" +" float4 mC;\n" +" float4 mE;\n" +" float m_radius;\n" +" int m_faceOffset;\n" +" int m_numFaces;\n" +" int m_numVertices;\n" +" int m_vertexOffset;\n" +" int m_uniqueEdgesOffset;\n" +" int m_numUniqueEdges;\n" +" int m_unused;\n" +"} ConvexPolyhedronCL;\n" +"typedef struct\n" +"{\n" +" float4 m_plane;\n" +" int m_indexOffset;\n" +" int m_numIndices;\n" +"} b3GpuFace;\n" +"///////////////////////////////////////\n" +"// Quaternion\n" +"///////////////////////////////////////\n" +"typedef float4 Quaternion;\n" +"__inline\n" +" Quaternion qtMul(Quaternion a, Quaternion b);\n" +"__inline\n" +" Quaternion qtNormalize(Quaternion in);\n" +"__inline\n" +" Quaternion qtInvert(Quaternion q);\n" +"__inline\n" +" float dot3F4(float4 a, float4 b)\n" +"{\n" +" float4 a1 = (float4)(a.xyz,0.f);\n" +" float4 b1 = (float4)(b.xyz,0.f);\n" +" return dot(a1, b1);\n" +"}\n" +"__inline\n" +" Quaternion qtMul(Quaternion a, Quaternion b)\n" +"{\n" +" Quaternion ans;\n" +" ans = cross( a, b );\n" +" ans += a.w*b+b.w*a;\n" +" // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" +" ans.w = a.w*b.w - dot3F4(a, b);\n" +" return ans;\n" +"}\n" +"__inline\n" +" Quaternion qtNormalize(Quaternion in)\n" +"{\n" +" return fast_normalize(in);\n" +" // in /= length( in );\n" +" // return in;\n" +"}\n" +"__inline\n" +" float4 qtRotate(Quaternion q, float4 vec)\n" +"{\n" +" Quaternion qInv = qtInvert( q );\n" +" float4 vcpy = vec;\n" +" vcpy.w = 0.f;\n" +" float4 out = qtMul(q,vcpy);\n" +" out = qtMul(out,qInv);\n" +" return out;\n" +"}\n" +"__inline\n" +" Quaternion qtInvert(Quaternion q)\n" +"{\n" +" return (Quaternion)(-q.xyz, q.w);\n" +"}\n" +"__inline\n" +" float4 qtInvRotate(const Quaternion q, float4 vec)\n" +"{\n" +" return qtRotate( qtInvert( q ), vec );\n" +"}\n" +"void trInverse(float4 translationIn, Quaternion orientationIn,\n" +" float4* translationOut, Quaternion* orientationOut)\n" +"{\n" +" *orientationOut = qtInvert(orientationIn);\n" +" *translationOut = qtRotate(*orientationOut, -translationIn);\n" +"}\n" +"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n" +" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n" +"{\n" +" rayFromLocal.w = 0.f;\n" +" rayToLocal.w = 0.f;\n" +" bool result = true;\n" +" float exitFraction = hitFraction[0];\n" +" float enterFraction = -0.3f;\n" +" float4 curHitNormal = (float4)(0,0,0,0);\n" +" for (int i=0;i<numFaces && result;i++)\n" +" {\n" +" b3GpuFace face = faces[faceOffset+i];\n" +" float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n" +" float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n" +" if (fromPlaneDist<0.f)\n" +" {\n" +" if (toPlaneDist >= 0.f)\n" +" {\n" +" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" +" if (exitFraction>fraction)\n" +" {\n" +" exitFraction = fraction;\n" +" }\n" +" } \n" +" } else\n" +" {\n" +" if (toPlaneDist<0.f)\n" +" {\n" +" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n" +" if (enterFraction <= fraction)\n" +" {\n" +" enterFraction = fraction;\n" +" curHitNormal = face.m_plane;\n" +" curHitNormal.w = 0.f;\n" +" }\n" +" } else\n" +" {\n" +" result = false;\n" +" }\n" +" }\n" +" if (exitFraction <= enterFraction)\n" +" result = false;\n" +" }\n" +" if (enterFraction < 0.f)\n" +" {\n" +" result = false;\n" +" }\n" +" if (result)\n" +" { \n" +" hitFraction[0] = enterFraction;\n" +" hitNormal[0] = curHitNormal;\n" +" }\n" +" return result;\n" +"}\n" +"bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n" +"{\n" +" float4 rs = rayFrom - spherePos;\n" +" rs.w = 0.f;\n" +" float4 rayDir = rayTo-rayFrom;\n" +" rayDir.w = 0.f;\n" +" float A = dot(rayDir,rayDir);\n" +" float B = dot(rs, rayDir);\n" +" float C = dot(rs, rs) - (radius * radius);\n" +" float D = B * B - A*C;\n" +" if (D > 0.0f)\n" +" {\n" +" float t = (-B - sqrt(D))/A;\n" +" if ( (t >= 0.0f) && (t < (*hitFraction)) )\n" +" {\n" +" *hitFraction = t;\n" +" return true;\n" +" }\n" +" }\n" +" return false;\n" +"}\n" +"float4 setInterpolate3(float4 from, float4 to, float t)\n" +"{\n" +" float s = 1.0f - t;\n" +" float4 result;\n" +" result = s * from + t * to;\n" +" result.w = 0.f; \n" +" return result; \n" +"}\n" +"__kernel void rayCastKernel( \n" +" int numRays, \n" +" const __global b3RayInfo* rays, \n" +" __global b3RayHit* hitResults, \n" +" const int numBodies, \n" +" __global Body* bodies,\n" +" __global Collidable* collidables,\n" +" __global const b3GpuFace* faces,\n" +" __global const ConvexPolyhedronCL* convexShapes )\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i>=numRays)\n" +" return;\n" +" hitResults[i].m_hitFraction = 1.f;\n" +" float4 rayFrom = rays[i].m_from;\n" +" float4 rayTo = rays[i].m_to;\n" +" float hitFraction = 1.f;\n" +" float4 hitPoint;\n" +" float4 hitNormal;\n" +" int hitBodyIndex= -1;\n" +" int cachedCollidableIndex = -1;\n" +" Collidable cachedCollidable;\n" +" for (int b=0;b<numBodies;b++)\n" +" {\n" +" if (hitResults[i].m_hitResult2==b)\n" +" continue;\n" +" Body body = bodies[b];\n" +" float4 pos = body.m_pos;\n" +" float4 orn = body.m_quat;\n" +" if (cachedCollidableIndex != body.m_collidableIdx)\n" +" {\n" +" cachedCollidableIndex = body.m_collidableIdx;\n" +" cachedCollidable = collidables[cachedCollidableIndex];\n" +" }\n" +" if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" +" {\n" +" float4 invPos = (float4)(0,0,0,0);\n" +" float4 invOrn = (float4)(0,0,0,0);\n" +" float4 rayFromLocal = (float4)(0,0,0,0);\n" +" float4 rayToLocal = (float4)(0,0,0,0);\n" +" invOrn = qtInvert(orn);\n" +" invPos = qtRotate(invOrn, -pos);\n" +" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" +" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" +" rayFromLocal.w = 0.f;\n" +" rayToLocal.w = 0.f;\n" +" int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n" +" int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n" +" if (numFaces)\n" +" {\n" +" if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" +" {\n" +" hitBodyIndex = b;\n" +" \n" +" }\n" +" }\n" +" }\n" +" if (cachedCollidable.m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" float radius = cachedCollidable.m_radius;\n" +" \n" +" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" +" {\n" +" hitBodyIndex = b;\n" +" hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n" +" }\n" +" }\n" +" }\n" +" if (hitBodyIndex>=0)\n" +" {\n" +" hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n" +" hitResults[i].m_hitFraction = hitFraction;\n" +" hitResults[i].m_hitPoint = hitPoint;\n" +" hitResults[i].m_hitNormal = normalize(hitNormal);\n" +" hitResults[i].m_hitResult0 = hitBodyIndex;\n" +" }\n" +"}\n" +"__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n" +" __global int* out_firstRayRigidPairIndexPerRay,\n" +" __global int* out_numRayRigidPairsPerRay,\n" +" int numRayRigidPairs)\n" +"{\n" +" int rayRigidPairIndex = get_global_id(0);\n" +" if (rayRigidPairIndex >= numRayRigidPairs) return;\n" +" \n" +" int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n" +" \n" +" atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n" +" atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n" +"}\n" +"__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n" +" __global b3RayHit* hitResults, \n" +" __global int* firstRayRigidPairIndexPerRay,\n" +" __global int* numRayRigidPairsPerRay,\n" +" \n" +" __global Body* bodies,\n" +" __global Collidable* collidables,\n" +" __global const b3GpuFace* faces,\n" +" __global const ConvexPolyhedronCL* convexShapes,\n" +" \n" +" __global int2* rayRigidPairs,\n" +" int numRays)\n" +"{\n" +" int i = get_global_id(0);\n" +" if (i >= numRays) return;\n" +" \n" +" float4 rayFrom = rays[i].m_from;\n" +" float4 rayTo = rays[i].m_to;\n" +" \n" +" hitResults[i].m_hitFraction = 1.f;\n" +" \n" +" float hitFraction = 1.f;\n" +" float4 hitPoint;\n" +" float4 hitNormal;\n" +" int hitBodyIndex = -1;\n" +" \n" +" //\n" +" for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n" +" {\n" +" int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n" +" int b = rayRigidPairs[rayRigidPairIndex].y;\n" +" \n" +" if (hitResults[i].m_hitResult2 == b) continue;\n" +" \n" +" Body body = bodies[b];\n" +" Collidable rigidCollidable = collidables[body.m_collidableIdx];\n" +" \n" +" float4 pos = body.m_pos;\n" +" float4 orn = body.m_quat;\n" +" \n" +" if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n" +" {\n" +" float4 invPos = (float4)(0,0,0,0);\n" +" float4 invOrn = (float4)(0,0,0,0);\n" +" float4 rayFromLocal = (float4)(0,0,0,0);\n" +" float4 rayToLocal = (float4)(0,0,0,0);\n" +" invOrn = qtInvert(orn);\n" +" invPos = qtRotate(invOrn, -pos);\n" +" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n" +" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n" +" rayFromLocal.w = 0.f;\n" +" rayToLocal.w = 0.f;\n" +" int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n" +" int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n" +" \n" +" if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n" +" {\n" +" hitBodyIndex = b;\n" +" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" +" }\n" +" }\n" +" \n" +" if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n" +" {\n" +" float radius = rigidCollidable.m_radius;\n" +" \n" +" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n" +" {\n" +" hitBodyIndex = b;\n" +" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n" +" hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n" +" }\n" +" }\n" +" }\n" +" \n" +" if (hitBodyIndex >= 0)\n" +" {\n" +" hitResults[i].m_hitFraction = hitFraction;\n" +" hitResults[i].m_hitPoint = hitPoint;\n" +" hitResults[i].m_hitNormal = normalize(hitNormal);\n" +" hitResults[i].m_hitResult0 = hitBodyIndex;\n" +" }\n" +" \n" +"}\n" +; |