summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/Raycast
diff options
context:
space:
mode:
authorOussama <o.boukhelf@gmail.com>2019-01-03 14:26:51 +0100
committerRĂ©mi Verschelde <rverschelde@gmail.com>2019-01-07 12:30:35 +0100
commit22b7c9dfa80d0f7abca40f061865c2ab3c136a74 (patch)
tree311cd3f22b012329160f9d43810aea429994af48 /thirdparty/bullet/Bullet3OpenCL/Raycast
parenta6722cf36251ddcb538e6ebed9fa4950342b68ba (diff)
Update Bullet to the latest commit 126b676
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/Raycast')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp259
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h22
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h759
3 files changed, 509 insertions, 531 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
index 161e304f09..6571f30548 100644
--- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
+++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.cpp
@@ -4,7 +4,6 @@
#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h"
#include "Bullet3OpenCL/RigidBody/b3GpuNarrowPhaseInternalData.h"
-
#include "Bullet3OpenCL/Initialize/b3OpenCLUtils.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3OpenCLArray.h"
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
@@ -15,38 +14,35 @@
#include "Bullet3OpenCL/Raycast/kernels/rayCastKernels.h"
-
#define B3_RAYCAST_PATH "src/Bullet3OpenCL/Raycast/kernels/rayCastKernels.cl"
-
-
struct b3GpuRaycastInternalData
{
cl_context m_context;
cl_device_id m_device;
- cl_command_queue m_q;
+ cl_command_queue m_q;
cl_kernel m_raytraceKernel;
cl_kernel m_raytracePairsKernel;
cl_kernel m_findRayRigidPairIndexRanges;
-
+
b3GpuParallelLinearBvh* m_plbvh;
b3RadixSort32CL* m_radixSorter;
b3FillCL* m_fill;
-
+
//1 element per ray
b3OpenCLArray<b3RayInfo>* m_gpuRays;
b3OpenCLArray<b3RayHit>* m_gpuHitResults;
b3OpenCLArray<int>* m_firstRayRigidPairIndexPerRay;
b3OpenCLArray<int>* m_numRayRigidPairsPerRay;
-
+
//1 element per (ray index, rigid index) pair, where the ray intersects with the rigid's AABB
b3OpenCLArray<int>* m_gpuNumRayRigidPairs;
- b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index
-
+ b3OpenCLArray<b3Int2>* m_gpuRayRigidPairs; //x == ray index, y == rigid index
+
int m_test;
};
-b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q)
+b3GpuRaycast::b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q)
{
m_data = new b3GpuRaycastInternalData;
m_data->m_context = ctx;
@@ -59,7 +55,7 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
m_data->m_plbvh = new b3GpuParallelLinearBvh(ctx, device, q);
m_data->m_radixSorter = new b3RadixSort32CL(ctx, device, q);
m_data->m_fill = new b3FillCL(ctx, device, q);
-
+
m_data->m_gpuRays = new b3OpenCLArray<b3RayInfo>(ctx, q);
m_data->m_gpuHitResults = new b3OpenCLArray<b3RayHit>(ctx, q);
m_data->m_firstRayRigidPairIndexPerRay = new b3OpenCLArray<int>(ctx, q);
@@ -68,19 +64,17 @@ b3GpuRaycast::b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue
m_data->m_gpuRayRigidPairs = new b3OpenCLArray<b3Int2>(ctx, q);
{
- cl_int errNum=0;
- cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context,m_data->m_device,rayCastKernelCL,&errNum,"",B3_RAYCAST_PATH);
- b3Assert(errNum==CL_SUCCESS);
- m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastKernel",&errNum,prog);
- b3Assert(errNum==CL_SUCCESS);
- m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "rayCastPairsKernel",&errNum,prog);
- b3Assert(errNum==CL_SUCCESS);
- m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device,rayCastKernelCL, "findRayRigidPairIndexRanges",&errNum,prog);
- b3Assert(errNum==CL_SUCCESS);
+ cl_int errNum = 0;
+ cl_program prog = b3OpenCLUtils::compileCLProgramFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, &errNum, "", B3_RAYCAST_PATH);
+ b3Assert(errNum == CL_SUCCESS);
+ m_data->m_raytraceKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastKernel", &errNum, prog);
+ b3Assert(errNum == CL_SUCCESS);
+ m_data->m_raytracePairsKernel = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "rayCastPairsKernel", &errNum, prog);
+ b3Assert(errNum == CL_SUCCESS);
+ m_data->m_findRayRigidPairIndexRanges = b3OpenCLUtils::compileCLKernelFromString(m_data->m_context, m_data->m_device, rayCastKernelCL, "findRayRigidPairIndexRanges", &errNum, prog);
+ b3Assert(errNum == CL_SUCCESS);
clReleaseProgram(prog);
}
-
-
}
b3GpuRaycast::~b3GpuRaycast()
@@ -88,78 +82,80 @@ b3GpuRaycast::~b3GpuRaycast()
clReleaseKernel(m_data->m_raytraceKernel);
clReleaseKernel(m_data->m_raytracePairsKernel);
clReleaseKernel(m_data->m_findRayRigidPairIndexRanges);
-
+
delete m_data->m_plbvh;
delete m_data->m_radixSorter;
delete m_data->m_fill;
-
+
delete m_data->m_gpuRays;
delete m_data->m_gpuHitResults;
delete m_data->m_firstRayRigidPairIndexPerRay;
delete m_data->m_numRayRigidPairsPerRay;
delete m_data->m_gpuNumRayRigidPairs;
delete m_data->m_gpuRayRigidPairs;
-
+
delete m_data;
}
-bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
+bool sphere_intersect(const b3Vector3& spherePos, b3Scalar radius, const b3Vector3& rayFrom, const b3Vector3& rayTo, float& hitFraction)
{
- b3Vector3 rs = rayFrom - spherePos;
- b3Vector3 rayDir = rayTo-rayFrom;
-
- float A = b3Dot(rayDir,rayDir);
- float B = b3Dot(rs, rayDir);
- float C = b3Dot(rs, rs) - (radius * radius);
-
- float D = B * B - A*C;
-
- if (D > 0.0)
- {
- float t = (-B - sqrt(D))/A;
-
- if ( (t >= 0.0f) && (t < hitFraction) )
- {
+ b3Vector3 rs = rayFrom - spherePos;
+ b3Vector3 rayDir = rayTo - rayFrom;
+
+ float A = b3Dot(rayDir, rayDir);
+ float B = b3Dot(rs, rayDir);
+ float C = b3Dot(rs, rs) - (radius * radius);
+
+ float D = B * B - A * C;
+
+ if (D > 0.0)
+ {
+ float t = (-B - sqrt(D)) / A;
+
+ if ((t >= 0.0f) && (t < hitFraction))
+ {
hitFraction = t;
- return true;
+ return true;
}
}
return false;
}
bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const b3ConvexPolyhedronData& poly,
- const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
+ const b3AlignedObjectArray<b3GpuFace>& faces, float& hitFraction, b3Vector3& hitNormal)
{
float exitFraction = hitFraction;
float enterFraction = -0.1f;
- b3Vector3 curHitNormal=b3MakeVector3(0,0,0);
- for (int i=0;i<poly.m_numFaces;i++)
+ b3Vector3 curHitNormal = b3MakeVector3(0, 0, 0);
+ for (int i = 0; i < poly.m_numFaces; i++)
{
- const b3GpuFace& face = faces[poly.m_faceOffset+i];
- float fromPlaneDist = b3Dot(rayFromLocal,face.m_plane)+face.m_plane.w;
- float toPlaneDist = b3Dot(rayToLocal,face.m_plane)+face.m_plane.w;
- if (fromPlaneDist<0.f)
+ const b3GpuFace& face = faces[poly.m_faceOffset + i];
+ float fromPlaneDist = b3Dot(rayFromLocal, face.m_plane) + face.m_plane.w;
+ float toPlaneDist = b3Dot(rayToLocal, face.m_plane) + face.m_plane.w;
+ if (fromPlaneDist < 0.f)
{
if (toPlaneDist >= 0.f)
{
- float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
- if (exitFraction>fraction)
+ float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
+ if (exitFraction > fraction)
{
exitFraction = fraction;
}
- }
- } else
+ }
+ }
+ else
{
- if (toPlaneDist<0.f)
+ if (toPlaneDist < 0.f)
{
- float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);
+ float fraction = fromPlaneDist / (fromPlaneDist - toPlaneDist);
if (enterFraction <= fraction)
{
enterFraction = fraction;
curHitNormal = face.m_plane;
curHitNormal.w = 0.f;
}
- } else
+ }
+ else
{
return false;
}
@@ -176,44 +172,41 @@ bool rayConvex(const b3Vector3& rayFromLocal, const b3Vector3& rayToLocal, const
return true;
}
-void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
- int numBodies,const struct b3RigidBodyData* bodies, int numCollidables,const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
+void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
+ int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables, const struct b3GpuNarrowPhaseInternalData* narrowphaseData)
{
-
-// return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
+ // return castRays(rays,hitResults,numBodies,bodies,numCollidables,collidables);
B3_PROFILE("castRaysHost");
- for (int r=0;r<rays.size();r++)
+ for (int r = 0; r < rays.size(); r++)
{
b3Vector3 rayFrom = rays[r].m_from;
b3Vector3 rayTo = rays[r].m_to;
float hitFraction = hitResults[r].m_hitFraction;
- int hitBodyIndex= -1;
+ int hitBodyIndex = -1;
b3Vector3 hitNormal;
- for (int b=0;b<numBodies;b++)
+ for (int b = 0; b < numBodies; b++)
{
-
const b3Vector3& pos = bodies[b].m_pos;
//const b3Quaternion& orn = bodies[b].m_quat;
-
+
switch (collidables[bodies[b].m_collidableIdx].m_shapeType)
{
- case SHAPE_SPHERE:
+ case SHAPE_SPHERE:
{
b3Scalar radius = collidables[bodies[b].m_collidableIdx].m_radius;
- if (sphere_intersect(pos, radius, rayFrom, rayTo,hitFraction))
+ if (sphere_intersect(pos, radius, rayFrom, rayTo, hitFraction))
{
hitBodyIndex = b;
b3Vector3 hitPoint;
- hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
- hitNormal = (hitPoint-bodies[b].m_pos).normalize();
+ hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
+ hitNormal = (hitPoint - bodies[b].m_pos).normalize();
}
}
- case SHAPE_CONVEX_HULL:
+ case SHAPE_CONVEX_HULL:
{
-
b3Transform convexWorldTransform;
convexWorldTransform.setIdentity();
convexWorldTransform.setOrigin(bodies[b].m_pos);
@@ -222,72 +215,67 @@ void b3GpuRaycast::castRaysHost(const b3AlignedObjectArray<b3RayInfo>& rays, b3A
b3Vector3 rayFromLocal = convexWorld2Local(rayFrom);
b3Vector3 rayToLocal = convexWorld2Local(rayTo);
-
-
+
int shapeIndex = collidables[bodies[b].m_collidableIdx].m_shapeIndex;
const b3ConvexPolyhedronData& poly = narrowphaseData->m_convexPolyhedra[shapeIndex];
- if (rayConvex(rayFromLocal, rayToLocal,poly,narrowphaseData->m_convexFaces, hitFraction, hitNormal))
+ if (rayConvex(rayFromLocal, rayToLocal, poly, narrowphaseData->m_convexFaces, hitFraction, hitNormal))
{
hitBodyIndex = b;
}
-
break;
}
- default:
+ default:
{
- static bool once=true;
+ static bool once = true;
if (once)
{
- once=false;
+ once = false;
b3Warning("Raytest: unsupported shape type\n");
}
}
}
}
- if (hitBodyIndex>=0)
+ if (hitBodyIndex >= 0)
{
-
hitResults[r].m_hitFraction = hitFraction;
- hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to,hitFraction);
+ hitResults[r].m_hitPoint.setInterpolate3(rays[r].m_from, rays[r].m_to, hitFraction);
hitResults[r].m_hitNormal = hitNormal;
hitResults[r].m_hitBody = hitBodyIndex;
}
-
}
}
///todo: add some acceleration structure (AABBs, tree etc)
-void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
- int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
- const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
+void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
+ int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
+ const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase)
{
//castRaysHost(rays,hitResults,numBodies,bodies,numCollidables,collidables,narrowphaseData);
B3_PROFILE("castRaysGPU");
-
+
{
B3_PROFILE("raycast copyFromHost");
m_data->m_gpuRays->copyFromHost(rays);
m_data->m_gpuHitResults->copyFromHost(hitResults);
-
}
-
+
int numRays = hitResults.size();
{
m_data->m_firstRayRigidPairIndexPerRay->resize(numRays);
m_data->m_numRayRigidPairsPerRay->resize(numRays);
-
+
m_data->m_gpuNumRayRigidPairs->resize(1);
m_data->m_gpuRayRigidPairs->resize(numRays * 16);
}
-
+
//run kernel
const bool USE_BRUTE_FORCE_RAYCAST = false;
- if(USE_BRUTE_FORCE_RAYCAST)
+ if (USE_BRUTE_FORCE_RAYCAST)
{
B3_PROFILE("raycast launch1D");
- b3LauncherCL launcher(m_data->m_q,m_data->m_raytraceKernel,"m_raytraceKernel");
+ b3LauncherCL launcher(m_data->m_q, m_data->m_raytraceKernel, "m_raytraceKernel");
int numRays = rays.size();
launcher.setConst(numRays);
@@ -299,93 +287,88 @@ void b3GpuRaycast::castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3Align
launcher.setBuffer(narrowphaseData->m_collidablesGPU->getBufferCL());
launcher.setBuffer(narrowphaseData->m_convexFacesGPU->getBufferCL());
launcher.setBuffer(narrowphaseData->m_convexPolyhedraGPU->getBufferCL());
-
+
launcher.launch1D(numRays);
clFinish(m_data->m_q);
}
else
{
- m_data->m_plbvh->build( broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU() );
+ m_data->m_plbvh->build(broadphase->getAllAabbsGPU(), broadphase->getSmallAabbIndicesGPU(), broadphase->getLargeAabbIndicesGPU());
m_data->m_plbvh->testRaysAgainstBvhAabbs(*m_data->m_gpuRays, *m_data->m_gpuNumRayRigidPairs, *m_data->m_gpuRayRigidPairs);
-
+
int numRayRigidPairs = -1;
m_data->m_gpuNumRayRigidPairs->copyToHostPointer(&numRayRigidPairs, 1);
- if( numRayRigidPairs > m_data->m_gpuRayRigidPairs->size() )
+ if (numRayRigidPairs > m_data->m_gpuRayRigidPairs->size())
{
numRayRigidPairs = m_data->m_gpuRayRigidPairs->size();
m_data->m_gpuNumRayRigidPairs->copyFromHostPointer(&numRayRigidPairs, 1);
}
-
- m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
-
+
+ m_data->m_gpuRayRigidPairs->resize(numRayRigidPairs); //Radix sort needs b3OpenCLArray::size() to be correct
+
//Sort ray-rigid pairs by ray index
{
B3_PROFILE("sort ray-rigid pairs");
- m_data->m_radixSorter->execute( *reinterpret_cast< b3OpenCLArray<b3SortData>* >(m_data->m_gpuRayRigidPairs) );
+ m_data->m_radixSorter->execute(*reinterpret_cast<b3OpenCLArray<b3SortData>*>(m_data->m_gpuRayRigidPairs));
}
-
+
//detect start,count of each ray pair
{
B3_PROFILE("detect ray-rigid pair index ranges");
-
+
{
B3_PROFILE("reset ray-rigid pair index ranges");
-
- m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
+
+ m_data->m_fill->execute(*m_data->m_firstRayRigidPairIndexPerRay, numRayRigidPairs, numRays); //atomic_min used to find first index
m_data->m_fill->execute(*m_data->m_numRayRigidPairsPerRay, 0, numRays);
clFinish(m_data->m_q);
}
-
- b3BufferInfoCL bufferInfo[] =
- {
- b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() ),
-
- b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
- b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() )
- };
-
+
+ b3BufferInfoCL bufferInfo[] =
+ {
+ b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL()),
+
+ b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
+ b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL())};
+
b3LauncherCL launcher(m_data->m_q, m_data->m_findRayRigidPairIndexRanges, "m_findRayRigidPairIndexRanges");
- launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
launcher.setConst(numRayRigidPairs);
-
+
launcher.launch1D(numRayRigidPairs);
clFinish(m_data->m_q);
}
-
+
{
B3_PROFILE("ray-rigid intersection");
-
- b3BufferInfoCL bufferInfo[] =
- {
- b3BufferInfoCL( m_data->m_gpuRays->getBufferCL() ),
- b3BufferInfoCL( m_data->m_gpuHitResults->getBufferCL() ),
- b3BufferInfoCL( m_data->m_firstRayRigidPairIndexPerRay->getBufferCL() ),
- b3BufferInfoCL( m_data->m_numRayRigidPairsPerRay->getBufferCL() ),
-
- b3BufferInfoCL( narrowphaseData->m_bodyBufferGPU->getBufferCL() ),
- b3BufferInfoCL( narrowphaseData->m_collidablesGPU->getBufferCL() ),
- b3BufferInfoCL( narrowphaseData->m_convexFacesGPU->getBufferCL() ),
- b3BufferInfoCL( narrowphaseData->m_convexPolyhedraGPU->getBufferCL() ),
-
- b3BufferInfoCL( m_data->m_gpuRayRigidPairs->getBufferCL() )
- };
-
+
+ b3BufferInfoCL bufferInfo[] =
+ {
+ b3BufferInfoCL(m_data->m_gpuRays->getBufferCL()),
+ b3BufferInfoCL(m_data->m_gpuHitResults->getBufferCL()),
+ b3BufferInfoCL(m_data->m_firstRayRigidPairIndexPerRay->getBufferCL()),
+ b3BufferInfoCL(m_data->m_numRayRigidPairsPerRay->getBufferCL()),
+
+ b3BufferInfoCL(narrowphaseData->m_bodyBufferGPU->getBufferCL()),
+ b3BufferInfoCL(narrowphaseData->m_collidablesGPU->getBufferCL()),
+ b3BufferInfoCL(narrowphaseData->m_convexFacesGPU->getBufferCL()),
+ b3BufferInfoCL(narrowphaseData->m_convexPolyhedraGPU->getBufferCL()),
+
+ b3BufferInfoCL(m_data->m_gpuRayRigidPairs->getBufferCL())};
+
b3LauncherCL launcher(m_data->m_q, m_data->m_raytracePairsKernel, "m_raytracePairsKernel");
- launcher.setBuffers( bufferInfo, sizeof(bufferInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setBuffers(bufferInfo, sizeof(bufferInfo) / sizeof(b3BufferInfoCL));
launcher.setConst(numRays);
-
+
launcher.launch1D(numRays);
clFinish(m_data->m_q);
}
}
-
-
//copy results
{
B3_PROFILE("raycast copyToHost");
m_data->m_gpuHitResults->copyToHost(hitResults);
}
-
} \ No newline at end of file
diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h
index 3a5cf44b79..f1f6ffd402 100644
--- a/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h
+++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/b3GpuRaycast.h
@@ -7,26 +7,22 @@
#include "Bullet3Common/b3AlignedObjectArray.h"
#include "Bullet3Collision/NarrowPhaseCollision/b3RaycastInfo.h"
-
-
class b3GpuRaycast
{
protected:
struct b3GpuRaycastInternalData* m_data;
+
public:
- b3GpuRaycast(cl_context ctx,cl_device_id device, cl_command_queue q);
+ b3GpuRaycast(cl_context ctx, cl_device_id device, cl_command_queue q);
virtual ~b3GpuRaycast();
- void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults,
- int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
- const struct b3GpuNarrowPhaseInternalData* narrowphaseData);
-
- void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
- int numBodies,const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
- const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase);
-
+ void castRaysHost(const b3AlignedObjectArray<b3RayInfo>& raysIn, b3AlignedObjectArray<b3RayHit>& hitResults,
+ int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
+ const struct b3GpuNarrowPhaseInternalData* narrowphaseData);
-
+ void castRays(const b3AlignedObjectArray<b3RayInfo>& rays, b3AlignedObjectArray<b3RayHit>& hitResults,
+ int numBodies, const struct b3RigidBodyData* bodies, int numCollidables, const struct b3Collidable* collidables,
+ const struct b3GpuNarrowPhaseInternalData* narrowphaseData, class b3GpuBroadphaseInterface* broadphase);
};
-#endif //B3_GPU_RAYCAST_H
+#endif //B3_GPU_RAYCAST_H
diff --git a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h
index 6257909a4d..94f6a8eb9f 100644
--- a/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h
+++ b/thirdparty/bullet/Bullet3OpenCL/Raycast/kernels/rayCastKernels.h
@@ -1,381 +1,380 @@
//this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project
-static const char* rayCastKernelCL= \
-"#define SHAPE_CONVEX_HULL 3\n"
-"#define SHAPE_PLANE 4\n"
-"#define SHAPE_CONCAVE_TRIMESH 5\n"
-"#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
-"#define SHAPE_SPHERE 7\n"
-"typedef struct\n"
-"{\n"
-" float4 m_from;\n"
-" float4 m_to;\n"
-"} b3RayInfo;\n"
-"typedef struct\n"
-"{\n"
-" float m_hitFraction;\n"
-" int m_hitResult0;\n"
-" int m_hitResult1;\n"
-" int m_hitResult2;\n"
-" float4 m_hitPoint;\n"
-" float4 m_hitNormal;\n"
-"} b3RayHit;\n"
-"typedef struct\n"
-"{\n"
-" float4 m_pos;\n"
-" float4 m_quat;\n"
-" float4 m_linVel;\n"
-" float4 m_angVel;\n"
-" unsigned int m_collidableIdx;\n"
-" float m_invMass;\n"
-" float m_restituitionCoeff;\n"
-" float m_frictionCoeff;\n"
-"} Body;\n"
-"typedef struct Collidable\n"
-"{\n"
-" union {\n"
-" int m_numChildShapes;\n"
-" int m_bvhIndex;\n"
-" };\n"
-" float m_radius;\n"
-" int m_shapeType;\n"
-" int m_shapeIndex;\n"
-"} Collidable;\n"
-"typedef struct \n"
-"{\n"
-" float4 m_localCenter;\n"
-" float4 m_extents;\n"
-" float4 mC;\n"
-" float4 mE;\n"
-" float m_radius;\n"
-" int m_faceOffset;\n"
-" int m_numFaces;\n"
-" int m_numVertices;\n"
-" int m_vertexOffset;\n"
-" int m_uniqueEdgesOffset;\n"
-" int m_numUniqueEdges;\n"
-" int m_unused;\n"
-"} ConvexPolyhedronCL;\n"
-"typedef struct\n"
-"{\n"
-" float4 m_plane;\n"
-" int m_indexOffset;\n"
-" int m_numIndices;\n"
-"} b3GpuFace;\n"
-"///////////////////////////////////////\n"
-"// Quaternion\n"
-"///////////////////////////////////////\n"
-"typedef float4 Quaternion;\n"
-"__inline\n"
-" Quaternion qtMul(Quaternion a, Quaternion b);\n"
-"__inline\n"
-" Quaternion qtNormalize(Quaternion in);\n"
-"__inline\n"
-" Quaternion qtInvert(Quaternion q);\n"
-"__inline\n"
-" float dot3F4(float4 a, float4 b)\n"
-"{\n"
-" float4 a1 = (float4)(a.xyz,0.f);\n"
-" float4 b1 = (float4)(b.xyz,0.f);\n"
-" return dot(a1, b1);\n"
-"}\n"
-"__inline\n"
-" Quaternion qtMul(Quaternion a, Quaternion b)\n"
-"{\n"
-" Quaternion ans;\n"
-" ans = cross( a, b );\n"
-" ans += a.w*b+b.w*a;\n"
-" // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
-" ans.w = a.w*b.w - dot3F4(a, b);\n"
-" return ans;\n"
-"}\n"
-"__inline\n"
-" Quaternion qtNormalize(Quaternion in)\n"
-"{\n"
-" return fast_normalize(in);\n"
-" // in /= length( in );\n"
-" // return in;\n"
-"}\n"
-"__inline\n"
-" float4 qtRotate(Quaternion q, float4 vec)\n"
-"{\n"
-" Quaternion qInv = qtInvert( q );\n"
-" float4 vcpy = vec;\n"
-" vcpy.w = 0.f;\n"
-" float4 out = qtMul(q,vcpy);\n"
-" out = qtMul(out,qInv);\n"
-" return out;\n"
-"}\n"
-"__inline\n"
-" Quaternion qtInvert(Quaternion q)\n"
-"{\n"
-" return (Quaternion)(-q.xyz, q.w);\n"
-"}\n"
-"__inline\n"
-" float4 qtInvRotate(const Quaternion q, float4 vec)\n"
-"{\n"
-" return qtRotate( qtInvert( q ), vec );\n"
-"}\n"
-"void trInverse(float4 translationIn, Quaternion orientationIn,\n"
-" float4* translationOut, Quaternion* orientationOut)\n"
-"{\n"
-" *orientationOut = qtInvert(orientationIn);\n"
-" *translationOut = qtRotate(*orientationOut, -translationIn);\n"
-"}\n"
-"bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n"
-" __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n"
-"{\n"
-" rayFromLocal.w = 0.f;\n"
-" rayToLocal.w = 0.f;\n"
-" bool result = true;\n"
-" float exitFraction = hitFraction[0];\n"
-" float enterFraction = -0.3f;\n"
-" float4 curHitNormal = (float4)(0,0,0,0);\n"
-" for (int i=0;i<numFaces && result;i++)\n"
-" {\n"
-" b3GpuFace face = faces[faceOffset+i];\n"
-" float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n"
-" float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n"
-" if (fromPlaneDist<0.f)\n"
-" {\n"
-" if (toPlaneDist >= 0.f)\n"
-" {\n"
-" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
-" if (exitFraction>fraction)\n"
-" {\n"
-" exitFraction = fraction;\n"
-" }\n"
-" } \n"
-" } else\n"
-" {\n"
-" if (toPlaneDist<0.f)\n"
-" {\n"
-" float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
-" if (enterFraction <= fraction)\n"
-" {\n"
-" enterFraction = fraction;\n"
-" curHitNormal = face.m_plane;\n"
-" curHitNormal.w = 0.f;\n"
-" }\n"
-" } else\n"
-" {\n"
-" result = false;\n"
-" }\n"
-" }\n"
-" if (exitFraction <= enterFraction)\n"
-" result = false;\n"
-" }\n"
-" if (enterFraction < 0.f)\n"
-" {\n"
-" result = false;\n"
-" }\n"
-" if (result)\n"
-" { \n"
-" hitFraction[0] = enterFraction;\n"
-" hitNormal[0] = curHitNormal;\n"
-" }\n"
-" return result;\n"
-"}\n"
-"bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n"
-"{\n"
-" float4 rs = rayFrom - spherePos;\n"
-" rs.w = 0.f;\n"
-" float4 rayDir = rayTo-rayFrom;\n"
-" rayDir.w = 0.f;\n"
-" float A = dot(rayDir,rayDir);\n"
-" float B = dot(rs, rayDir);\n"
-" float C = dot(rs, rs) - (radius * radius);\n"
-" float D = B * B - A*C;\n"
-" if (D > 0.0f)\n"
-" {\n"
-" float t = (-B - sqrt(D))/A;\n"
-" if ( (t >= 0.0f) && (t < (*hitFraction)) )\n"
-" {\n"
-" *hitFraction = t;\n"
-" return true;\n"
-" }\n"
-" }\n"
-" return false;\n"
-"}\n"
-"float4 setInterpolate3(float4 from, float4 to, float t)\n"
-"{\n"
-" float s = 1.0f - t;\n"
-" float4 result;\n"
-" result = s * from + t * to;\n"
-" result.w = 0.f; \n"
-" return result; \n"
-"}\n"
-"__kernel void rayCastKernel( \n"
-" int numRays, \n"
-" const __global b3RayInfo* rays, \n"
-" __global b3RayHit* hitResults, \n"
-" const int numBodies, \n"
-" __global Body* bodies,\n"
-" __global Collidable* collidables,\n"
-" __global const b3GpuFace* faces,\n"
-" __global const ConvexPolyhedronCL* convexShapes )\n"
-"{\n"
-" int i = get_global_id(0);\n"
-" if (i>=numRays)\n"
-" return;\n"
-" hitResults[i].m_hitFraction = 1.f;\n"
-" float4 rayFrom = rays[i].m_from;\n"
-" float4 rayTo = rays[i].m_to;\n"
-" float hitFraction = 1.f;\n"
-" float4 hitPoint;\n"
-" float4 hitNormal;\n"
-" int hitBodyIndex= -1;\n"
-" int cachedCollidableIndex = -1;\n"
-" Collidable cachedCollidable;\n"
-" for (int b=0;b<numBodies;b++)\n"
-" {\n"
-" if (hitResults[i].m_hitResult2==b)\n"
-" continue;\n"
-" Body body = bodies[b];\n"
-" float4 pos = body.m_pos;\n"
-" float4 orn = body.m_quat;\n"
-" if (cachedCollidableIndex != body.m_collidableIdx)\n"
-" {\n"
-" cachedCollidableIndex = body.m_collidableIdx;\n"
-" cachedCollidable = collidables[cachedCollidableIndex];\n"
-" }\n"
-" if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
-" {\n"
-" float4 invPos = (float4)(0,0,0,0);\n"
-" float4 invOrn = (float4)(0,0,0,0);\n"
-" float4 rayFromLocal = (float4)(0,0,0,0);\n"
-" float4 rayToLocal = (float4)(0,0,0,0);\n"
-" invOrn = qtInvert(orn);\n"
-" invPos = qtRotate(invOrn, -pos);\n"
-" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n"
-" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n"
-" rayFromLocal.w = 0.f;\n"
-" rayToLocal.w = 0.f;\n"
-" int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n"
-" int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n"
-" if (numFaces)\n"
-" {\n"
-" if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
-" {\n"
-" hitBodyIndex = b;\n"
-" \n"
-" }\n"
-" }\n"
-" }\n"
-" if (cachedCollidable.m_shapeType == SHAPE_SPHERE)\n"
-" {\n"
-" float radius = cachedCollidable.m_radius;\n"
-" \n"
-" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
-" {\n"
-" hitBodyIndex = b;\n"
-" hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n"
-" }\n"
-" }\n"
-" }\n"
-" if (hitBodyIndex>=0)\n"
-" {\n"
-" hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
-" hitResults[i].m_hitFraction = hitFraction;\n"
-" hitResults[i].m_hitPoint = hitPoint;\n"
-" hitResults[i].m_hitNormal = normalize(hitNormal);\n"
-" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
-" }\n"
-"}\n"
-"__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n"
-" __global int* out_firstRayRigidPairIndexPerRay,\n"
-" __global int* out_numRayRigidPairsPerRay,\n"
-" int numRayRigidPairs)\n"
-"{\n"
-" int rayRigidPairIndex = get_global_id(0);\n"
-" if (rayRigidPairIndex >= numRayRigidPairs) return;\n"
-" \n"
-" int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n"
-" \n"
-" atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n"
-" atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n"
-"}\n"
-"__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n"
-" __global b3RayHit* hitResults, \n"
-" __global int* firstRayRigidPairIndexPerRay,\n"
-" __global int* numRayRigidPairsPerRay,\n"
-" \n"
-" __global Body* bodies,\n"
-" __global Collidable* collidables,\n"
-" __global const b3GpuFace* faces,\n"
-" __global const ConvexPolyhedronCL* convexShapes,\n"
-" \n"
-" __global int2* rayRigidPairs,\n"
-" int numRays)\n"
-"{\n"
-" int i = get_global_id(0);\n"
-" if (i >= numRays) return;\n"
-" \n"
-" float4 rayFrom = rays[i].m_from;\n"
-" float4 rayTo = rays[i].m_to;\n"
-" \n"
-" hitResults[i].m_hitFraction = 1.f;\n"
-" \n"
-" float hitFraction = 1.f;\n"
-" float4 hitPoint;\n"
-" float4 hitNormal;\n"
-" int hitBodyIndex = -1;\n"
-" \n"
-" //\n"
-" for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n"
-" {\n"
-" int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n"
-" int b = rayRigidPairs[rayRigidPairIndex].y;\n"
-" \n"
-" if (hitResults[i].m_hitResult2 == b) continue;\n"
-" \n"
-" Body body = bodies[b];\n"
-" Collidable rigidCollidable = collidables[body.m_collidableIdx];\n"
-" \n"
-" float4 pos = body.m_pos;\n"
-" float4 orn = body.m_quat;\n"
-" \n"
-" if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
-" {\n"
-" float4 invPos = (float4)(0,0,0,0);\n"
-" float4 invOrn = (float4)(0,0,0,0);\n"
-" float4 rayFromLocal = (float4)(0,0,0,0);\n"
-" float4 rayToLocal = (float4)(0,0,0,0);\n"
-" invOrn = qtInvert(orn);\n"
-" invPos = qtRotate(invOrn, -pos);\n"
-" rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n"
-" rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n"
-" rayFromLocal.w = 0.f;\n"
-" rayToLocal.w = 0.f;\n"
-" int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n"
-" int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n"
-" \n"
-" if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
-" {\n"
-" hitBodyIndex = b;\n"
-" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
-" }\n"
-" }\n"
-" \n"
-" if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n"
-" {\n"
-" float radius = rigidCollidable.m_radius;\n"
-" \n"
-" if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
-" {\n"
-" hitBodyIndex = b;\n"
-" hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
-" hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n"
-" }\n"
-" }\n"
-" }\n"
-" \n"
-" if (hitBodyIndex >= 0)\n"
-" {\n"
-" hitResults[i].m_hitFraction = hitFraction;\n"
-" hitResults[i].m_hitPoint = hitPoint;\n"
-" hitResults[i].m_hitNormal = normalize(hitNormal);\n"
-" hitResults[i].m_hitResult0 = hitBodyIndex;\n"
-" }\n"
-" \n"
-"}\n"
-;
+static const char* rayCastKernelCL =
+ "#define SHAPE_CONVEX_HULL 3\n"
+ "#define SHAPE_PLANE 4\n"
+ "#define SHAPE_CONCAVE_TRIMESH 5\n"
+ "#define SHAPE_COMPOUND_OF_CONVEX_HULLS 6\n"
+ "#define SHAPE_SPHERE 7\n"
+ "typedef struct\n"
+ "{\n"
+ " float4 m_from;\n"
+ " float4 m_to;\n"
+ "} b3RayInfo;\n"
+ "typedef struct\n"
+ "{\n"
+ " float m_hitFraction;\n"
+ " int m_hitResult0;\n"
+ " int m_hitResult1;\n"
+ " int m_hitResult2;\n"
+ " float4 m_hitPoint;\n"
+ " float4 m_hitNormal;\n"
+ "} b3RayHit;\n"
+ "typedef struct\n"
+ "{\n"
+ " float4 m_pos;\n"
+ " float4 m_quat;\n"
+ " float4 m_linVel;\n"
+ " float4 m_angVel;\n"
+ " unsigned int m_collidableIdx;\n"
+ " float m_invMass;\n"
+ " float m_restituitionCoeff;\n"
+ " float m_frictionCoeff;\n"
+ "} Body;\n"
+ "typedef struct Collidable\n"
+ "{\n"
+ " union {\n"
+ " int m_numChildShapes;\n"
+ " int m_bvhIndex;\n"
+ " };\n"
+ " float m_radius;\n"
+ " int m_shapeType;\n"
+ " int m_shapeIndex;\n"
+ "} Collidable;\n"
+ "typedef struct \n"
+ "{\n"
+ " float4 m_localCenter;\n"
+ " float4 m_extents;\n"
+ " float4 mC;\n"
+ " float4 mE;\n"
+ " float m_radius;\n"
+ " int m_faceOffset;\n"
+ " int m_numFaces;\n"
+ " int m_numVertices;\n"
+ " int m_vertexOffset;\n"
+ " int m_uniqueEdgesOffset;\n"
+ " int m_numUniqueEdges;\n"
+ " int m_unused;\n"
+ "} ConvexPolyhedronCL;\n"
+ "typedef struct\n"
+ "{\n"
+ " float4 m_plane;\n"
+ " int m_indexOffset;\n"
+ " int m_numIndices;\n"
+ "} b3GpuFace;\n"
+ "///////////////////////////////////////\n"
+ "// Quaternion\n"
+ "///////////////////////////////////////\n"
+ "typedef float4 Quaternion;\n"
+ "__inline\n"
+ " Quaternion qtMul(Quaternion a, Quaternion b);\n"
+ "__inline\n"
+ " Quaternion qtNormalize(Quaternion in);\n"
+ "__inline\n"
+ " Quaternion qtInvert(Quaternion q);\n"
+ "__inline\n"
+ " float dot3F4(float4 a, float4 b)\n"
+ "{\n"
+ " float4 a1 = (float4)(a.xyz,0.f);\n"
+ " float4 b1 = (float4)(b.xyz,0.f);\n"
+ " return dot(a1, b1);\n"
+ "}\n"
+ "__inline\n"
+ " Quaternion qtMul(Quaternion a, Quaternion b)\n"
+ "{\n"
+ " Quaternion ans;\n"
+ " ans = cross( a, b );\n"
+ " ans += a.w*b+b.w*a;\n"
+ " // ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n"
+ " ans.w = a.w*b.w - dot3F4(a, b);\n"
+ " return ans;\n"
+ "}\n"
+ "__inline\n"
+ " Quaternion qtNormalize(Quaternion in)\n"
+ "{\n"
+ " return fast_normalize(in);\n"
+ " // in /= length( in );\n"
+ " // return in;\n"
+ "}\n"
+ "__inline\n"
+ " float4 qtRotate(Quaternion q, float4 vec)\n"
+ "{\n"
+ " Quaternion qInv = qtInvert( q );\n"
+ " float4 vcpy = vec;\n"
+ " vcpy.w = 0.f;\n"
+ " float4 out = qtMul(q,vcpy);\n"
+ " out = qtMul(out,qInv);\n"
+ " return out;\n"
+ "}\n"
+ "__inline\n"
+ " Quaternion qtInvert(Quaternion q)\n"
+ "{\n"
+ " return (Quaternion)(-q.xyz, q.w);\n"
+ "}\n"
+ "__inline\n"
+ " float4 qtInvRotate(const Quaternion q, float4 vec)\n"
+ "{\n"
+ " return qtRotate( qtInvert( q ), vec );\n"
+ "}\n"
+ "void trInverse(float4 translationIn, Quaternion orientationIn,\n"
+ " float4* translationOut, Quaternion* orientationOut)\n"
+ "{\n"
+ " *orientationOut = qtInvert(orientationIn);\n"
+ " *translationOut = qtRotate(*orientationOut, -translationIn);\n"
+ "}\n"
+ "bool rayConvex(float4 rayFromLocal, float4 rayToLocal, int numFaces, int faceOffset,\n"
+ " __global const b3GpuFace* faces, float* hitFraction, float4* hitNormal)\n"
+ "{\n"
+ " rayFromLocal.w = 0.f;\n"
+ " rayToLocal.w = 0.f;\n"
+ " bool result = true;\n"
+ " float exitFraction = hitFraction[0];\n"
+ " float enterFraction = -0.3f;\n"
+ " float4 curHitNormal = (float4)(0,0,0,0);\n"
+ " for (int i=0;i<numFaces && result;i++)\n"
+ " {\n"
+ " b3GpuFace face = faces[faceOffset+i];\n"
+ " float fromPlaneDist = dot(rayFromLocal,face.m_plane)+face.m_plane.w;\n"
+ " float toPlaneDist = dot(rayToLocal,face.m_plane)+face.m_plane.w;\n"
+ " if (fromPlaneDist<0.f)\n"
+ " {\n"
+ " if (toPlaneDist >= 0.f)\n"
+ " {\n"
+ " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
+ " if (exitFraction>fraction)\n"
+ " {\n"
+ " exitFraction = fraction;\n"
+ " }\n"
+ " } \n"
+ " } else\n"
+ " {\n"
+ " if (toPlaneDist<0.f)\n"
+ " {\n"
+ " float fraction = fromPlaneDist / (fromPlaneDist-toPlaneDist);\n"
+ " if (enterFraction <= fraction)\n"
+ " {\n"
+ " enterFraction = fraction;\n"
+ " curHitNormal = face.m_plane;\n"
+ " curHitNormal.w = 0.f;\n"
+ " }\n"
+ " } else\n"
+ " {\n"
+ " result = false;\n"
+ " }\n"
+ " }\n"
+ " if (exitFraction <= enterFraction)\n"
+ " result = false;\n"
+ " }\n"
+ " if (enterFraction < 0.f)\n"
+ " {\n"
+ " result = false;\n"
+ " }\n"
+ " if (result)\n"
+ " { \n"
+ " hitFraction[0] = enterFraction;\n"
+ " hitNormal[0] = curHitNormal;\n"
+ " }\n"
+ " return result;\n"
+ "}\n"
+ "bool sphere_intersect(float4 spherePos, float radius, float4 rayFrom, float4 rayTo, float* hitFraction)\n"
+ "{\n"
+ " float4 rs = rayFrom - spherePos;\n"
+ " rs.w = 0.f;\n"
+ " float4 rayDir = rayTo-rayFrom;\n"
+ " rayDir.w = 0.f;\n"
+ " float A = dot(rayDir,rayDir);\n"
+ " float B = dot(rs, rayDir);\n"
+ " float C = dot(rs, rs) - (radius * radius);\n"
+ " float D = B * B - A*C;\n"
+ " if (D > 0.0f)\n"
+ " {\n"
+ " float t = (-B - sqrt(D))/A;\n"
+ " if ( (t >= 0.0f) && (t < (*hitFraction)) )\n"
+ " {\n"
+ " *hitFraction = t;\n"
+ " return true;\n"
+ " }\n"
+ " }\n"
+ " return false;\n"
+ "}\n"
+ "float4 setInterpolate3(float4 from, float4 to, float t)\n"
+ "{\n"
+ " float s = 1.0f - t;\n"
+ " float4 result;\n"
+ " result = s * from + t * to;\n"
+ " result.w = 0.f; \n"
+ " return result; \n"
+ "}\n"
+ "__kernel void rayCastKernel( \n"
+ " int numRays, \n"
+ " const __global b3RayInfo* rays, \n"
+ " __global b3RayHit* hitResults, \n"
+ " const int numBodies, \n"
+ " __global Body* bodies,\n"
+ " __global Collidable* collidables,\n"
+ " __global const b3GpuFace* faces,\n"
+ " __global const ConvexPolyhedronCL* convexShapes )\n"
+ "{\n"
+ " int i = get_global_id(0);\n"
+ " if (i>=numRays)\n"
+ " return;\n"
+ " hitResults[i].m_hitFraction = 1.f;\n"
+ " float4 rayFrom = rays[i].m_from;\n"
+ " float4 rayTo = rays[i].m_to;\n"
+ " float hitFraction = 1.f;\n"
+ " float4 hitPoint;\n"
+ " float4 hitNormal;\n"
+ " int hitBodyIndex= -1;\n"
+ " int cachedCollidableIndex = -1;\n"
+ " Collidable cachedCollidable;\n"
+ " for (int b=0;b<numBodies;b++)\n"
+ " {\n"
+ " if (hitResults[i].m_hitResult2==b)\n"
+ " continue;\n"
+ " Body body = bodies[b];\n"
+ " float4 pos = body.m_pos;\n"
+ " float4 orn = body.m_quat;\n"
+ " if (cachedCollidableIndex != body.m_collidableIdx)\n"
+ " {\n"
+ " cachedCollidableIndex = body.m_collidableIdx;\n"
+ " cachedCollidable = collidables[cachedCollidableIndex];\n"
+ " }\n"
+ " if (cachedCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
+ " {\n"
+ " float4 invPos = (float4)(0,0,0,0);\n"
+ " float4 invOrn = (float4)(0,0,0,0);\n"
+ " float4 rayFromLocal = (float4)(0,0,0,0);\n"
+ " float4 rayToLocal = (float4)(0,0,0,0);\n"
+ " invOrn = qtInvert(orn);\n"
+ " invPos = qtRotate(invOrn, -pos);\n"
+ " rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n"
+ " rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n"
+ " rayFromLocal.w = 0.f;\n"
+ " rayToLocal.w = 0.f;\n"
+ " int numFaces = convexShapes[cachedCollidable.m_shapeIndex].m_numFaces;\n"
+ " int faceOffset = convexShapes[cachedCollidable.m_shapeIndex].m_faceOffset;\n"
+ " if (numFaces)\n"
+ " {\n"
+ " if (rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
+ " {\n"
+ " hitBodyIndex = b;\n"
+ " \n"
+ " }\n"
+ " }\n"
+ " }\n"
+ " if (cachedCollidable.m_shapeType == SHAPE_SPHERE)\n"
+ " {\n"
+ " float radius = cachedCollidable.m_radius;\n"
+ " \n"
+ " if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
+ " {\n"
+ " hitBodyIndex = b;\n"
+ " hitNormal = (float4) (hitPoint-bodies[b].m_pos);\n"
+ " }\n"
+ " }\n"
+ " }\n"
+ " if (hitBodyIndex>=0)\n"
+ " {\n"
+ " hitPoint = setInterpolate3(rayFrom, rayTo,hitFraction);\n"
+ " hitResults[i].m_hitFraction = hitFraction;\n"
+ " hitResults[i].m_hitPoint = hitPoint;\n"
+ " hitResults[i].m_hitNormal = normalize(hitNormal);\n"
+ " hitResults[i].m_hitResult0 = hitBodyIndex;\n"
+ " }\n"
+ "}\n"
+ "__kernel void findRayRigidPairIndexRanges(__global int2* rayRigidPairs, \n"
+ " __global int* out_firstRayRigidPairIndexPerRay,\n"
+ " __global int* out_numRayRigidPairsPerRay,\n"
+ " int numRayRigidPairs)\n"
+ "{\n"
+ " int rayRigidPairIndex = get_global_id(0);\n"
+ " if (rayRigidPairIndex >= numRayRigidPairs) return;\n"
+ " \n"
+ " int rayIndex = rayRigidPairs[rayRigidPairIndex].x;\n"
+ " \n"
+ " atomic_min(&out_firstRayRigidPairIndexPerRay[rayIndex], rayRigidPairIndex);\n"
+ " atomic_inc(&out_numRayRigidPairsPerRay[rayIndex]);\n"
+ "}\n"
+ "__kernel void rayCastPairsKernel(const __global b3RayInfo* rays, \n"
+ " __global b3RayHit* hitResults, \n"
+ " __global int* firstRayRigidPairIndexPerRay,\n"
+ " __global int* numRayRigidPairsPerRay,\n"
+ " \n"
+ " __global Body* bodies,\n"
+ " __global Collidable* collidables,\n"
+ " __global const b3GpuFace* faces,\n"
+ " __global const ConvexPolyhedronCL* convexShapes,\n"
+ " \n"
+ " __global int2* rayRigidPairs,\n"
+ " int numRays)\n"
+ "{\n"
+ " int i = get_global_id(0);\n"
+ " if (i >= numRays) return;\n"
+ " \n"
+ " float4 rayFrom = rays[i].m_from;\n"
+ " float4 rayTo = rays[i].m_to;\n"
+ " \n"
+ " hitResults[i].m_hitFraction = 1.f;\n"
+ " \n"
+ " float hitFraction = 1.f;\n"
+ " float4 hitPoint;\n"
+ " float4 hitNormal;\n"
+ " int hitBodyIndex = -1;\n"
+ " \n"
+ " //\n"
+ " for(int pair = 0; pair < numRayRigidPairsPerRay[i]; ++pair)\n"
+ " {\n"
+ " int rayRigidPairIndex = pair + firstRayRigidPairIndexPerRay[i];\n"
+ " int b = rayRigidPairs[rayRigidPairIndex].y;\n"
+ " \n"
+ " if (hitResults[i].m_hitResult2 == b) continue;\n"
+ " \n"
+ " Body body = bodies[b];\n"
+ " Collidable rigidCollidable = collidables[body.m_collidableIdx];\n"
+ " \n"
+ " float4 pos = body.m_pos;\n"
+ " float4 orn = body.m_quat;\n"
+ " \n"
+ " if (rigidCollidable.m_shapeType == SHAPE_CONVEX_HULL)\n"
+ " {\n"
+ " float4 invPos = (float4)(0,0,0,0);\n"
+ " float4 invOrn = (float4)(0,0,0,0);\n"
+ " float4 rayFromLocal = (float4)(0,0,0,0);\n"
+ " float4 rayToLocal = (float4)(0,0,0,0);\n"
+ " invOrn = qtInvert(orn);\n"
+ " invPos = qtRotate(invOrn, -pos);\n"
+ " rayFromLocal = qtRotate( invOrn, rayFrom ) + invPos;\n"
+ " rayToLocal = qtRotate( invOrn, rayTo) + invPos;\n"
+ " rayFromLocal.w = 0.f;\n"
+ " rayToLocal.w = 0.f;\n"
+ " int numFaces = convexShapes[rigidCollidable.m_shapeIndex].m_numFaces;\n"
+ " int faceOffset = convexShapes[rigidCollidable.m_shapeIndex].m_faceOffset;\n"
+ " \n"
+ " if (numFaces && rayConvex(rayFromLocal, rayToLocal, numFaces, faceOffset,faces, &hitFraction, &hitNormal))\n"
+ " {\n"
+ " hitBodyIndex = b;\n"
+ " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
+ " }\n"
+ " }\n"
+ " \n"
+ " if (rigidCollidable.m_shapeType == SHAPE_SPHERE)\n"
+ " {\n"
+ " float radius = rigidCollidable.m_radius;\n"
+ " \n"
+ " if (sphere_intersect(pos, radius, rayFrom, rayTo, &hitFraction))\n"
+ " {\n"
+ " hitBodyIndex = b;\n"
+ " hitPoint = setInterpolate3(rayFrom, rayTo, hitFraction);\n"
+ " hitNormal = (float4) (hitPoint - bodies[b].m_pos);\n"
+ " }\n"
+ " }\n"
+ " }\n"
+ " \n"
+ " if (hitBodyIndex >= 0)\n"
+ " {\n"
+ " hitResults[i].m_hitFraction = hitFraction;\n"
+ " hitResults[i].m_hitPoint = hitPoint;\n"
+ " hitResults[i].m_hitNormal = normalize(hitNormal);\n"
+ " hitResults[i].m_hitResult0 = hitBodyIndex;\n"
+ " }\n"
+ " \n"
+ "}\n";