diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl | 1888 |
1 files changed, 1888 insertions, 0 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl new file mode 100644 index 0000000000..f433971741 --- /dev/null +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/kernels/satClipHullContacts.cl @@ -0,0 +1,1888 @@ + +#define TRIANGLE_NUM_CONVEX_FACES 5 + + + +#pragma OPENCL EXTENSION cl_amd_printf : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable +#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable +#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable + +#ifdef cl_ext_atomic_counters_32 +#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable +#else +#define counter32_t volatile __global int* +#endif + +#define GET_GROUP_IDX get_group_id(0) +#define GET_LOCAL_IDX get_local_id(0) +#define GET_GLOBAL_IDX get_global_id(0) +#define GET_GROUP_SIZE get_local_size(0) +#define GET_NUM_GROUPS get_num_groups(0) +#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE) +#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE) +#define AtomInc(x) atom_inc(&(x)) +#define AtomInc1(x, out) out = atom_inc(&(x)) +#define AppendInc(x, out) out = atomic_inc(x) +#define AtomAdd(x, value) atom_add(&(x), value) +#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value ) +#define AtomXhg(x, value) atom_xchg ( &(x), value ) + +#define max2 max +#define min2 min + +typedef unsigned int u32; + + + +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Contact4Data.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3ConvexPolyhedronData.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3Collidable.h" +#include "Bullet3Collision/NarrowPhaseCollision/shared/b3RigidBodyData.h" + + + +#define GET_NPOINTS(x) (x).m_worldNormalOnB.w + + + +#define SELECT_UINT4( b, a, condition ) select( b,a,condition ) + +#define make_float4 (float4) +#define make_float2 (float2) +#define make_uint4 (uint4) +#define make_int4 (int4) +#define make_uint2 (uint2) +#define make_int2 (int2) + + +__inline +float fastDiv(float numerator, float denominator) +{ + return native_divide(numerator, denominator); +// return numerator/denominator; +} + +__inline +float4 fastDiv4(float4 numerator, float4 denominator) +{ + return native_divide(numerator, denominator); +} + + +__inline +float4 cross3(float4 a, float4 b) +{ + return cross(a,b); +} + +//#define dot3F4 dot + +__inline +float dot3F4(float4 a, float4 b) +{ + float4 a1 = make_float4(a.xyz,0.f); + float4 b1 = make_float4(b.xyz,0.f); + return dot(a1, b1); +} + +__inline +float4 fastNormalize4(float4 v) +{ + return fast_normalize(v); +} + + +/////////////////////////////////////// +// Quaternion +/////////////////////////////////////// + +typedef float4 Quaternion; + +__inline +Quaternion qtMul(Quaternion a, Quaternion b); + +__inline +Quaternion qtNormalize(Quaternion in); + +__inline +float4 qtRotate(Quaternion q, float4 vec); + +__inline +Quaternion qtInvert(Quaternion q); + + + + +__inline +Quaternion qtMul(Quaternion a, Quaternion b) +{ + Quaternion ans; + ans = cross3( a, b ); + ans += a.w*b+b.w*a; +// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z); + ans.w = a.w*b.w - dot3F4(a, b); + return ans; +} + +__inline +Quaternion qtNormalize(Quaternion in) +{ + return fastNormalize4(in); +// in /= length( in ); +// return in; +} +__inline +float4 qtRotate(Quaternion q, float4 vec) +{ + Quaternion qInv = qtInvert( q ); + float4 vcpy = vec; + vcpy.w = 0.f; + float4 out = qtMul(qtMul(q,vcpy),qInv); + return out; +} + +__inline +Quaternion qtInvert(Quaternion q) +{ + return (Quaternion)(-q.xyz, q.w); +} + +__inline +float4 qtInvRotate(const Quaternion q, float4 vec) +{ + return qtRotate( qtInvert( q ), vec ); +} + +__inline +float4 transform(const float4* p, const float4* translation, const Quaternion* orientation) +{ + return qtRotate( *orientation, *p ) + (*translation); +} + + + +__inline +float4 normalize3(const float4 a) +{ + float4 n = make_float4(a.x, a.y, a.z, 0.f); + return fastNormalize4( n ); +} + + +__inline float4 lerp3(const float4 a,const float4 b, float t) +{ + return make_float4( a.x + (b.x - a.x) * t, + a.y + (b.y - a.y) * t, + a.z + (b.z - a.z) * t, + 0.f); +} + + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFaceGlobal(__global const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, __global float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; + //double-check next test + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + + +// Clips a face to the back of a plane, return the number of vertices out, stored in ppVtxOut +int clipFace(const float4* pVtxIn, int numVertsIn, float4 planeNormalWS,float planeEqWS, float4* ppVtxOut) +{ + + int ve; + float ds, de; + int numVertsOut = 0; +//double-check next test + if (numVertsIn < 2) + return 0; + + float4 firstVertex=pVtxIn[numVertsIn-1]; + float4 endVertex = pVtxIn[0]; + + ds = dot3F4(planeNormalWS,firstVertex)+planeEqWS; + + for (ve = 0; ve < numVertsIn; ve++) + { + endVertex=pVtxIn[ve]; + + de = dot3F4(planeNormalWS,endVertex)+planeEqWS; + + if (ds<0) + { + if (de<0) + { + // Start < 0, end < 0, so output endVertex + ppVtxOut[numVertsOut++] = endVertex; + } + else + { + // Start < 0, end >= 0, so output intersection + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + } + } + else + { + if (de<0) + { + // Start >= 0, end < 0 so output intersection and end + ppVtxOut[numVertsOut++] = lerp3(firstVertex, endVertex,(ds * 1.f/(ds - de)) ); + ppVtxOut[numVertsOut++] = endVertex; + } + } + firstVertex = endVertex; + ds = de; + } + return numVertsOut; +} + + +int clipFaceAgainstHull(const float4 separatingNormal, __global const b3ConvexPolyhedronData_t* hullA, + const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace_t polyA = faces[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+e0]]; + const float4 b = vertices[hullA->m_vertexOffset+indices[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;i<numVertsIn;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + //resultOut.addContactPoint(separatingNormal,point,depth); + contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + } + + return numContactsOut; +} + + + +int clipFaceAgainstHullLocalA(const float4 separatingNormal, const b3ConvexPolyhedronData_t* hullA, + const float4 posA, const Quaternion ornA, float4* worldVertsB1, int numWorldVertsB1, + float4* worldVertsB2, int capacityWorldVertsB2, + const float minDist, float maxDist, + const float4* verticesA, + const b3GpuFace_t* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + float4* contactsOut, + int contactCapacity) +{ + int numContactsOut = 0; + + float4* pVtxIn = worldVertsB1; + float4* pVtxOut = worldVertsB2; + + int numVertsIn = numWorldVertsB1; + int numVertsOut = 0; + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + facesA[hullA->m_faceOffset+face].m_plane.x, + facesA[hullA->m_faceOffset+face].m_plane.y, + facesA[hullA->m_faceOffset+face].m_plane.z,0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + } + } + } + if (closestFaceA<0) + return numContactsOut; + + b3GpuFace_t polyA = facesA[hullA->m_faceOffset+closestFaceA]; + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + int numVerticesA = polyA.m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+e0]]; + const float4 b = verticesA[hullA->m_vertexOffset+indicesA[polyA.m_indexOffset+((e0+1)%numVerticesA)]]; + const float4 edge0 = a - b; + const float4 WorldEdge0 = qtRotate(ornA,edge0); + float4 planeNormalA = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float4 worldPlaneAnormal1 = qtRotate(ornA,planeNormalA); + + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = transform(&a,&posA,&ornA); + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + + //clip face + //clipFace(*pVtxIn, *pVtxOut,planeNormalWS,planeEqWS); + numVertsOut = clipFace(pVtxIn, numVertsIn, planeNormalWS,planeEqWS, pVtxOut); + + //btSwap(pVtxIn,pVtxOut); + float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsIn = numVertsOut; + numVertsOut = 0; + } + + + // only keep points that are behind the witness face + { + float4 localPlaneNormal = make_float4(polyA.m_plane.x,polyA.m_plane.y,polyA.m_plane.z,0.f); + float localPlaneEq = polyA.m_plane.w; + float4 planeNormalWS = qtRotate(ornA,localPlaneNormal); + float planeEqWS=localPlaneEq-dot3F4(planeNormalWS,posA); + for (int i=0;i<numVertsIn;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + //resultOut.addContactPoint(separatingNormal,point,depth); + contactsOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + } + + return numContactsOut; +} + +int clipHullAgainstHull(const float4 separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + float4* localContactsOut, + int localContactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHull(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist,vertices, + faces, + indices,localContactsOut,localContactCapacity); + } + + return numContactsOut; +} + + +int clipHullAgainstHullLocalA(const float4 separatingNormal, + const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + float4* worldVertsB1, float4* worldVertsB2, int capacityWorldVerts, + const float minDist, float maxDist, + const float4* verticesA, + const b3GpuFace_t* facesA, + const int* indicesA, + __global const float4* verticesB, + __global const b3GpuFace_t* facesB, + __global const int* indicesB, + float4* localContactsOut, + int localContactCapacity) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(facesB[hullB->m_faceOffset+face].m_plane.x, + facesB[hullB->m_faceOffset+face].m_plane.y, facesB[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = facesB[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4 b = verticesB[hullB->m_vertexOffset+indicesB[polyB.m_indexOffset+e0]]; + worldVertsB1[numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + if (closestFaceB>=0) + { + numContactsOut = clipFaceAgainstHullLocalA(separatingNormal, hullA, + posA,ornA, + worldVertsB1,numWorldVertsB1,worldVertsB2,capacityWorldVerts, minDist, maxDist, + verticesA,facesA,indicesA, + verticesB,facesB,indicesB, + localContactsOut,localContactCapacity); + } + + return numContactsOut; +} + +#define PARALLEL_SUM(v, n) for(int j=1; j<n; j++) v[0] += v[j]; +#define PARALLEL_DO(execution, n) for(int ie=0; ie<n; ie++){execution;} +#define REDUCE_MAX(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = (v[i].y > v[i+offset].y)? v[i]: v[i+offset]; } +#define REDUCE_MIN(v, n) {int i=0;\ +for(int offset=0; offset<n; offset++) v[i] = (v[i].y < v[i+offset].y)? v[i]: v[i+offset]; } + +int extractManifoldSequentialGlobal(__global const float4* p, int nPoints, float4 nearNormal, int4* contactIdx) +{ + if( nPoints == 0 ) + return 0; + + if (nPoints <=4) + return nPoints; + + + if (nPoints >64) + nPoints = 64; + + float4 center = make_float4(0.f); + { + + for (int i=0;i<nPoints;i++) + center += p[i]; + center /= (float)nPoints; + } + + + + // sample 4 directions + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + + //keep point with deepest penetration + float minW= FLT_MAX; + + int minIndex=-1; + + float4 maxDots; + maxDots.x = FLT_MIN; + maxDots.y = FLT_MIN; + maxDots.z = FLT_MIN; + maxDots.w = FLT_MIN; + + // idx, distance + for(int ie = 0; ie<nPoints; ie++ ) + { + if (p[ie].w<minW) + { + minW = p[ie].w; + minIndex=ie; + } + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + if (f<maxDots.x) + { + maxDots.x = f; + contactIdx[0].x = ie; + } + + f = dot3F4( -u, r ); + if (f<maxDots.y) + { + maxDots.y = f; + contactIdx[0].y = ie; + } + + + f = dot3F4( v, r ); + if (f<maxDots.z) + { + maxDots.z = f; + contactIdx[0].z = ie; + } + + f = dot3F4( -v, r ); + if (f<maxDots.w) + { + maxDots.w = f; + contactIdx[0].w = ie; + } + + } + + if (contactIdx[0].x != minIndex && contactIdx[0].y != minIndex && contactIdx[0].z != minIndex && contactIdx[0].w != minIndex) + { + //replace the first contact with minimum (todo: replace contact with least penetration) + contactIdx[0].x = minIndex; + } + + return 4; + +} + + +int extractManifoldSequentialGlobalFake(__global const float4* p, int nPoints, float4 nearNormal, int* contactIdx) +{ + contactIdx[0] = 0; + contactIdx[1] = 1; + contactIdx[2] = 2; + contactIdx[3] = 3; + + if( nPoints == 0 ) return 0; + + nPoints = min2( nPoints, 4 ); + return nPoints; + +} + + + +int extractManifoldSequential(const float4* p, int nPoints, float4 nearNormal, int* contactIdx) +{ + if( nPoints == 0 ) return 0; + + nPoints = min2( nPoints, 64 ); + + float4 center = make_float4(0.f); + { + float4 v[64]; + for (int i=0;i<nPoints;i++) + v[i] = p[i]; + //memcpy( v, p, nPoints*sizeof(float4) ); + PARALLEL_SUM( v, nPoints ); + center = v[0]/(float)nPoints; + } + + + + { // sample 4 directions + if( nPoints < 4 ) + { + for(int i=0; i<nPoints; i++) + contactIdx[i] = i; + return nPoints; + } + + float4 aVector = p[0] - center; + float4 u = cross3( nearNormal, aVector ); + float4 v = cross3( nearNormal, u ); + u = normalize3( u ); + v = normalize3( v ); + + int idx[4]; + + float2 max00 = make_float2(0,FLT_MAX); + { + // idx, distance + { + { + int4 a[64]; + for(int ie = 0; ie<nPoints; ie++ ) + { + + + float f; + float4 r = p[ie]-center; + f = dot3F4( u, r ); + a[ie].x = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( -u, r ); + a[ie].y = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( v, r ); + a[ie].z = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + + f = dot3F4( -v, r ); + a[ie].w = ((*(u32*)&f) & 0xffffff00) | (0xff & ie); + } + + for(int ie=0; ie<nPoints; ie++) + { + a[0].x = (a[0].x > a[ie].x )? a[0].x: a[ie].x; + a[0].y = (a[0].y > a[ie].y )? a[0].y: a[ie].y; + a[0].z = (a[0].z > a[ie].z )? a[0].z: a[ie].z; + a[0].w = (a[0].w > a[ie].w )? a[0].w: a[ie].w; + } + + idx[0] = (int)a[0].x & 0xff; + idx[1] = (int)a[0].y & 0xff; + idx[2] = (int)a[0].z & 0xff; + idx[3] = (int)a[0].w & 0xff; + } + } + + { + float2 h[64]; + PARALLEL_DO( h[ie] = make_float2((float)ie, p[ie].w), nPoints ); + REDUCE_MIN( h, nPoints ); + max00 = h[0]; + } + } + + contactIdx[0] = idx[0]; + contactIdx[1] = idx[1]; + contactIdx[2] = idx[2]; + contactIdx[3] = idx[3]; + + + return 4; + } +} + + + +__kernel void extractManifoldAndAddContactKernel(__global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const float4* closestPointsWorld, + __global const float4* separatingNormalsWorld, + __global const int* contactCounts, + __global const int* contactOffsets, + __global struct b3Contact4Data* restrict contactsOut, + counter32_t nContactsOut, + int contactCapacity, + int numPairs, + int pairIndex + ) +{ + int idx = get_global_id(0); + + if (idx<numPairs) + { + float4 normal = separatingNormalsWorld[idx]; + int nPoints = contactCounts[idx]; + __global const float4* pointsIn = &closestPointsWorld[contactOffsets[idx]]; + float4 localPoints[64]; + for (int i=0;i<nPoints;i++) + { + localPoints[i] = pointsIn[i]; + } + + int contactIdx[4];// = {-1,-1,-1,-1}; + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nContacts = extractManifoldSequential(localPoints, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nContactsOut, dstIdx ); + if (dstIdx<contactCapacity) + { + __global struct b3Contact4Data* c = contactsOut + dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = idx; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0 ? -bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0 ? -bodyB:bodyB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + for (int i=0;i<nContacts;i++) + { + c->m_worldPosB[i] = localPoints[contactIdx[i]]; + } + GET_NPOINTS(*c) = nContacts; + } + } +} + + +void trInverse(float4 translationIn, Quaternion orientationIn, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtInvert(orientationIn); + *translationOut = qtRotate(*orientationOut, -translationIn); +} + +void trMul(float4 translationA, Quaternion orientationA, + float4 translationB, Quaternion orientationB, + float4* translationOut, Quaternion* orientationOut) +{ + *orientationOut = qtMul(orientationA,orientationB); + *translationOut = transform(&translationB,&translationA,&orientationA); +} + + + + +__kernel void clipHullHullKernel( __global int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numPairs, + int contactCapacity) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + if (hasSeparatingAxis[i]) + { + + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + + int numLocalContactsOut = clipHullAgainstHull(separatingNormals[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, + rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + + int mprContactIndex = pairs[pairIndex].z; + + int dstIdx = mprContactIndex; + if (dstIdx<0) + { + AppendInc( nGlobalContactsOut, dstIdx ); + } + + if (dstIdx<contactCapacity) + { + pairs[pairIndex].z = dstIdx; + + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + for (int i=0;i<nReducedContacts;i++) + { + //this condition means: overwrite contact point, unless at index i==0 we have a valid 'mpr' contact + if (i>0||(mprContactIndex<0)) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + +__kernel void clipCompoundsHullHullKernel( __global const int4* gpuCompoundPairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const b3GpuChildShape_t* gpuChildShapes, + __global const float4* gpuCompoundSepNormalsOut, + __global const int* gpuHasCompoundSepNormalsOut, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int numCompoundPairs, int maxContactCapacity) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numCompoundPairs) + { + + if (gpuHasCompoundSepNormalsOut[i]) + { + + int bodyIndexA = gpuCompoundPairs[i].x; + int bodyIndexB = gpuCompoundPairs[i].y; + + int childShapeIndexA = gpuCompoundPairs[i].z; + int childShapeIndexB = gpuCompoundPairs[i].w; + + int collidableIndexA = -1; + int collidableIndexB = -1; + + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 posA = rigidBodies[bodyIndexA].m_pos; + + float4 ornB = rigidBodies[bodyIndexB].m_quat; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + if (childShapeIndexA >= 0) + { + collidableIndexA = gpuChildShapes[childShapeIndexA].m_shapeIndex; + float4 childPosA = gpuChildShapes[childShapeIndexA].m_childPosition; + float4 childOrnA = gpuChildShapes[childShapeIndexA].m_childOrientation; + float4 newPosA = qtRotate(ornA,childPosA)+posA; + float4 newOrnA = qtMul(ornA,childOrnA); + posA = newPosA; + ornA = newOrnA; + } else + { + collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + } + + if (childShapeIndexB>=0) + { + collidableIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + } else + { + collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + } + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + int numLocalContactsOut = clipHullAgainstHull(gpuCompoundSepNormalsOut[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -gpuCompoundSepNormalsOut[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if ((dstIdx+nReducedContacts) < maxContactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = gpuCompoundPairs[pairIndex].x; + int bodyB = gpuCompoundPairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;i<nReducedContacts;i++) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (gpuHasCompoundSepNormalsOut[i]) + }// if (i<numCompoundPairs) + +} + + + +__kernel void sphereSphereCollisionKernel( __global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int contactCapacity, + int numPairs) +{ + + int i = get_global_id(0); + int pairIndex = i; + + if (i<numPairs) + { + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + if (collidables[collidableIndexA].m_shapeType == SHAPE_SPHERE && + collidables[collidableIndexB].m_shapeType == SHAPE_SPHERE) + { + //sphere-sphere + float radiusA = collidables[collidableIndexA].m_radius; + float radiusB = collidables[collidableIndexB].m_radius; + float4 posA = rigidBodies[bodyIndexA].m_pos; + float4 posB = rigidBodies[bodyIndexB].m_pos; + + float4 diff = posA-posB; + float len = length(diff); + + ///iff distance positive, don't generate a new contact + if ( len <= (radiusA+radiusB)) + { + ///distance (negative means penetration) + float dist = len - (radiusA+radiusB); + float4 normalOnSurfaceB = make_float4(1.f,0.f,0.f,0.f); + if (len > 0.00001) + { + normalOnSurfaceB = diff / len; + } + float4 contactPosB = posB + normalOnSurfaceB*radiusB; + contactPosB.w = dist; + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if (dstIdx < contactCapacity) + { + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normalOnSurfaceB; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_worldPosB[0] = contactPosB; + c->m_childIndexA = -1; + c->m_childIndexB = -1; + + GET_NPOINTS(*c) = 1; + }//if (dstIdx < numPairs) + }//if ( len <= (radiusA+radiusB)) + }//SHAPE_SPHERE SHAPE_SPHERE + }//if (i<numPairs) +} + +__kernel void clipHullHullConcaveConvexKernel( __global int4* concavePairsIn, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const b3GpuChildShape_t* gpuChildShapes, + __global const float4* separatingNormals, + __global struct b3Contact4Data* restrict globalContactsOut, + counter32_t nGlobalContactsOut, + int contactCapacity, + int numConcavePairs) +{ + + int i = get_global_id(0); + int pairIndex = i; + + float4 worldVertsB1[64]; + float4 worldVertsB2[64]; + int capacityWorldVerts = 64; + + float4 localContactsOut[64]; + int localContactCapacity=64; + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numConcavePairs) + { + //negative value means that the pair is invalid + if (concavePairsIn[i].w<0) + return; + + int bodyIndexA = concavePairsIn[i].x; + int bodyIndexB = concavePairsIn[i].y; + int f = concavePairsIn[i].z; + int childShapeIndexA = f; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + /////////////////////////////////////////////////////////////// + + + bool overlap = false; + + b3ConvexPolyhedronData_t convexPolyhedronA; + + //add 3 vertices of the triangle + convexPolyhedronA.m_numVertices = 3; + convexPolyhedronA.m_vertexOffset = 0; + float4 localCenter = make_float4(0.f,0.f,0.f,0.f); + + b3GpuFace_t face = faces[convexShapes[shapeIndexA].m_faceOffset+f]; + + float4 verticesA[3]; + for (int i=0;i<3;i++) + { + int index = indices[face.m_indexOffset+i]; + float4 vert = vertices[convexShapes[shapeIndexA].m_vertexOffset+index]; + verticesA[i] = vert; + localCenter += vert; + } + + float dmin = FLT_MAX; + + int localCC=0; + + //a triangle has 3 unique edges + convexPolyhedronA.m_numUniqueEdges = 3; + convexPolyhedronA.m_uniqueEdgesOffset = 0; + float4 uniqueEdgesA[3]; + + uniqueEdgesA[0] = (verticesA[1]-verticesA[0]); + uniqueEdgesA[1] = (verticesA[2]-verticesA[1]); + uniqueEdgesA[2] = (verticesA[0]-verticesA[2]); + + + convexPolyhedronA.m_faceOffset = 0; + + float4 normal = make_float4(face.m_plane.x,face.m_plane.y,face.m_plane.z,0.f); + + b3GpuFace_t facesA[TRIANGLE_NUM_CONVEX_FACES]; + int indicesA[3+3+2+2+2]; + int curUsedIndices=0; + int fidx=0; + + //front size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[0] = 0; + indicesA[1] = 1; + indicesA[2] = 2; + curUsedIndices+=3; + float c = face.m_plane.w; + facesA[fidx].m_plane.x = normal.x; + facesA[fidx].m_plane.y = normal.y; + facesA[fidx].m_plane.z = normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + //back size of triangle + { + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[3]=2; + indicesA[4]=1; + indicesA[5]=0; + curUsedIndices+=3; + float c = dot3F4(normal,verticesA[0]); + float c1 = -face.m_plane.w; + facesA[fidx].m_plane.x = -normal.x; + facesA[fidx].m_plane.y = -normal.y; + facesA[fidx].m_plane.z = -normal.z; + facesA[fidx].m_plane.w = c; + facesA[fidx].m_numIndices=3; + } + fidx++; + + bool addEdgePlanes = true; + if (addEdgePlanes) + { + int numVertices=3; + int prevVertex = numVertices-1; + for (int i=0;i<numVertices;i++) + { + float4 v0 = verticesA[i]; + float4 v1 = verticesA[prevVertex]; + + float4 edgeNormal = normalize(cross(normal,v1-v0)); + float c = -dot3F4(edgeNormal,v0); + + facesA[fidx].m_numIndices = 2; + facesA[fidx].m_indexOffset=curUsedIndices; + indicesA[curUsedIndices++]=i; + indicesA[curUsedIndices++]=prevVertex; + + facesA[fidx].m_plane.x = edgeNormal.x; + facesA[fidx].m_plane.y = edgeNormal.y; + facesA[fidx].m_plane.z = edgeNormal.z; + facesA[fidx].m_plane.w = c; + fidx++; + prevVertex = i; + } + } + convexPolyhedronA.m_numFaces = TRIANGLE_NUM_CONVEX_FACES; + convexPolyhedronA.m_localCenter = localCenter*(1.f/3.f); + + + float4 posA = rigidBodies[bodyIndexA].m_pos; + posA.w = 0.f; + float4 posB = rigidBodies[bodyIndexB].m_pos; + posB.w = 0.f; + float4 ornA = rigidBodies[bodyIndexA].m_quat; + float4 ornB =rigidBodies[bodyIndexB].m_quat; + + + float4 sepAxis = separatingNormals[i]; + + int shapeTypeB = collidables[collidableIndexB].m_shapeType; + int childShapeIndexB =-1; + if (shapeTypeB==SHAPE_COMPOUND_OF_CONVEX_HULLS) + { + /////////////////// + ///compound shape support + + childShapeIndexB = concavePairsIn[pairIndex].w; + int childColIndexB = gpuChildShapes[childShapeIndexB].m_shapeIndex; + shapeIndexB = collidables[childColIndexB].m_shapeIndex; + float4 childPosB = gpuChildShapes[childShapeIndexB].m_childPosition; + float4 childOrnB = gpuChildShapes[childShapeIndexB].m_childOrientation; + float4 newPosB = transform(&childPosB,&posB,&ornB); + float4 newOrnB = qtMul(ornB,childOrnB); + posB = newPosB; + ornB = newOrnB; + + } + + //////////////////////////////////////// + + + + int numLocalContactsOut = clipHullAgainstHullLocalA(sepAxis, + &convexPolyhedronA, &convexShapes[shapeIndexB], + posA,ornA, + posB,ornB, + worldVertsB1,worldVertsB2,capacityWorldVerts, + minDist, maxDist, + &verticesA,&facesA,&indicesA, + vertices,faces,indices, + localContactsOut,localContactCapacity); + + if (numLocalContactsOut>0) + { + float4 normal = -separatingNormals[i]; + int nPoints = numLocalContactsOut; + float4* pointsIn = localContactsOut; + int contactIdx[4];// = {-1,-1,-1,-1}; + + contactIdx[0] = -1; + contactIdx[1] = -1; + contactIdx[2] = -1; + contactIdx[3] = -1; + + int nReducedContacts = extractManifoldSequential(pointsIn, nPoints, normal, contactIdx); + + int dstIdx; + AppendInc( nGlobalContactsOut, dstIdx ); + if (dstIdx<contactCapacity) + { + __global struct b3Contact4Data* c = globalContactsOut+ dstIdx; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = concavePairsIn[pairIndex].x; + int bodyB = concavePairsIn[pairIndex].y; + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA = childShapeIndexA; + c->m_childIndexB = childShapeIndexB; + for (int i=0;i<nReducedContacts;i++) + { + c->m_worldPosB[i] = pointsIn[contactIdx[i]]; + } + GET_NPOINTS(*c) = nReducedContacts; + } + + }// if (numContactsOut>0) + }// if (i<numPairs) +} + + + + + + +int findClippingFaces(const float4 separatingNormal, + __global const b3ConvexPolyhedronData_t* hullA, __global const b3ConvexPolyhedronData_t* hullB, + const float4 posA, const Quaternion ornA,const float4 posB, const Quaternion ornB, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + const float minDist, float maxDist, + __global const float4* vertices, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global int4* clippingFaces, int pairIndex) +{ + int numContactsOut = 0; + int numWorldVertsB1= 0; + + + int closestFaceB=-1; + float dmax = -FLT_MAX; + + { + for(int face=0;face<hullB->m_numFaces;face++) + { + const float4 Normal = make_float4(faces[hullB->m_faceOffset+face].m_plane.x, + faces[hullB->m_faceOffset+face].m_plane.y, faces[hullB->m_faceOffset+face].m_plane.z,0.f); + const float4 WorldNormal = qtRotate(ornB, Normal); + float d = dot3F4(WorldNormal,separatingNormal); + if (d > dmax) + { + dmax = d; + closestFaceB = face; + } + } + } + + { + const b3GpuFace_t polyB = faces[hullB->m_faceOffset+closestFaceB]; + const int numVertices = polyB.m_numIndices; + for(int e0=0;e0<numVertices;e0++) + { + const float4 b = vertices[hullB->m_vertexOffset+indices[polyB.m_indexOffset+e0]]; + worldVertsB1[pairIndex*capacityWorldVerts+numWorldVertsB1++] = transform(&b,&posB,&ornB); + } + } + + int closestFaceA=-1; + { + float dmin = FLT_MAX; + for(int face=0;face<hullA->m_numFaces;face++) + { + const float4 Normal = make_float4( + faces[hullA->m_faceOffset+face].m_plane.x, + faces[hullA->m_faceOffset+face].m_plane.y, + faces[hullA->m_faceOffset+face].m_plane.z, + 0.f); + const float4 faceANormalWS = qtRotate(ornA,Normal); + + float d = dot3F4(faceANormalWS,separatingNormal); + if (d < dmin) + { + dmin = d; + closestFaceA = face; + worldNormalsA1[pairIndex] = faceANormalWS; + } + } + } + + int numVerticesA = faces[hullA->m_faceOffset+closestFaceA].m_numIndices; + for(int e0=0;e0<numVerticesA;e0++) + { + const float4 a = vertices[hullA->m_vertexOffset+indices[faces[hullA->m_faceOffset+closestFaceA].m_indexOffset+e0]]; + worldVertsA1[pairIndex*capacityWorldVerts+e0] = transform(&a, &posA,&ornA); + } + + clippingFaces[pairIndex].x = closestFaceA; + clippingFaces[pairIndex].y = closestFaceB; + clippingFaces[pairIndex].z = numVerticesA; + clippingFaces[pairIndex].w = numWorldVertsB1; + + + return numContactsOut; +} + + + +int clipFaces(__global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + __global float4* worldVertsB2, + int capacityWorldVertsB2, + const float minDist, float maxDist, + __global int4* clippingFaces, + int pairIndex) +{ + int numContactsOut = 0; + + int closestFaceA = clippingFaces[pairIndex].x; + int closestFaceB = clippingFaces[pairIndex].y; + int numVertsInA = clippingFaces[pairIndex].z; + int numVertsInB = clippingFaces[pairIndex].w; + + int numVertsOut = 0; + + if (closestFaceA<0) + return numContactsOut; + + __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; + __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; + + + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e0<numVertsInA;e0++) + { + const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; + const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; + const float4 WorldEdge0 = aw - bw; + float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = aw; + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); + __global float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsInB = numVertsOut; + numVertsOut = 0; + } + + //float4 planeNormalWS = worldNormalsA1[pairIndex]; + //float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + + + /*for (int i=0;i<numVertsInB;i++) + { + pVtxOut[i] = pVtxIn[i]; + }*/ + + + + + //numVertsInB=0; + + float4 planeNormalWS = worldNormalsA1[pairIndex]; + float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + for (int i=0;i<numVertsInB;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + pVtxOut[numContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + + clippingFaces[pairIndex].w =numContactsOut; + + + return numContactsOut; + +} + + + + +__kernel void findClippingFacesKernel( __global const int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const b3Collidable_t* collidables, + __global const b3ConvexPolyhedronData_t* convexShapes, + __global const float4* vertices, + __global const float4* uniqueEdges, + __global const b3GpuFace_t* faces, + __global const int* indices, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global int4* clippingFacesOut, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + int capacityWorldVerts, + int numPairs + ) +{ + + int i = get_global_id(0); + int pairIndex = i; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + int bodyIndexA = pairs[i].x; + int bodyIndexB = pairs[i].y; + + int collidableIndexA = rigidBodies[bodyIndexA].m_collidableIdx; + int collidableIndexB = rigidBodies[bodyIndexB].m_collidableIdx; + + int shapeIndexA = collidables[collidableIndexA].m_shapeIndex; + int shapeIndexB = collidables[collidableIndexB].m_shapeIndex; + + + + int numLocalContactsOut = findClippingFaces(separatingNormals[i], + &convexShapes[shapeIndexA], &convexShapes[shapeIndexB], + rigidBodies[bodyIndexA].m_pos,rigidBodies[bodyIndexA].m_quat, + rigidBodies[bodyIndexB].m_pos,rigidBodies[bodyIndexB].m_quat, + worldVertsA1, + worldNormalsA1, + worldVertsB1,capacityWorldVerts, + minDist, maxDist, + vertices,faces,indices, + clippingFacesOut,i); + + + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + + + +__kernel void clipFacesAndFindContactsKernel( __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global int4* clippingFacesOut, + __global float4* worldVertsA1, + __global float4* worldNormalsA1, + __global float4* worldVertsB1, + __global float4* worldVertsB2, + int vertexFaceCapacity, + int numPairs, + int debugMode + ) +{ + int i = get_global_id(0); + int pairIndex = i; + + + float minDist = -1e30f; + float maxDist = 0.02f; + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + +// int bodyIndexA = pairs[i].x; + // int bodyIndexB = pairs[i].y; + + int numLocalContactsOut = 0; + + int capacityWorldVertsB2 = vertexFaceCapacity; + + __global float4* pVtxIn = &worldVertsB1[pairIndex*capacityWorldVertsB2]; + __global float4* pVtxOut = &worldVertsB2[pairIndex*capacityWorldVertsB2]; + + + { + __global int4* clippingFaces = clippingFacesOut; + + + int closestFaceA = clippingFaces[pairIndex].x; + int closestFaceB = clippingFaces[pairIndex].y; + int numVertsInA = clippingFaces[pairIndex].z; + int numVertsInB = clippingFaces[pairIndex].w; + + int numVertsOut = 0; + + if (closestFaceA>=0) + { + + + + // clip polygon to back of planes of all faces of hull A that are adjacent to witness face + + for(int e0=0;e0<numVertsInA;e0++) + { + const float4 aw = worldVertsA1[pairIndex*capacityWorldVertsB2+e0]; + const float4 bw = worldVertsA1[pairIndex*capacityWorldVertsB2+((e0+1)%numVertsInA)]; + const float4 WorldEdge0 = aw - bw; + float4 worldPlaneAnormal1 = worldNormalsA1[pairIndex]; + float4 planeNormalWS1 = -cross3(WorldEdge0,worldPlaneAnormal1); + float4 worldA1 = aw; + float planeEqWS1 = -dot3F4(worldA1,planeNormalWS1); + float4 planeNormalWS = planeNormalWS1; + float planeEqWS=planeEqWS1; + numVertsOut = clipFaceGlobal(pVtxIn, numVertsInB, planeNormalWS,planeEqWS, pVtxOut); + __global float4* tmp = pVtxOut; + pVtxOut = pVtxIn; + pVtxIn = tmp; + numVertsInB = numVertsOut; + numVertsOut = 0; + } + + float4 planeNormalWS = worldNormalsA1[pairIndex]; + float planeEqWS=-dot3F4(planeNormalWS,worldVertsA1[pairIndex*capacityWorldVertsB2]); + + for (int i=0;i<numVertsInB;i++) + { + float depth = dot3F4(planeNormalWS,pVtxIn[i])+planeEqWS; + if (depth <=minDist) + { + depth = minDist; + } + + if (depth <=maxDist) + { + float4 pointInWorld = pVtxIn[i]; + pVtxOut[numLocalContactsOut++] = make_float4(pointInWorld.x,pointInWorld.y,pointInWorld.z,depth); + } + } + + } + clippingFaces[pairIndex].w =numLocalContactsOut; + + + } + + for (int i=0;i<numLocalContactsOut;i++) + pVtxIn[i] = pVtxOut[i]; + + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + +} + + + + + +__kernel void newContactReductionKernel( __global int4* pairs, + __global const b3RigidBodyData_t* rigidBodies, + __global const float4* separatingNormals, + __global const int* hasSeparatingAxis, + __global struct b3Contact4Data* globalContactsOut, + __global int4* clippingFaces, + __global float4* worldVertsB2, + volatile __global int* nGlobalContactsOut, + int vertexFaceCapacity, + int contactCapacity, + int numPairs + ) +{ + int i = get_global_id(0); + int pairIndex = i; + + int4 contactIdx; + contactIdx=make_int4(0,1,2,3); + + if (i<numPairs) + { + + if (hasSeparatingAxis[i]) + { + + + + + int nPoints = clippingFaces[pairIndex].w; + + if (nPoints>0) + { + + __global float4* pointsIn = &worldVertsB2[pairIndex*vertexFaceCapacity]; + float4 normal = -separatingNormals[i]; + + int nReducedContacts = extractManifoldSequentialGlobal(pointsIn, nPoints, normal, &contactIdx); + + int mprContactIndex = pairs[pairIndex].z; + + int dstIdx = mprContactIndex; + + if (dstIdx<0) + { + AppendInc( nGlobalContactsOut, dstIdx ); + } +//#if 0 + + if (dstIdx < contactCapacity) + { + + __global struct b3Contact4Data* c = &globalContactsOut[dstIdx]; + c->m_worldNormalOnB = -normal; + c->m_restituitionCoeffCmp = (0.f*0xffff);c->m_frictionCoeffCmp = (0.7f*0xffff); + c->m_batchIdx = pairIndex; + int bodyA = pairs[pairIndex].x; + int bodyB = pairs[pairIndex].y; + + pairs[pairIndex].w = dstIdx; + + c->m_bodyAPtrAndSignBit = rigidBodies[bodyA].m_invMass==0?-bodyA:bodyA; + c->m_bodyBPtrAndSignBit = rigidBodies[bodyB].m_invMass==0?-bodyB:bodyB; + c->m_childIndexA =-1; + c->m_childIndexB =-1; + + switch (nReducedContacts) + { + case 4: + c->m_worldPosB[3] = pointsIn[contactIdx.w]; + case 3: + c->m_worldPosB[2] = pointsIn[contactIdx.z]; + case 2: + c->m_worldPosB[1] = pointsIn[contactIdx.y]; + case 1: + if (mprContactIndex<0)//test + c->m_worldPosB[0] = pointsIn[contactIdx.x]; + default: + { + } + }; + + GET_NPOINTS(*c) = nReducedContacts; + + } + + +//#endif + + }// if (numContactsOut>0) + }// if (hasSeparatingAxis[i]) + }// if (i<numPairs) + + + +} |