diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels')
10 files changed, 5323 insertions, 5333 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h index 150eedc94b..7c73c96baa 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernels.h @@ -1,388 +1,387 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. 
This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = 
dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define 
GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"#define WG_SIZE 64\n" -"typedef struct \n" -"{\n" -" int m_n;\n" -" int m_start;\n" -" int m_staticIdx;\n" -" int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct \n" -"{\n" -" int m_a;\n" -" int m_b;\n" -" u32 m_idx;\n" -"}Elem;\n" -"#define STACK_SIZE (WG_SIZE*10)\n" -"//#define STACK_SIZE (WG_SIZE)\n" -"#define RING_SIZE 1024\n" -"#define RING_SIZE_MASK (RING_SIZE-1)\n" -"#define CHECK_SIZE (WG_SIZE)\n" -"#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" -"#define RING_END ldsTmp\n" -"u32 readBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" return buff[bufIdx] & (1<<bitIdx);\n" -"}\n" -"void writeBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -"// buff[bufIdx] |= (1<<bitIdx);\n" -" atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -"}\n" -"u32 tryWrite(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -" return ((ans >> bitIdx)&1) == 0;\n" -"}\n" -"// batching on the GPU\n" -"__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* 
gConstraintsOut,\n" -" __global const u32* gN, __global const u32* gStart, __global int* batchSizes, \n" -" int m_staticIdx )\n" -"{\n" -" __local u32 ldsStackIdx[STACK_SIZE];\n" -" __local u32 ldsStackEnd;\n" -" __local Elem ldsRingElem[RING_SIZE];\n" -" __local u32 ldsRingEnd;\n" -" __local u32 ldsTmp;\n" -" __local u32 ldsCheckBuffer[CHECK_SIZE];\n" -" __local u32 ldsFixedBuffer[CHECK_SIZE];\n" -" __local u32 ldsGEnd;\n" -" __local u32 ldsDstEnd;\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" \n" -" const int m_n = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsRingEnd = 0;\n" -" ldsGEnd = 0;\n" -" ldsStackEnd = 0;\n" -" ldsDstEnd = m_start;\n" -" }\n" -" \n" -" \n" -" \n" -"// while(1)\n" -"//was 250\n" -" int ie=0;\n" -" int maxBatch = 0;\n" -" for(ie=0; ie<50; ie++)\n" -" {\n" -" ldsFixedBuffer[lIdx] = 0;\n" -" for(int giter=0; giter<4; giter++)\n" -" {\n" -" int ringCap = GET_RING_CAPACITY;\n" -" \n" -" // 1. fill ring\n" -" if( ldsGEnd < m_n )\n" -" {\n" -" while( ringCap > WG_SIZE )\n" -" {\n" -" if( ldsGEnd >= m_n ) break;\n" -" if( lIdx < ringCap - WG_SIZE )\n" -" {\n" -" int srcIdx;\n" -" AtomInc1( ldsGEnd, srcIdx );\n" -" if( srcIdx < m_n )\n" -" {\n" -" int dstIdx;\n" -" AtomInc1( ldsRingEnd, dstIdx );\n" -" \n" -" int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" -" int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" -" ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" -" ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" -" ldsRingElem[dstIdx].m_idx = srcIdx;\n" -" }\n" -" }\n" -" ringCap = GET_RING_CAPACITY;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" // 2. 
fill stack\n" -" __local Elem* dst = ldsRingElem;\n" -" if( lIdx == 0 ) RING_END = 0;\n" -" int srcIdx=lIdx;\n" -" int end = ldsRingEnd;\n" -" {\n" -" for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n" -" {\n" -" Elem e;\n" -" if(srcIdx<end) e = ldsRingElem[srcIdx];\n" -" bool done = (srcIdx<end)?false:true;\n" -" for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n" -" \n" -" if( !done )\n" -" {\n" -" int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a));\n" -" int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b));\n" -" if( aUsed==0 && bUsed==0 )\n" -" {\n" -" int aAvailable=1;\n" -" int bAvailable=1;\n" -" int ea = abs(e.m_a);\n" -" int eb = abs(e.m_b);\n" -" bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n" -" bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n" -" \n" -" if (!aStatic)\n" -" aAvailable = tryWrite( ldsCheckBuffer, ea );\n" -" if (!bStatic)\n" -" bAvailable = tryWrite( ldsCheckBuffer, eb );\n" -" \n" -" //aAvailable = aStatic? 1: aAvailable;\n" -" //bAvailable = bStatic? 
1: bAvailable;\n" -" bool success = (aAvailable && bAvailable);\n" -" if(success)\n" -" {\n" -" \n" -" if (!aStatic)\n" -" writeBuf( ldsFixedBuffer, ea );\n" -" if (!bStatic)\n" -" writeBuf( ldsFixedBuffer, eb );\n" -" }\n" -" done = success;\n" -" }\n" -" }\n" -" // put it aside\n" -" if(srcIdx<end)\n" -" {\n" -" if( done )\n" -" {\n" -" int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n" -" if( dstIdx < STACK_SIZE )\n" -" ldsStackIdx[dstIdx] = e.m_idx;\n" -" else{\n" -" done = false;\n" -" AtomAdd( ldsStackEnd, -1 );\n" -" }\n" -" }\n" -" if( !done )\n" -" {\n" -" int dstIdx; AtomInc1( RING_END, dstIdx );\n" -" dst[dstIdx] = e;\n" -" }\n" -" }\n" -" // if filled, flush\n" -" if( ldsStackEnd == STACK_SIZE )\n" -" {\n" -" for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsStackIdx[i];\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" -" }\n" -" if( lIdx == 0 ) ldsStackEnd = 0;\n" -" //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n" -" ldsFixedBuffer[lIdx] = 0;\n" -" }\n" -" }\n" -" }\n" -" if( lIdx == 0 ) ldsRingEnd = RING_END;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsStackIdx[i];\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" -" }\n" -" // in case it couldn't consume any pair. Flush them\n" -" // todo. 
Serial batch worth while?\n" -" if( ldsStackEnd == 0 )\n" -" {\n" -" for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n" -" {\n" -" int idx = m_start + ldsRingElem[i].m_idx;\n" -" int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" -" gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" -" int curBatch = 100+i;\n" -" if (maxBatch < curBatch)\n" -" maxBatch = curBatch;\n" -" \n" -" gConstraintsOut[ dstIdx ].m_batchIdx = curBatch;\n" -" \n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 ) ldsRingEnd = 0;\n" -" }\n" -" if( lIdx == 0 ) ldsStackEnd = 0;\n" -" GROUP_LDS_BARRIER;\n" -" // termination\n" -" if( ldsGEnd == m_n && ldsRingEnd == 0 )\n" -" break;\n" -" }\n" -" if( lIdx == 0 )\n" -" {\n" -" if (maxBatch < ie)\n" -" maxBatch=ie;\n" -" batchSizes[wgIdx]=maxBatch;\n" -" }\n" -"}\n" -; +static const char* batchingKernelsCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. 
This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Takahiro Harada\n" + "#ifndef B3_CONTACT4DATA_H\n" + "#define B3_CONTACT4DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = 
b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3Contact4Data b3Contact4Data_t;\n" + "struct b3Contact4Data\n" + "{\n" + " b3Float4 m_worldPosB[4];\n" + "// b3Float4 m_localPosA[4];\n" + "// b3Float4 m_localPosB[4];\n" + " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" + " unsigned short m_restituitionCoeffCmp;\n" + " unsigned short m_frictionCoeffCmp;\n" + " int m_batchIdx;\n" + " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_childIndexA;\n" + " int m_childIndexB;\n" + " int m_unused1;\n" + " int m_unused2;\n" + "};\n" + "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" + "{\n" + " return (int)contact->m_worldNormalOnB.w;\n" + "};\n" + "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" + "{\n" + " contact->m_worldNormalOnB.w = (float)numPoints;\n" + "};\n" + "#endif //B3_CONTACT4DATA_H\n" + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile __global int*\n" + "#endif\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE 
get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define make_float4 (float4)\n" + "#define make_float2 (float2)\n" + "#define make_uint4 (uint4)\n" + "#define make_int4 (int4)\n" + "#define make_uint2 (uint2)\n" + "#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "#define WG_SIZE 64\n" + "typedef struct \n" + "{\n" + " int m_n;\n" + " int m_start;\n" + " int m_staticIdx;\n" + " int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct \n" + "{\n" + " int m_a;\n" + " int m_b;\n" + " u32 m_idx;\n" + "}Elem;\n" + "#define STACK_SIZE (WG_SIZE*10)\n" + "//#define STACK_SIZE (WG_SIZE)\n" + "#define RING_SIZE 1024\n" + "#define RING_SIZE_MASK (RING_SIZE-1)\n" + "#define CHECK_SIZE (WG_SIZE)\n" + "#define GET_RING_CAPACITY (RING_SIZE - ldsRingEnd)\n" + "#define RING_END ldsTmp\n" + "u32 readBuf(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % (32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + " return buff[bufIdx] & (1<<bitIdx);\n" + "}\n" + "void writeBuf(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % (32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + "// buff[bufIdx] |= (1<<bitIdx);\n" + " atom_or( &buff[bufIdx], (1<<bitIdx) );\n" + "}\n" + "u32 tryWrite(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % (32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + " u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) 
);\n" + " return ((ans >> bitIdx)&1) == 0;\n" + "}\n" + "// batching on the GPU\n" + "__kernel void CreateBatches( __global const struct b3Contact4Data* gConstraints, __global struct b3Contact4Data* gConstraintsOut,\n" + " __global const u32* gN, __global const u32* gStart, __global int* batchSizes, \n" + " int m_staticIdx )\n" + "{\n" + " __local u32 ldsStackIdx[STACK_SIZE];\n" + " __local u32 ldsStackEnd;\n" + " __local Elem ldsRingElem[RING_SIZE];\n" + " __local u32 ldsRingEnd;\n" + " __local u32 ldsTmp;\n" + " __local u32 ldsCheckBuffer[CHECK_SIZE];\n" + " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" + " __local u32 ldsGEnd;\n" + " __local u32 ldsDstEnd;\n" + " int wgIdx = GET_GROUP_IDX;\n" + " int lIdx = GET_LOCAL_IDX;\n" + " \n" + " const int m_n = gN[wgIdx];\n" + " const int m_start = gStart[wgIdx];\n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " ldsRingEnd = 0;\n" + " ldsGEnd = 0;\n" + " ldsStackEnd = 0;\n" + " ldsDstEnd = m_start;\n" + " }\n" + " \n" + " \n" + " \n" + "// while(1)\n" + "//was 250\n" + " int ie=0;\n" + " int maxBatch = 0;\n" + " for(ie=0; ie<50; ie++)\n" + " {\n" + " ldsFixedBuffer[lIdx] = 0;\n" + " for(int giter=0; giter<4; giter++)\n" + " {\n" + " int ringCap = GET_RING_CAPACITY;\n" + " \n" + " // 1. fill ring\n" + " if( ldsGEnd < m_n )\n" + " {\n" + " while( ringCap > WG_SIZE )\n" + " {\n" + " if( ldsGEnd >= m_n ) break;\n" + " if( lIdx < ringCap - WG_SIZE )\n" + " {\n" + " int srcIdx;\n" + " AtomInc1( ldsGEnd, srcIdx );\n" + " if( srcIdx < m_n )\n" + " {\n" + " int dstIdx;\n" + " AtomInc1( ldsRingEnd, dstIdx );\n" + " \n" + " int a = gConstraints[m_start+srcIdx].m_bodyAPtrAndSignBit;\n" + " int b = gConstraints[m_start+srcIdx].m_bodyBPtrAndSignBit;\n" + " ldsRingElem[dstIdx].m_a = (a>b)? b:a;\n" + " ldsRingElem[dstIdx].m_b = (a>b)? a:b;\n" + " ldsRingElem[dstIdx].m_idx = srcIdx;\n" + " }\n" + " }\n" + " ringCap = GET_RING_CAPACITY;\n" + " }\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " \n" + " // 2. 
fill stack\n" + " __local Elem* dst = ldsRingElem;\n" + " if( lIdx == 0 ) RING_END = 0;\n" + " int srcIdx=lIdx;\n" + " int end = ldsRingEnd;\n" + " {\n" + " for(int ii=0; ii<end; ii+=WG_SIZE, srcIdx+=WG_SIZE)\n" + " {\n" + " Elem e;\n" + " if(srcIdx<end) e = ldsRingElem[srcIdx];\n" + " bool done = (srcIdx<end)?false:true;\n" + " for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) ldsCheckBuffer[lIdx] = 0;\n" + " \n" + " if( !done )\n" + " {\n" + " int aUsed = readBuf( ldsFixedBuffer, abs(e.m_a));\n" + " int bUsed = readBuf( ldsFixedBuffer, abs(e.m_b));\n" + " if( aUsed==0 && bUsed==0 )\n" + " {\n" + " int aAvailable=1;\n" + " int bAvailable=1;\n" + " int ea = abs(e.m_a);\n" + " int eb = abs(e.m_b);\n" + " bool aStatic = (e.m_a<0) ||(ea==m_staticIdx);\n" + " bool bStatic = (e.m_b<0) ||(eb==m_staticIdx);\n" + " \n" + " if (!aStatic)\n" + " aAvailable = tryWrite( ldsCheckBuffer, ea );\n" + " if (!bStatic)\n" + " bAvailable = tryWrite( ldsCheckBuffer, eb );\n" + " \n" + " //aAvailable = aStatic? 1: aAvailable;\n" + " //bAvailable = bStatic? 
1: bAvailable;\n" + " bool success = (aAvailable && bAvailable);\n" + " if(success)\n" + " {\n" + " \n" + " if (!aStatic)\n" + " writeBuf( ldsFixedBuffer, ea );\n" + " if (!bStatic)\n" + " writeBuf( ldsFixedBuffer, eb );\n" + " }\n" + " done = success;\n" + " }\n" + " }\n" + " // put it aside\n" + " if(srcIdx<end)\n" + " {\n" + " if( done )\n" + " {\n" + " int dstIdx; AtomInc1( ldsStackEnd, dstIdx );\n" + " if( dstIdx < STACK_SIZE )\n" + " ldsStackIdx[dstIdx] = e.m_idx;\n" + " else{\n" + " done = false;\n" + " AtomAdd( ldsStackEnd, -1 );\n" + " }\n" + " }\n" + " if( !done )\n" + " {\n" + " int dstIdx; AtomInc1( RING_END, dstIdx );\n" + " dst[dstIdx] = e;\n" + " }\n" + " }\n" + " // if filled, flush\n" + " if( ldsStackEnd == STACK_SIZE )\n" + " {\n" + " for(int i=lIdx; i<STACK_SIZE; i+=WG_SIZE)\n" + " {\n" + " int idx = m_start + ldsStackIdx[i];\n" + " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" + " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" + " gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" + " }\n" + " if( lIdx == 0 ) ldsStackEnd = 0;\n" + " //for(int i=lIdx; i<CHECK_SIZE; i+=WG_SIZE) \n" + " ldsFixedBuffer[lIdx] = 0;\n" + " }\n" + " }\n" + " }\n" + " if( lIdx == 0 ) ldsRingEnd = RING_END;\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " for(int i=lIdx; i<ldsStackEnd; i+=WG_SIZE)\n" + " {\n" + " int idx = m_start + ldsStackIdx[i];\n" + " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" + " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" + " gConstraintsOut[ dstIdx ].m_batchIdx = ie;\n" + " }\n" + " // in case it couldn't consume any pair. Flush them\n" + " // todo. 
Serial batch worth while?\n" + " if( ldsStackEnd == 0 )\n" + " {\n" + " for(int i=lIdx; i<ldsRingEnd; i+=WG_SIZE)\n" + " {\n" + " int idx = m_start + ldsRingElem[i].m_idx;\n" + " int dstIdx; AtomInc1( ldsDstEnd, dstIdx );\n" + " gConstraintsOut[ dstIdx ] = gConstraints[ idx ];\n" + " int curBatch = 100+i;\n" + " if (maxBatch < curBatch)\n" + " maxBatch = curBatch;\n" + " \n" + " gConstraintsOut[ dstIdx ].m_batchIdx = curBatch;\n" + " \n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " if( lIdx == 0 ) ldsRingEnd = 0;\n" + " }\n" + " if( lIdx == 0 ) ldsStackEnd = 0;\n" + " GROUP_LDS_BARRIER;\n" + " // termination\n" + " if( ldsGEnd == m_n && ldsRingEnd == 0 )\n" + " break;\n" + " }\n" + " if( lIdx == 0 )\n" + " {\n" + " if (maxBatch < ie)\n" + " maxBatch=ie;\n" + " batchSizes[wgIdx]=maxBatch;\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h index 1e5957adae..05800656cb 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.h @@ -1,291 +1,290 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* batchingKernelsNewCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* 
vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile __global int*\n" -"#endif\n" -"#define SIMD_WIDTH 64\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define 
GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"#define WG_SIZE 64\n" -"typedef struct \n" -"{\n" -" int m_n;\n" -" int m_start;\n" -" int m_staticIdx;\n" -" int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct \n" -"{\n" -" int m_a;\n" -" int m_b;\n" -" u32 m_idx;\n" -"}Elem;\n" -"// batching on the GPU\n" -"__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" -"{\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" \n" -" const int m_n = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" for (int i=0;i<m_n;i++)\n" -" {\n" -" int srcIdx = i+m_start;\n" -" int batchIndex = i;\n" -" gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n" -" }\n" -" }\n" -"}\n" -"#define CHECK_SIZE (WG_SIZE)\n" -"u32 readBuf(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" return buff[bufIdx] & (1<<bitIdx);\n" -"}\n" -"void writeBuf(__local 
u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" buff[bufIdx] |= (1<<bitIdx);\n" -" //atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -"}\n" -"u32 tryWrite(__local u32* buff, int idx)\n" -"{\n" -" idx = idx % (32*CHECK_SIZE);\n" -" int bitIdx = idx%32;\n" -" int bufIdx = idx/32;\n" -" u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" -" return ((ans >> bitIdx)&1) == 0;\n" -"}\n" -"// batching on the GPU\n" -"__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx )\n" -"{\n" -" int wgIdx = GET_GROUP_IDX;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" const int numConstraints = gN[wgIdx];\n" -" const int m_start = gStart[wgIdx];\n" -" b3Contact4Data_t tmp;\n" -" \n" -" __local u32 ldsFixedBuffer[CHECK_SIZE];\n" -" \n" -" \n" -" \n" -" \n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" \n" -" \n" -" __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" -" \n" -" \n" -" int numValidConstraints = 0;\n" -" int batchIdx = 0;\n" -" while( numValidConstraints < numConstraints)\n" -" {\n" -" int nCurrentBatch = 0;\n" -" // clear flag\n" -" \n" -" for(int i=0; i<CHECK_SIZE; i++) \n" -" ldsFixedBuffer[i] = 0; \n" -" for(int i=numValidConstraints; i<numConstraints; i++)\n" -" {\n" -" int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n" -" int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n" -" int bodyA = abs(bodyAS);\n" -" int bodyB = abs(bodyBS);\n" -" bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n" -" bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n" -" int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n" -" int bUnavailable = bIsStatic ? 
0 : readBuf( ldsFixedBuffer, bodyB);\n" -" \n" -" if( aUnavailable==0 && bUnavailable==0 ) // ok\n" -" {\n" -" if (!aIsStatic)\n" -" {\n" -" writeBuf( ldsFixedBuffer, bodyA );\n" -" }\n" -" if (!bIsStatic)\n" -" {\n" -" writeBuf( ldsFixedBuffer, bodyB );\n" -" }\n" -" cs[i].m_batchIdx = batchIdx;\n" -" if (i!=numValidConstraints)\n" -" {\n" -" tmp = cs[i];\n" -" cs[i] = cs[numValidConstraints];\n" -" cs[numValidConstraints] = tmp;\n" -" }\n" -" numValidConstraints++;\n" -" \n" -" nCurrentBatch++;\n" -" if( nCurrentBatch == SIMD_WIDTH)\n" -" {\n" -" nCurrentBatch = 0;\n" -" for(int i=0; i<CHECK_SIZE; i++) \n" -" ldsFixedBuffer[i] = 0;\n" -" \n" -" }\n" -" }\n" -" }//for\n" -" batchIdx ++;\n" -" }//while\n" -" \n" -" batchSizes[wgIdx] = batchIdx;\n" -" }//if( lIdx == 0 )\n" -" \n" -" //return batchIdx;\n" -"}\n" -; +static const char* batchingKernelsNewCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. 
This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Erwin Coumans\n" + "#ifndef B3_CONTACT4DATA_H\n" + "#define B3_CONTACT4DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = 
b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3Contact4Data b3Contact4Data_t;\n" + "struct b3Contact4Data\n" + "{\n" + " b3Float4 m_worldPosB[4];\n" + "// b3Float4 m_localPosA[4];\n" + "// b3Float4 m_localPosB[4];\n" + " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" + " unsigned short m_restituitionCoeffCmp;\n" + " unsigned short m_frictionCoeffCmp;\n" + " int m_batchIdx;\n" + " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_childIndexA;\n" + " int m_childIndexB;\n" + " int m_unused1;\n" + " int m_unused2;\n" + "};\n" + "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" + "{\n" + " return (int)contact->m_worldNormalOnB.w;\n" + "};\n" + "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" + "{\n" + " contact->m_worldNormalOnB.w = (float)numPoints;\n" + "};\n" + "#endif //B3_CONTACT4DATA_H\n" + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile __global int*\n" + "#endif\n" + "#define SIMD_WIDTH 64\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + 
"#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define make_float4 (float4)\n" + "#define make_float2 (float2)\n" + "#define make_uint4 (uint4)\n" + "#define make_int4 (int4)\n" + "#define make_uint2 (uint2)\n" + "#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "#define WG_SIZE 64\n" + "typedef struct \n" + "{\n" + " int m_n;\n" + " int m_start;\n" + " int m_staticIdx;\n" + " int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct \n" + "{\n" + " int m_a;\n" + " int m_b;\n" + " u32 m_idx;\n" + "}Elem;\n" + "// batching on the GPU\n" + "__kernel void CreateBatchesBruteForce( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, int m_staticIdx )\n" + "{\n" + " int wgIdx = GET_GROUP_IDX;\n" + " int lIdx = GET_LOCAL_IDX;\n" + " \n" + " const int m_n = gN[wgIdx];\n" + " const int m_start = gStart[wgIdx];\n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " for (int i=0;i<m_n;i++)\n" + " {\n" + " int srcIdx = i+m_start;\n" + " int batchIndex = i;\n" + " gConstraints[ srcIdx ].m_batchIdx = batchIndex; \n" + " }\n" + " }\n" + "}\n" + "#define CHECK_SIZE (WG_SIZE)\n" + "u32 readBuf(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % (32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + " return buff[bufIdx] & (1<<bitIdx);\n" + "}\n" + "void writeBuf(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % 
(32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + " buff[bufIdx] |= (1<<bitIdx);\n" + " //atom_or( &buff[bufIdx], (1<<bitIdx) );\n" + "}\n" + "u32 tryWrite(__local u32* buff, int idx)\n" + "{\n" + " idx = idx % (32*CHECK_SIZE);\n" + " int bitIdx = idx%32;\n" + " int bufIdx = idx/32;\n" + " u32 ans = (u32)atom_or( &buff[bufIdx], (1<<bitIdx) );\n" + " return ((ans >> bitIdx)&1) == 0;\n" + "}\n" + "// batching on the GPU\n" + "__kernel void CreateBatchesNew( __global struct b3Contact4Data* gConstraints, __global const u32* gN, __global const u32* gStart, __global int* batchSizes, int staticIdx )\n" + "{\n" + " int wgIdx = GET_GROUP_IDX;\n" + " int lIdx = GET_LOCAL_IDX;\n" + " const int numConstraints = gN[wgIdx];\n" + " const int m_start = gStart[wgIdx];\n" + " b3Contact4Data_t tmp;\n" + " \n" + " __local u32 ldsFixedBuffer[CHECK_SIZE];\n" + " \n" + " \n" + " \n" + " \n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " \n" + " \n" + " __global struct b3Contact4Data* cs = &gConstraints[m_start]; \n" + " \n" + " \n" + " int numValidConstraints = 0;\n" + " int batchIdx = 0;\n" + " while( numValidConstraints < numConstraints)\n" + " {\n" + " int nCurrentBatch = 0;\n" + " // clear flag\n" + " \n" + " for(int i=0; i<CHECK_SIZE; i++) \n" + " ldsFixedBuffer[i] = 0; \n" + " for(int i=numValidConstraints; i<numConstraints; i++)\n" + " {\n" + " int bodyAS = cs[i].m_bodyAPtrAndSignBit;\n" + " int bodyBS = cs[i].m_bodyBPtrAndSignBit;\n" + " int bodyA = abs(bodyAS);\n" + " int bodyB = abs(bodyBS);\n" + " bool aIsStatic = (bodyAS<0) || bodyAS==staticIdx;\n" + " bool bIsStatic = (bodyBS<0) || bodyBS==staticIdx;\n" + " int aUnavailable = aIsStatic ? 0 : readBuf( ldsFixedBuffer, bodyA);\n" + " int bUnavailable = bIsStatic ? 
0 : readBuf( ldsFixedBuffer, bodyB);\n" + " \n" + " if( aUnavailable==0 && bUnavailable==0 ) // ok\n" + " {\n" + " if (!aIsStatic)\n" + " {\n" + " writeBuf( ldsFixedBuffer, bodyA );\n" + " }\n" + " if (!bIsStatic)\n" + " {\n" + " writeBuf( ldsFixedBuffer, bodyB );\n" + " }\n" + " cs[i].m_batchIdx = batchIdx;\n" + " if (i!=numValidConstraints)\n" + " {\n" + " tmp = cs[i];\n" + " cs[i] = cs[numValidConstraints];\n" + " cs[numValidConstraints] = tmp;\n" + " }\n" + " numValidConstraints++;\n" + " \n" + " nCurrentBatch++;\n" + " if( nCurrentBatch == SIMD_WIDTH)\n" + " {\n" + " nCurrentBatch = 0;\n" + " for(int i=0; i<CHECK_SIZE; i++) \n" + " ldsFixedBuffer[i] = 0;\n" + " \n" + " }\n" + " }\n" + " }//for\n" + " batchIdx ++;\n" + " }//while\n" + " \n" + " batchSizes[wgIdx] = batchIdx;\n" + " }//if( lIdx == 0 )\n" + " \n" + " //return batchIdx;\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h index a5a432947c..6e9c53e161 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/integrateKernel.h @@ -1,433 +1,432 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* integrateKernelCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* 
vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" 
float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 
mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, 
b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" -"{\n" -" \n" -" if (bodies[nodeID].m_invMass != 0.f)\n" -" {\n" -" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" -" //angular velocity\n" -" {\n" -" b3Float4 axis;\n" -" //add some hardcoded angular damping\n" -" bodies[nodeID].m_angVel.x *= angularDamping;\n" -" bodies[nodeID].m_angVel.y *= angularDamping;\n" -" bodies[nodeID].m_angVel.z *= angularDamping;\n" -" \n" -" b3Float4 angvel = bodies[nodeID].m_angVel;\n" -" float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" -" \n" -" //limit the angular motion\n" -" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" -" {\n" -" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" -" }\n" -" if(fAngle < 0.001f)\n" -" {\n" -" // use Taylor's expansions of sync function\n" -" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" -" }\n" -" else\n" -" {\n" -" 
// sync(fAngle) = sin(c*fAngle)/t\n" -" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" -" }\n" -" \n" -" b3Quat dorn;\n" -" dorn.x = axis.x;\n" -" dorn.y = axis.y;\n" -" dorn.z = axis.z;\n" -" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" -" b3Quat orn0 = bodies[nodeID].m_quat;\n" -" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" -" predictedOrn = b3QuatNormalized(predictedOrn);\n" -" bodies[nodeID].m_quat=predictedOrn;\n" -" }\n" -" //linear velocity \n" -" bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" -" \n" -" //apply gravity\n" -" bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" -" \n" -" }\n" -" \n" -"}\n" -"inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" -"{\n" -" float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" -" \n" -" if( (body->m_invMass != 0.f))\n" -" {\n" -" //angular velocity\n" -" {\n" -" b3Float4 axis;\n" -" //add some hardcoded angular damping\n" -" body->m_angVel.x *= angularDamping;\n" -" body->m_angVel.y *= angularDamping;\n" -" body->m_angVel.z *= angularDamping;\n" -" \n" -" b3Float4 angvel = body->m_angVel;\n" -" float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" -" //limit the angular motion\n" -" if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" -" {\n" -" fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" -" }\n" -" if(fAngle < 0.001f)\n" -" {\n" -" // use Taylor's expansions of sync function\n" -" axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" -" }\n" -" else\n" -" {\n" -" // sync(fAngle) = sin(c*fAngle)/t\n" -" axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" -" }\n" -" b3Quat dorn;\n" -" dorn.x = axis.x;\n" -" dorn.y = axis.y;\n" -" dorn.z = axis.z;\n" -" dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" -" b3Quat orn0 = body->m_quat;\n" -" b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" -" predictedOrn 
= b3QuatNormalized(predictedOrn);\n" -" body->m_quat=predictedOrn;\n" -" }\n" -" //apply gravity\n" -" body->m_linVel += gravityAcceleration * timeStep;\n" -" //linear velocity \n" -" body->m_pos += body->m_linVel * timeStep;\n" -" \n" -" }\n" -" \n" -"}\n" -"__kernel void \n" -" integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" -"{\n" -" int nodeID = get_global_id(0);\n" -" \n" -" if( nodeID < numNodes)\n" -" {\n" -" integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" -" }\n" -"}\n" -; +static const char* integrateKernelCL = + "/*\n" + "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. 
This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Erwin Coumans\n" + "#ifndef B3_RIGIDBODY_DATA_H\n" + "#define B3_RIGIDBODY_DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = 
b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "#ifndef B3_QUAT_H\n" + "#define B3_QUAT_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif\n" + "#endif\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Quat;\n" + " #define b3QuatConstArg const b3Quat\n" + " \n" + " \n" + "inline float4 b3FastNormalize4(float4 v)\n" + "{\n" + " v = (float4)(v.xyz,0.f);\n" + " return fast_normalize(v);\n" + "}\n" + " \n" + "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" + "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" + "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" + "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" + "{\n" + " b3Quat ans;\n" + " ans = b3Cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" + "{\n" + " b3Quat q;\n" + " q=in;\n" + " //return b3FastNormalize4(in);\n" + " float len = native_sqrt(dot(q, q));\n" + " if(len > 0.f)\n" + " {\n" + " q *= 1.f / len;\n" + " }\n" + " else\n" + " {\n" + " q.x = q.y = q.z = 0.f;\n" + " q.w = 1.f;\n" + " }\n" + " return q;\n" + "}\n" + "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " b3Quat qInv = b3QuatInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "inline b3Quat 
b3QuatInverse(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " return b3QuatRotate( b3QuatInvert( q ), vec );\n" + "}\n" + "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" + "{\n" + " return b3QuatRotate( orientation, point ) + (translation);\n" + "}\n" + " \n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifndef B3_MAT3x3_H\n" + "#define B3_MAT3x3_H\n" + "#ifndef B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "typedef struct\n" + "{\n" + " b3Float4 m_row[3];\n" + "}b3Mat3x3;\n" + "#define b3Mat3x3ConstArg const b3Mat3x3\n" + "#define b3GetRow(m,row) (m.m_row[row])\n" + "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" + "{\n" + " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" + " b3Mat3x3 out;\n" + " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" + " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" + " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" + " out.m_row[0].w = 0.f;\n" + " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" + " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" + " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" + " out.m_row[1].w = 0.f;\n" + " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" + " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" + " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" + " out.m_row[2].w = 0.f;\n" + " return out;\n" + "}\n" + "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = fabs(matIn.m_row[0]);\n" + " out.m_row[1] = fabs(matIn.m_row[1]);\n" + " out.m_row[2] = fabs(matIn.m_row[2]);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtZero();\n" + "__inline\n" + 
"b3Mat3x3 mtIdentity();\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Mat3x3 mtZero()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(0.f);\n" + " m.m_row[1] = (b3Float4)(0.f);\n" + " m.m_row[2] = (b3Float4)(0.f);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtIdentity()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(1,0,0,0);\n" + " m.m_row[1] = (b3Float4)(0,1,0,0);\n" + " m.m_row[2] = (b3Float4)(0,0,1,0);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" + " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" + " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" + "{\n" + " b3Mat3x3 transB;\n" + " transB = mtTranspose( b );\n" + " b3Mat3x3 ans;\n" + " // why this doesn't run when 0ing in the for{}\n" + " a.m_row[0].w = 0.f;\n" + " a.m_row[1].w = 0.f;\n" + " a.m_row[2].w = 0.f;\n" + " for(int i=0; i<3; i++)\n" + " {\n" + "// a.m_row[i].w = 0.f;\n" + " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" + " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" + " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" + " ans.m_row[i].w = 0.f;\n" + " }\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" + "{\n" + " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a.m_row[0], b );\n" + " ans.y = b3Dot3F4( a.m_row[1], b );\n" + " ans.z = b3Dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" + "{\n" + " b3Float4 colx = 
b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a, colx );\n" + " ans.y = b3Dot3F4( a, coly );\n" + " ans.z = b3Dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "#endif\n" + "#endif //B3_MAT3x3_H\n" + "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" + "struct b3RigidBodyData\n" + "{\n" + " b3Float4 m_pos;\n" + " b3Quat m_quat;\n" + " b3Float4 m_linVel;\n" + " b3Float4 m_angVel;\n" + " int m_collidableIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "};\n" + "typedef struct b3InertiaData b3InertiaData_t;\n" + "struct b3InertiaData\n" + "{\n" + " b3Mat3x3 m_invInertiaWorld;\n" + " b3Mat3x3 m_initInvInertia;\n" + "};\n" + "#endif //B3_RIGIDBODY_DATA_H\n" + " \n" + "#ifndef B3_RIGIDBODY_DATA_H\n" + "#endif //B3_RIGIDBODY_DATA_H\n" + " \n" + "inline void integrateSingleTransform( __global b3RigidBodyData_t* bodies,int nodeID, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" + "{\n" + " \n" + " if (bodies[nodeID].m_invMass != 0.f)\n" + " {\n" + " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" + " //angular velocity\n" + " {\n" + " b3Float4 axis;\n" + " //add some hardcoded angular damping\n" + " bodies[nodeID].m_angVel.x *= angularDamping;\n" + " bodies[nodeID].m_angVel.y *= angularDamping;\n" + " bodies[nodeID].m_angVel.z *= angularDamping;\n" + " \n" + " b3Float4 angvel = bodies[nodeID].m_angVel;\n" + " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" + " \n" + " //limit the angular motion\n" + " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" + " {\n" + " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" + " }\n" + " if(fAngle < 0.001f)\n" + " {\n" + " // use Taylor's expansions of sync function\n" + " axis = angvel * 
(0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" + " }\n" + " else\n" + " {\n" + " // sync(fAngle) = sin(c*fAngle)/t\n" + " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" + " }\n" + " \n" + " b3Quat dorn;\n" + " dorn.x = axis.x;\n" + " dorn.y = axis.y;\n" + " dorn.z = axis.z;\n" + " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" + " b3Quat orn0 = bodies[nodeID].m_quat;\n" + " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" + " predictedOrn = b3QuatNormalized(predictedOrn);\n" + " bodies[nodeID].m_quat=predictedOrn;\n" + " }\n" + " //linear velocity \n" + " bodies[nodeID].m_pos += bodies[nodeID].m_linVel * timeStep;\n" + " \n" + " //apply gravity\n" + " bodies[nodeID].m_linVel += gravityAcceleration * timeStep;\n" + " \n" + " }\n" + " \n" + "}\n" + "inline void b3IntegrateTransform( __global b3RigidBodyData_t* body, float timeStep, float angularDamping, b3Float4ConstArg gravityAcceleration)\n" + "{\n" + " float BT_GPU_ANGULAR_MOTION_THRESHOLD = (0.25f * 3.14159254f);\n" + " \n" + " if( (body->m_invMass != 0.f))\n" + " {\n" + " //angular velocity\n" + " {\n" + " b3Float4 axis;\n" + " //add some hardcoded angular damping\n" + " body->m_angVel.x *= angularDamping;\n" + " body->m_angVel.y *= angularDamping;\n" + " body->m_angVel.z *= angularDamping;\n" + " \n" + " b3Float4 angvel = body->m_angVel;\n" + " float fAngle = b3Sqrt(b3Dot3F4(angvel, angvel));\n" + " //limit the angular motion\n" + " if(fAngle*timeStep > BT_GPU_ANGULAR_MOTION_THRESHOLD)\n" + " {\n" + " fAngle = BT_GPU_ANGULAR_MOTION_THRESHOLD / timeStep;\n" + " }\n" + " if(fAngle < 0.001f)\n" + " {\n" + " // use Taylor's expansions of sync function\n" + " axis = angvel * (0.5f*timeStep-(timeStep*timeStep*timeStep)*0.020833333333f * fAngle * fAngle);\n" + " }\n" + " else\n" + " {\n" + " // sync(fAngle) = sin(c*fAngle)/t\n" + " axis = angvel * ( b3Sin(0.5f * fAngle * timeStep) / fAngle);\n" + " }\n" + " b3Quat dorn;\n" + " dorn.x = axis.x;\n" + " dorn.y = 
axis.y;\n" + " dorn.z = axis.z;\n" + " dorn.w = b3Cos(fAngle * timeStep * 0.5f);\n" + " b3Quat orn0 = body->m_quat;\n" + " b3Quat predictedOrn = b3QuatMul(dorn, orn0);\n" + " predictedOrn = b3QuatNormalized(predictedOrn);\n" + " body->m_quat=predictedOrn;\n" + " }\n" + " //apply gravity\n" + " body->m_linVel += gravityAcceleration * timeStep;\n" + " //linear velocity \n" + " body->m_pos += body->m_linVel * timeStep;\n" + " \n" + " }\n" + " \n" + "}\n" + "__kernel void \n" + " integrateTransformsKernel( __global b3RigidBodyData_t* bodies,const int numNodes, float timeStep, float angularDamping, float4 gravityAcceleration)\n" + "{\n" + " int nodeID = get_global_id(0);\n" + " \n" + " if( nodeID < numNodes)\n" + " {\n" + " integrateSingleTransform(bodies,nodeID, timeStep, angularDamping,gravityAcceleration);\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h index d48ecf6ea6..c94b55851e 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/jointSolver.h @@ -1,721 +1,720 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveConstraintRowsCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#define B3_CONSTRAINT_FLAG_ENABLED 1\n" -"#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" -"#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" -"#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" -"#define B3_INFINITY 1e30f\n" -"#define mymake_float4 (float4)\n" -"__inline float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"typedef float4 Quaternion;\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertiaWorld;\n" -" Matrix3x3 m_initInvInertia;\n" -"} BodyInertia;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_basis;//orientation\n" -" float4 m_origin;//transform\n" -"}b3Transform;\n" 
-"typedef struct\n" -"{\n" -"// b3Transform m_worldTransformUnused;\n" -" float4 m_deltaLinearVelocity;\n" -" float4 m_deltaAngularVelocity;\n" -" float4 m_angularFactor;\n" -" float4 m_linearFactor;\n" -" float4 m_invMass;\n" -" float4 m_pushVelocity;\n" -" float4 m_turnVelocity;\n" -" float4 m_linearVelocity;\n" -" float4 m_angularVelocity;\n" -" union \n" -" {\n" -" void* m_originalBody;\n" -" int m_originalBodyIndex;\n" -" };\n" -" int padding[3];\n" -"} b3GpuSolverBody;\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" unsigned int m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} b3RigidBodyCL;\n" -"typedef struct\n" -"{\n" -" float4 m_relpos1CrossNormal;\n" -" float4 m_contactNormal;\n" -" float4 m_relpos2CrossNormal;\n" -" //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n" -" float4 m_angularComponentA;\n" -" float4 m_angularComponentB;\n" -" \n" -" float m_appliedPushImpulse;\n" -" float m_appliedImpulse;\n" -" int m_padding1;\n" -" int m_padding2;\n" -" float m_friction;\n" -" float m_jacDiagABInv;\n" -" float m_rhs;\n" -" float m_cfm;\n" -" \n" -" float m_lowerLimit;\n" -" float m_upperLimit;\n" -" float m_rhsPenetration;\n" -" int m_originalConstraint;\n" -" int m_overrideNumSolverIterations;\n" -" int m_frictionIndex;\n" -" int m_solverBodyIdA;\n" -" int m_solverBodyIdB;\n" -"} b3SolverConstraint;\n" -"typedef struct \n" -"{\n" -" int m_bodyAPtrAndSignBit;\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_originalConstraintIndex;\n" -" int m_batchId;\n" -"} b3BatchConstraint;\n" -"typedef struct \n" -"{\n" -" int m_constraintType;\n" -" int m_rbA;\n" -" int m_rbB;\n" -" float m_breakingImpulseThreshold;\n" -" float4 m_pivotInA;\n" -" float4 m_pivotInB;\n" -" Quaternion m_relTargetAB;\n" -" int m_flags;\n" -" int m_padding[3];\n" -"} b3GpuGenericConstraint;\n" -"/*b3Transform getWorldTransform(b3RigidBodyCL* 
rb)\n" -"{\n" -" b3Transform newTrans;\n" -" newTrans.setOrigin(rb->m_pos);\n" -" newTrans.setRotation(rb->m_quat);\n" -" return newTrans;\n" -"}*/\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" v = mymake_float4(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" -"{\n" -" body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" -" body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" -"}\n" -"void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" -"{\n" -" float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" -" float deltaVel1Dotn = 
dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" -" float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" -" deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" -" deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" -" float sum = c->m_appliedImpulse + deltaImpulse;\n" -" if (sum < c->m_lowerLimit)\n" -" {\n" -" deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;\n" -" c->m_appliedImpulse = c->m_lowerLimit;\n" -" }\n" -" else if (sum > c->m_upperLimit) \n" -" {\n" -" deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;\n" -" c->m_appliedImpulse = c->m_upperLimit;\n" -" }\n" -" else\n" -" {\n" -" c->m_appliedImpulse = sum;\n" -" }\n" -" internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" -" internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" -"}\n" -"__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" -" __global b3BatchConstraint* batchConstraints,\n" -" __global b3SolverConstraint* rows,\n" -" __global unsigned int* numConstraintRowsInfo1, \n" -" __global unsigned int* rowOffsets,\n" -" __global b3GpuGenericConstraint* constraints,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int b = get_global_id(0);\n" -" if (b>=numConstraintsInBatch)\n" -" return;\n" -" __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" -" int originalConstraintIndex = c->m_originalConstraintIndex;\n" -" if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" -" {\n" -" int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" -" int rowOffset = rowOffsets[originalConstraintIndex];\n" -" for (int jj=0;jj<numConstraintRows;jj++)\n" -" {\n" -" __global b3SolverConstraint* constraint = &rows[rowOffset+jj];\n" -" 
resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" -" }\n" -" }\n" -"};\n" -"__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numBodies)\n" -" return;\n" -" __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" -" __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" -" solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" -" solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);\n" -" solverBody->m_originalBodyIndex = i;\n" -" solverBody->m_angularFactor = (float4)(1,1,1,0);\n" -" solverBody->m_linearFactor = (float4) (1,1,1,0);\n" -" solverBody->m_linearVelocity = bodyCL->m_linVel;\n" -" solverBody->m_angularVelocity = bodyCL->m_angVel;\n" -"}\n" -"__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" -"{\n" -" int cid = get_global_id(0);\n" -" if (cid>=numConstraints)\n" -" return;\n" -" int numRows = numConstraintRows[cid];\n" -" if (numRows)\n" -" {\n" -" for (int i=0;i<numRows;i++)\n" -" {\n" -" int rowIndex = rowOffsets[cid]+i;\n" -" float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;\n" -" if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)\n" -" {\n" -" constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" -" }\n" -" }\n" -" }\n" -"}\n" -"__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" 
-" if (i>=numConstraints)\n" -" return;\n" -" __global b3GpuGenericConstraint* constraint = &constraints[i];\n" -" switch (constraint->m_constraintType)\n" -" {\n" -" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" -" {\n" -" infos[i] = 3;\n" -" break;\n" -" }\n" -" case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" -" {\n" -" infos[i] = 6;\n" -" break;\n" -" }\n" -" default:\n" -" {\n" -" }\n" -" }\n" -"}\n" -"__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" -" __global b3BatchConstraint* batchConstraints, \n" -" __global b3GpuGenericConstraint* constraints,\n" -" __global b3RigidBodyCL* bodies,\n" -" int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConstraints)\n" -" return;\n" -" int rbA = constraints[i].m_rbA;\n" -" int rbB = constraints[i].m_rbB;\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? rbB : -rbB;\n" -" batchConstraints[i].m_batchId = -1;\n" -" batchConstraints[i].m_originalConstraintIndex = i;\n" -"}\n" -"typedef struct\n" -"{\n" -" // integrator parameters: frames per second (1/stepsize), default error\n" -" // reduction parameter (0..1).\n" -" float fps,erp;\n" -" // for the first and second body, pointers to two (linear and angular)\n" -" // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" -" // been initialized to 0 on entry. 
if the second body is zero then the\n" -" // J2xx pointers may be 0.\n" -" union \n" -" {\n" -" __global float4* m_J1linearAxisFloat4;\n" -" __global float* m_J1linearAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J1angularAxisFloat4;\n" -" __global float* m_J1angularAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J2linearAxisFloat4;\n" -" __global float* m_J2linearAxis;\n" -" };\n" -" union\n" -" {\n" -" __global float4* m_J2angularAxisFloat4;\n" -" __global float* m_J2angularAxis;\n" -" };\n" -" // elements to jump from one row to the next in J's\n" -" int rowskip;\n" -" // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" -" // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" -" // set to a constant value (typically very small or zero) value on entry.\n" -" __global float* m_constraintError;\n" -" __global float* cfm;\n" -" // lo and hi limits for variables (set to -/+ infinity on entry).\n" -" __global float* m_lowerLimit;\n" -" __global float* m_upperLimit;\n" -" // findex vector for variables. see the LCP solver interface for a\n" -" // description of what this does. this is set to -1 on entry.\n" -" // note that the returned indexes are relative to the first index of\n" -" // the constraint.\n" -" __global int *findex;\n" -" // number of solver iterations\n" -" int m_numIterations;\n" -" //damping of the velocity\n" -" float m_damping;\n" -"} b3GpuConstraintInfo2;\n" -"void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" -"{\n" -" *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" -" *v1 = (float4)(vecIn.z ,0. 
,-vecIn.x,0.f);\n" -" *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" -"}\n" -"void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" -"{\n" -" float4 posA = bodies[constraint->m_rbA].m_pos;\n" -" Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" -" float4 posB = bodies[constraint->m_rbB].m_pos;\n" -" Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" -" // anchor points in global coordinates with respect to body PORs.\n" -" \n" -" // set jacobian\n" -" info->m_J1linearAxis[0] = 1;\n" -" info->m_J1linearAxis[info->rowskip+1] = 1;\n" -" info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" -" float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" -" {\n" -" __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" -" __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" -" __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);\n" -" float4 a1neg = -a1;\n" -" getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);\n" -" }\n" -" if (info->m_J2linearAxis)\n" -" {\n" -" info->m_J2linearAxis[0] = -1;\n" -" info->m_J2linearAxis[info->rowskip+1] = -1;\n" -" info->m_J2linearAxis[2*info->rowskip+2] = -1;\n" -" }\n" -" \n" -" float4 a2 = qtRotate(rotB,constraint->m_pivotInB);\n" -" \n" -" {\n" -" // float4 a2n = -a2;\n" -" __global float4* angular0 = (__global float4*)(info->m_J2angularAxis);\n" -" __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);\n" -" __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);\n" -" getSkewSymmetricMatrix(a2,angular0,angular1,angular2);\n" -" }\n" -" \n" -" // set right hand side\n" -"// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? 
m_erp : info->erp;\n" -" float currERP = info->erp;\n" -" float k = info->fps * currERP;\n" -" int j;\n" -" float4 result = a2 + posB - a1 - posA;\n" -" float* resultPtr = &result;\n" -" for (j=0; j<3; j++)\n" -" {\n" -" info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" -" }\n" -"}\n" -"Quaternion nearest( Quaternion first, Quaternion qd)\n" -"{\n" -" Quaternion diff,sum;\n" -" diff = first- qd;\n" -" sum = first + qd;\n" -" \n" -" if( dot(diff,diff) < dot(sum,sum) )\n" -" return qd;\n" -" return (-qd);\n" -"}\n" -"float b3Acos(float x) \n" -"{ \n" -" if (x<-1) \n" -" x=-1; \n" -" if (x>1) \n" -" x=1;\n" -" return acos(x); \n" -"}\n" -"float getAngle(Quaternion orn)\n" -"{\n" -" if (orn.w>=1.f)\n" -" orn.w=1.f;\n" -" float s = 2.f * b3Acos(orn.w);\n" -" return s;\n" -"}\n" -"void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" -"{\n" -" Quaternion orn1 = nearest(orn0,orn1a);\n" -" \n" -" Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" -" *angle = getAngle(dorn);\n" -" *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" -" \n" -" //check for axis length\n" -" float len = dot3F4(*axis,*axis);\n" -" if (len < FLT_EPSILON*FLT_EPSILON)\n" -" *axis = (float4)(1,0,0,0);\n" -" else\n" -" *axis /= sqrt(len);\n" -"}\n" -"void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" -"{\n" -" Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" -" Quaternion worldOrnB = bodies[constraint->m_rbB].m_quat;\n" -" int s = info->rowskip;\n" -" int start_index = start_row * s;\n" -" // 3 rows to make body rotations equal\n" -" info->m_J1angularAxis[start_index] = 1;\n" -" info->m_J1angularAxis[start_index + s + 1] = 1;\n" -" info->m_J1angularAxis[start_index + s*2+2] = 1;\n" -" if ( info->m_J2angularAxis)\n" -" {\n" -" info->m_J2angularAxis[start_index] = -1;\n" -" info->m_J2angularAxis[start_index + s+1] = -1;\n" -" 
info->m_J2angularAxis[start_index + s*2+2] = -1;\n" -" }\n" -" \n" -" float currERP = info->erp;\n" -" float k = info->fps * currERP;\n" -" float4 diff;\n" -" float angle;\n" -" float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" -" \n" -" calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" -" diff*=-angle;\n" -" \n" -" float* resultPtr = &diff;\n" -" \n" -" for (int j=0; j<3; j++)\n" -" {\n" -" info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" -" }\n" -" \n" -"}\n" -"__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numBodies)\n" -" return;\n" -" if (bodies[i].m_invMass)\n" -" {\n" -"// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)\n" -" {\n" -" bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;\n" -" }\n" -"// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP)\n" -" {\n" -" bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;\n" -" } \n" -" }\n" -"}\n" -"__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, \n" -" __global unsigned int* infos, \n" -" __global unsigned int* constraintRowOffsets, \n" -" __global b3GpuGenericConstraint* constraints, \n" -" __global b3BatchConstraint* batchConstraints, \n" -" __global b3RigidBodyCL* bodies,\n" -" __global BodyInertia* inertias,\n" -" __global b3GpuSolverBody* solverBodies,\n" -" float timeStep,\n" -" float globalErp,\n" -" float globalCfm,\n" -" float globalDamping,\n" -" int globalNumIterations,\n" -" int numConstraints)\n" -"{\n" -" int i = get_global_id(0);\n" -" if (i>=numConstraints)\n" -" return;\n" -" \n" -" //for now, always initialize the batch info\n" -" int info1 = infos[i];\n" -" \n" -" __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" -" __global b3GpuGenericConstraint* constraint = 
&constraints[i];\n" -" __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" -" __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" -" int solverBodyIdA = constraint->m_rbA;\n" -" int solverBodyIdB = constraint->m_rbB;\n" -" __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" -" __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" -" if (rbA->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" -" } else\n" -" {\n" -"// if (!solverBodyIdA)\n" -"// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" -" }\n" -" if (rbB->m_invMass)\n" -" {\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" -" } else\n" -" {\n" -"// if (!solverBodyIdB)\n" -"// m_staticIdx = 0;\n" -" batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" -" }\n" -" if (info1)\n" -" {\n" -" int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" -"// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" -" // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" -" int j;\n" -" for ( j=0;j<info1;j++)\n" -" {\n" -"// memset(&currentConstraintRow[j],0,sizeof(b3SolverConstraint));\n" -" currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_appliedImpulse = 0.f;\n" -" currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" -" currentConstraintRow[j].m_cfm = 0.f;\n" -" currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_friction = 0.f;\n" -" currentConstraintRow[j].m_frictionIndex = 0;\n" -" currentConstraintRow[j].m_jacDiagABInv = 0.f;\n" -" currentConstraintRow[j].m_lowerLimit = 0.f;\n" -" currentConstraintRow[j].m_upperLimit = 0.f;\n" -" currentConstraintRow[j].m_originalConstraint = 
i;\n" -" currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n" -" currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);\n" -" currentConstraintRow[j].m_rhs = 0.f;\n" -" currentConstraintRow[j].m_rhsPenetration = 0.f;\n" -" currentConstraintRow[j].m_solverBodyIdA = 0;\n" -" currentConstraintRow[j].m_solverBodyIdB = 0;\n" -" \n" -" currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;\n" -" currentConstraintRow[j].m_upperLimit = B3_INFINITY;\n" -" currentConstraintRow[j].m_appliedImpulse = 0.f;\n" -" currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" -" currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;\n" -" currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;\n" -" currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; \n" -" }\n" -" bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" -" bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" -" bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" -" int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -" \n" -" b3GpuConstraintInfo2 info2;\n" -" info2.fps = 1.f/timeStep;\n" -" info2.erp = globalErp;\n" -" info2.m_J1linearAxisFloat4 = &currentConstraintRow->m_contactNormal;\n" -" info2.m_J1angularAxisFloat4 = &currentConstraintRow->m_relpos1CrossNormal;\n" -" info2.m_J2linearAxisFloat4 = 0;\n" -" info2.m_J2angularAxisFloat4 = &currentConstraintRow->m_relpos2CrossNormal;\n" -" info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" -" ///the size of b3SolverConstraint needs be a multiple of float\n" -"// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" -" 
info2.m_constraintError = &currentConstraintRow->m_rhs;\n" -" currentConstraintRow->m_cfm = globalCfm;\n" -" info2.m_damping = globalDamping;\n" -" info2.cfm = &currentConstraintRow->m_cfm;\n" -" info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;\n" -" info2.m_upperLimit = &currentConstraintRow->m_upperLimit;\n" -" info2.m_numIterations = globalNumIterations;\n" -" switch (constraint->m_constraintType)\n" -" {\n" -" case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" -" {\n" -" getInfo2Point2Point(constraint,&info2,bodies);\n" -" break;\n" -" }\n" -" case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" -" {\n" -" getInfo2Point2Point(constraint,&info2,bodies);\n" -" getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" -" break;\n" -" }\n" -" default:\n" -" {\n" -" }\n" -" }\n" -" ///finalize the constraint setup\n" -" for ( j=0;j<info1;j++)\n" -" {\n" -" __global b3SolverConstraint* solverConstraint = &currentConstraintRow[j];\n" -" if (solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)\n" -" {\n" -" solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" -" }\n" -" if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" -" {\n" -" solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" -" }\n" -"// solverConstraint->m_originalContactPoint = constraint;\n" -" \n" -" Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" -" {\n" -" //float4 angularFactorA(1,1,1);\n" -" float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" -" solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" -" }\n" -" \n" -" Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" -" {\n" -" float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" -" solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" -" }\n" -" {\n" -" //it is ok to use solverConstraint->m_contactNormal 
instead of -solverConstraint->m_contactNormal\n" -" //because it gets multiplied iMJlB\n" -" float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;\n" -" float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" -" float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" -" float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" -" float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" -" sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" -" sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" -" sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);\n" -" float fsum = fabs(sum);\n" -" if (fsum>FLT_EPSILON)\n" -" {\n" -" solverConstraint->m_jacDiagABInv = 1.f/sum;\n" -" } else\n" -" {\n" -" solverConstraint->m_jacDiagABInv = 0.f;\n" -" }\n" -" }\n" -" ///fix rhs\n" -" ///todo: add force/torque accelerators\n" -" {\n" -" float rel_vel;\n" -" float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" -" float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" -" rel_vel = vel1Dotn+vel2Dotn;\n" -" float restitution = 0.f;\n" -" float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" -" float velocityError = restitution - rel_vel * info2.m_damping;\n" -" float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;\n" -" float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" -" solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" -" solverConstraint->m_appliedImpulse = 0.f;\n" -" }\n" -" }\n" -" }\n" -"}\n" -; +static const char* solveConstraintRowsCL = + "/*\n" + "Copyright (c) 2013 Advanced Micro Devices, Inc. 
\n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Erwin Coumans\n" + "#define B3_CONSTRAINT_FLAG_ENABLED 1\n" + "#define B3_GPU_POINT2POINT_CONSTRAINT_TYPE 3\n" + "#define B3_GPU_FIXED_CONSTRAINT_TYPE 4\n" + "#define MOTIONCLAMP 100000 //unused, for debugging/safety in case constraint solver fails\n" + "#define B3_INFINITY 1e30f\n" + "#define mymake_float4 (float4)\n" + "__inline float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = mymake_float4(a.xyz,0.f);\n" + " float4 b1 = mymake_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "typedef float4 Quaternion;\n" + "typedef struct\n" + "{\n" + " float4 m_row[3];\n" + "}Matrix3x3;\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b);\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b);\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b)\n" + "{\n" + " float4 ans;\n" + " ans.x = dot3F4( a.m_row[0], b );\n" + " ans.y = dot3F4( a.m_row[1], b );\n" + " ans.z = dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b)\n" + "{\n" + " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, 
b.m_row[2].x, 0);\n" + " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " float4 ans;\n" + " ans.x = dot3F4( a, colx );\n" + " ans.y = dot3F4( a, coly );\n" + " ans.z = dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_invInertiaWorld;\n" + " Matrix3x3 m_initInvInertia;\n" + "} BodyInertia;\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_basis;//orientation\n" + " float4 m_origin;//transform\n" + "}b3Transform;\n" + "typedef struct\n" + "{\n" + "// b3Transform m_worldTransformUnused;\n" + " float4 m_deltaLinearVelocity;\n" + " float4 m_deltaAngularVelocity;\n" + " float4 m_angularFactor;\n" + " float4 m_linearFactor;\n" + " float4 m_invMass;\n" + " float4 m_pushVelocity;\n" + " float4 m_turnVelocity;\n" + " float4 m_linearVelocity;\n" + " float4 m_angularVelocity;\n" + " union \n" + " {\n" + " void* m_originalBody;\n" + " int m_originalBodyIndex;\n" + " };\n" + " int padding[3];\n" + "} b3GpuSolverBody;\n" + "typedef struct\n" + "{\n" + " float4 m_pos;\n" + " Quaternion m_quat;\n" + " float4 m_linVel;\n" + " float4 m_angVel;\n" + " unsigned int m_shapeIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "} b3RigidBodyCL;\n" + "typedef struct\n" + "{\n" + " float4 m_relpos1CrossNormal;\n" + " float4 m_contactNormal;\n" + " float4 m_relpos2CrossNormal;\n" + " //float4 m_contactNormal2;//usually m_contactNormal2 == -m_contactNormal\n" + " float4 m_angularComponentA;\n" + " float4 m_angularComponentB;\n" + " \n" + " float m_appliedPushImpulse;\n" + " float m_appliedImpulse;\n" + " int m_padding1;\n" + " int m_padding2;\n" + " float m_friction;\n" + " float m_jacDiagABInv;\n" + " float m_rhs;\n" + " float m_cfm;\n" + " \n" + " float m_lowerLimit;\n" + " float m_upperLimit;\n" + " float m_rhsPenetration;\n" + " int m_originalConstraint;\n" + " int 
m_overrideNumSolverIterations;\n" + " int m_frictionIndex;\n" + " int m_solverBodyIdA;\n" + " int m_solverBodyIdB;\n" + "} b3SolverConstraint;\n" + "typedef struct \n" + "{\n" + " int m_bodyAPtrAndSignBit;\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_originalConstraintIndex;\n" + " int m_batchId;\n" + "} b3BatchConstraint;\n" + "typedef struct \n" + "{\n" + " int m_constraintType;\n" + " int m_rbA;\n" + " int m_rbB;\n" + " float m_breakingImpulseThreshold;\n" + " float4 m_pivotInA;\n" + " float4 m_pivotInB;\n" + " Quaternion m_relTargetAB;\n" + " int m_flags;\n" + " int m_padding[3];\n" + "} b3GpuGenericConstraint;\n" + "/*b3Transform getWorldTransform(b3RigidBodyCL* rb)\n" + "{\n" + " b3Transform newTrans;\n" + " newTrans.setOrigin(rb->m_pos);\n" + " newTrans.setRotation(rb->m_quat);\n" + " return newTrans;\n" + "}*/\n" + "__inline\n" + "float4 cross3(float4 a, float4 b)\n" + "{\n" + " return cross(a,b);\n" + "}\n" + "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " v = mymake_float4(v.xyz,0.f);\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b);\n" + "__inline\n" + "Quaternion qtNormalize(Quaternion in);\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec);\n" + "__inline\n" + "Quaternion qtInvert(Quaternion q);\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b)\n" + "{\n" + " Quaternion ans;\n" + " ans = cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + " ans.w = a.w*b.w - dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "__inline\n" + "Quaternion qtNormalize(Quaternion in)\n" + "{\n" + " return fastNormalize4(in);\n" + "// in /= length( in );\n" + "// return in;\n" + "}\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec)\n" + "{\n" + " Quaternion qInv = qtInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "__inline\n" + 
"Quaternion qtInvert(Quaternion q)\n" + "{\n" + " return (Quaternion)(-q.xyz, q.w);\n" + "}\n" + "__inline void internalApplyImpulse(__global b3GpuSolverBody* body, float4 linearComponent, float4 angularComponent,float impulseMagnitude)\n" + "{\n" + " body->m_deltaLinearVelocity += linearComponent*impulseMagnitude*body->m_linearFactor;\n" + " body->m_deltaAngularVelocity += angularComponent*(impulseMagnitude*body->m_angularFactor);\n" + "}\n" + "void resolveSingleConstraintRowGeneric(__global b3GpuSolverBody* body1, __global b3GpuSolverBody* body2, __global b3SolverConstraint* c)\n" + "{\n" + " float deltaImpulse = c->m_rhs-c->m_appliedImpulse*c->m_cfm;\n" + " float deltaVel1Dotn = dot3F4(c->m_contactNormal,body1->m_deltaLinearVelocity) + dot3F4(c->m_relpos1CrossNormal,body1->m_deltaAngularVelocity);\n" + " float deltaVel2Dotn = -dot3F4(c->m_contactNormal,body2->m_deltaLinearVelocity) + dot3F4(c->m_relpos2CrossNormal,body2->m_deltaAngularVelocity);\n" + " deltaImpulse -= deltaVel1Dotn*c->m_jacDiagABInv;\n" + " deltaImpulse -= deltaVel2Dotn*c->m_jacDiagABInv;\n" + " float sum = c->m_appliedImpulse + deltaImpulse;\n" + " if (sum < c->m_lowerLimit)\n" + " {\n" + " deltaImpulse = c->m_lowerLimit-c->m_appliedImpulse;\n" + " c->m_appliedImpulse = c->m_lowerLimit;\n" + " }\n" + " else if (sum > c->m_upperLimit) \n" + " {\n" + " deltaImpulse = c->m_upperLimit-c->m_appliedImpulse;\n" + " c->m_appliedImpulse = c->m_upperLimit;\n" + " }\n" + " else\n" + " {\n" + " c->m_appliedImpulse = sum;\n" + " }\n" + " internalApplyImpulse(body1,c->m_contactNormal*body1->m_invMass,c->m_angularComponentA,deltaImpulse);\n" + " internalApplyImpulse(body2,-c->m_contactNormal*body2->m_invMass,c->m_angularComponentB,deltaImpulse);\n" + "}\n" + "__kernel void solveJointConstraintRows(__global b3GpuSolverBody* solverBodies,\n" + " __global b3BatchConstraint* batchConstraints,\n" + " __global b3SolverConstraint* rows,\n" + " __global unsigned int* numConstraintRowsInfo1, \n" + " __global unsigned 
int* rowOffsets,\n" + " __global b3GpuGenericConstraint* constraints,\n" + " int batchOffset,\n" + " int numConstraintsInBatch\n" + " )\n" + "{\n" + " int b = get_global_id(0);\n" + " if (b>=numConstraintsInBatch)\n" + " return;\n" + " __global b3BatchConstraint* c = &batchConstraints[b+batchOffset];\n" + " int originalConstraintIndex = c->m_originalConstraintIndex;\n" + " if (constraints[originalConstraintIndex].m_flags&B3_CONSTRAINT_FLAG_ENABLED)\n" + " {\n" + " int numConstraintRows = numConstraintRowsInfo1[originalConstraintIndex];\n" + " int rowOffset = rowOffsets[originalConstraintIndex];\n" + " for (int jj=0;jj<numConstraintRows;jj++)\n" + " {\n" + " __global b3SolverConstraint* constraint = &rows[rowOffset+jj];\n" + " resolveSingleConstraintRowGeneric(&solverBodies[constraint->m_solverBodyIdA],&solverBodies[constraint->m_solverBodyIdB],constraint);\n" + " }\n" + " }\n" + "};\n" + "__kernel void initSolverBodies(__global b3GpuSolverBody* solverBodies,__global b3RigidBodyCL* bodiesCL, int numBodies)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numBodies)\n" + " return;\n" + " __global b3GpuSolverBody* solverBody = &solverBodies[i];\n" + " __global b3RigidBodyCL* bodyCL = &bodiesCL[i];\n" + " solverBody->m_deltaLinearVelocity = (float4)(0.f,0.f,0.f,0.f);\n" + " solverBody->m_deltaAngularVelocity = (float4)(0.f,0.f,0.f,0.f);\n" + " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" + " solverBody->m_pushVelocity = (float4)(0.f,0.f,0.f,0.f);\n" + " solverBody->m_invMass = (float4)(bodyCL->m_invMass,bodyCL->m_invMass,bodyCL->m_invMass,0.f);\n" + " solverBody->m_originalBodyIndex = i;\n" + " solverBody->m_angularFactor = (float4)(1,1,1,0);\n" + " solverBody->m_linearFactor = (float4) (1,1,1,0);\n" + " solverBody->m_linearVelocity = bodyCL->m_linVel;\n" + " solverBody->m_angularVelocity = bodyCL->m_angVel;\n" + "}\n" + "__kernel void breakViolatedConstraintsKernel(__global b3GpuGenericConstraint* constraints, __global unsigned int* 
numConstraintRows, __global unsigned int* rowOffsets, __global b3SolverConstraint* rows, int numConstraints)\n" + "{\n" + " int cid = get_global_id(0);\n" + " if (cid>=numConstraints)\n" + " return;\n" + " int numRows = numConstraintRows[cid];\n" + " if (numRows)\n" + " {\n" + " for (int i=0;i<numRows;i++)\n" + " {\n" + " int rowIndex = rowOffsets[cid]+i;\n" + " float breakingThreshold = constraints[cid].m_breakingImpulseThreshold;\n" + " if (fabs(rows[rowIndex].m_appliedImpulse) >= breakingThreshold)\n" + " {\n" + " constraints[cid].m_flags =0;//&= ~B3_CONSTRAINT_FLAG_ENABLED;\n" + " }\n" + " }\n" + " }\n" + "}\n" + "__kernel void getInfo1Kernel(__global unsigned int* infos, __global b3GpuGenericConstraint* constraints, int numConstraints)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numConstraints)\n" + " return;\n" + " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" + " switch (constraint->m_constraintType)\n" + " {\n" + " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" + " {\n" + " infos[i] = 3;\n" + " break;\n" + " }\n" + " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" + " {\n" + " infos[i] = 6;\n" + " break;\n" + " }\n" + " default:\n" + " {\n" + " }\n" + " }\n" + "}\n" + "__kernel void initBatchConstraintsKernel(__global unsigned int* numConstraintRows, __global unsigned int* rowOffsets, \n" + " __global b3BatchConstraint* batchConstraints, \n" + " __global b3GpuGenericConstraint* constraints,\n" + " __global b3RigidBodyCL* bodies,\n" + " int numConstraints)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numConstraints)\n" + " return;\n" + " int rbA = constraints[i].m_rbA;\n" + " int rbB = constraints[i].m_rbB;\n" + " batchConstraints[i].m_bodyAPtrAndSignBit = bodies[rbA].m_invMass != 0.f ? rbA : -rbA;\n" + " batchConstraints[i].m_bodyBPtrAndSignBit = bodies[rbB].m_invMass != 0.f ? 
rbB : -rbB;\n" + " batchConstraints[i].m_batchId = -1;\n" + " batchConstraints[i].m_originalConstraintIndex = i;\n" + "}\n" + "typedef struct\n" + "{\n" + " // integrator parameters: frames per second (1/stepsize), default error\n" + " // reduction parameter (0..1).\n" + " float fps,erp;\n" + " // for the first and second body, pointers to two (linear and angular)\n" + " // n*3 jacobian sub matrices, stored by rows. these matrices will have\n" + " // been initialized to 0 on entry. if the second body is zero then the\n" + " // J2xx pointers may be 0.\n" + " union \n" + " {\n" + " __global float4* m_J1linearAxisFloat4;\n" + " __global float* m_J1linearAxis;\n" + " };\n" + " union\n" + " {\n" + " __global float4* m_J1angularAxisFloat4;\n" + " __global float* m_J1angularAxis;\n" + " };\n" + " union\n" + " {\n" + " __global float4* m_J2linearAxisFloat4;\n" + " __global float* m_J2linearAxis;\n" + " };\n" + " union\n" + " {\n" + " __global float4* m_J2angularAxisFloat4;\n" + " __global float* m_J2angularAxis;\n" + " };\n" + " // elements to jump from one row to the next in J's\n" + " int rowskip;\n" + " // right hand sides of the equation J*v = c + cfm * lambda. cfm is the\n" + " // \"constraint force mixing\" vector. c is set to zero on entry, cfm is\n" + " // set to a constant value (typically very small or zero) value on entry.\n" + " __global float* m_constraintError;\n" + " __global float* cfm;\n" + " // lo and hi limits for variables (set to -/+ infinity on entry).\n" + " __global float* m_lowerLimit;\n" + " __global float* m_upperLimit;\n" + " // findex vector for variables. see the LCP solver interface for a\n" + " // description of what this does. 
this is set to -1 on entry.\n" + " // note that the returned indexes are relative to the first index of\n" + " // the constraint.\n" + " __global int *findex;\n" + " // number of solver iterations\n" + " int m_numIterations;\n" + " //damping of the velocity\n" + " float m_damping;\n" + "} b3GpuConstraintInfo2;\n" + "void getSkewSymmetricMatrix(float4 vecIn, __global float4* v0,__global float4* v1,__global float4* v2)\n" + "{\n" + " *v0 = (float4)(0. ,-vecIn.z ,vecIn.y,0.f);\n" + " *v1 = (float4)(vecIn.z ,0. ,-vecIn.x,0.f);\n" + " *v2 = (float4)(-vecIn.y ,vecIn.x ,0.f,0.f);\n" + "}\n" + "void getInfo2Point2Point(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies)\n" + "{\n" + " float4 posA = bodies[constraint->m_rbA].m_pos;\n" + " Quaternion rotA = bodies[constraint->m_rbA].m_quat;\n" + " float4 posB = bodies[constraint->m_rbB].m_pos;\n" + " Quaternion rotB = bodies[constraint->m_rbB].m_quat;\n" + " // anchor points in global coordinates with respect to body PORs.\n" + " \n" + " // set jacobian\n" + " info->m_J1linearAxis[0] = 1;\n" + " info->m_J1linearAxis[info->rowskip+1] = 1;\n" + " info->m_J1linearAxis[2*info->rowskip+2] = 1;\n" + " float4 a1 = qtRotate(rotA,constraint->m_pivotInA);\n" + " {\n" + " __global float4* angular0 = (__global float4*)(info->m_J1angularAxis);\n" + " __global float4* angular1 = (__global float4*)(info->m_J1angularAxis+info->rowskip);\n" + " __global float4* angular2 = (__global float4*)(info->m_J1angularAxis+2*info->rowskip);\n" + " float4 a1neg = -a1;\n" + " getSkewSymmetricMatrix(a1neg,angular0,angular1,angular2);\n" + " }\n" + " if (info->m_J2linearAxis)\n" + " {\n" + " info->m_J2linearAxis[0] = -1;\n" + " info->m_J2linearAxis[info->rowskip+1] = -1;\n" + " info->m_J2linearAxis[2*info->rowskip+2] = -1;\n" + " }\n" + " \n" + " float4 a2 = qtRotate(rotB,constraint->m_pivotInB);\n" + " \n" + " {\n" + " // float4 a2n = -a2;\n" + " __global float4* angular0 = (__global 
float4*)(info->m_J2angularAxis);\n" + " __global float4* angular1 = (__global float4*)(info->m_J2angularAxis+info->rowskip);\n" + " __global float4* angular2 = (__global float4*)(info->m_J2angularAxis+2*info->rowskip);\n" + " getSkewSymmetricMatrix(a2,angular0,angular1,angular2);\n" + " }\n" + " \n" + " // set right hand side\n" + "// float currERP = (m_flags & B3_P2P_FLAGS_ERP) ? m_erp : info->erp;\n" + " float currERP = info->erp;\n" + " float k = info->fps * currERP;\n" + " int j;\n" + " float4 result = a2 + posB - a1 - posA;\n" + " float* resultPtr = &result;\n" + " for (j=0; j<3; j++)\n" + " {\n" + " info->m_constraintError[j*info->rowskip] = k * (resultPtr[j]);\n" + " }\n" + "}\n" + "Quaternion nearest( Quaternion first, Quaternion qd)\n" + "{\n" + " Quaternion diff,sum;\n" + " diff = first- qd;\n" + " sum = first + qd;\n" + " \n" + " if( dot(diff,diff) < dot(sum,sum) )\n" + " return qd;\n" + " return (-qd);\n" + "}\n" + "float b3Acos(float x) \n" + "{ \n" + " if (x<-1) \n" + " x=-1; \n" + " if (x>1) \n" + " x=1;\n" + " return acos(x); \n" + "}\n" + "float getAngle(Quaternion orn)\n" + "{\n" + " if (orn.w>=1.f)\n" + " orn.w=1.f;\n" + " float s = 2.f * b3Acos(orn.w);\n" + " return s;\n" + "}\n" + "void calculateDiffAxisAngleQuaternion( Quaternion orn0,Quaternion orn1a,float4* axis,float* angle)\n" + "{\n" + " Quaternion orn1 = nearest(orn0,orn1a);\n" + " \n" + " Quaternion dorn = qtMul(orn1,qtInvert(orn0));\n" + " *angle = getAngle(dorn);\n" + " *axis = (float4)(dorn.x,dorn.y,dorn.z,0.f);\n" + " \n" + " //check for axis length\n" + " float len = dot3F4(*axis,*axis);\n" + " if (len < FLT_EPSILON*FLT_EPSILON)\n" + " *axis = (float4)(1,0,0,0);\n" + " else\n" + " *axis /= sqrt(len);\n" + "}\n" + "void getInfo2FixedOrientation(__global b3GpuGenericConstraint* constraint,b3GpuConstraintInfo2* info,__global b3RigidBodyCL* bodies, int start_row)\n" + "{\n" + " Quaternion worldOrnA = bodies[constraint->m_rbA].m_quat;\n" + " Quaternion worldOrnB = 
bodies[constraint->m_rbB].m_quat;\n" + " int s = info->rowskip;\n" + " int start_index = start_row * s;\n" + " // 3 rows to make body rotations equal\n" + " info->m_J1angularAxis[start_index] = 1;\n" + " info->m_J1angularAxis[start_index + s + 1] = 1;\n" + " info->m_J1angularAxis[start_index + s*2+2] = 1;\n" + " if ( info->m_J2angularAxis)\n" + " {\n" + " info->m_J2angularAxis[start_index] = -1;\n" + " info->m_J2angularAxis[start_index + s+1] = -1;\n" + " info->m_J2angularAxis[start_index + s*2+2] = -1;\n" + " }\n" + " \n" + " float currERP = info->erp;\n" + " float k = info->fps * currERP;\n" + " float4 diff;\n" + " float angle;\n" + " float4 qrelCur = qtMul(worldOrnA,qtInvert(worldOrnB));\n" + " \n" + " calculateDiffAxisAngleQuaternion(constraint->m_relTargetAB,qrelCur,&diff,&angle);\n" + " diff*=-angle;\n" + " \n" + " float* resultPtr = &diff;\n" + " \n" + " for (int j=0; j<3; j++)\n" + " {\n" + " info->m_constraintError[(3+j)*info->rowskip] = k * resultPtr[j];\n" + " }\n" + " \n" + "}\n" + "__kernel void writeBackVelocitiesKernel(__global b3RigidBodyCL* bodies,__global b3GpuSolverBody* solverBodies,int numBodies)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numBodies)\n" + " return;\n" + " if (bodies[i].m_invMass)\n" + " {\n" + "// if (length(solverBodies[i].m_deltaLinearVelocity)<MOTIONCLAMP)\n" + " {\n" + " bodies[i].m_linVel += solverBodies[i].m_deltaLinearVelocity;\n" + " }\n" + "// if (length(solverBodies[i].m_deltaAngularVelocity)<MOTIONCLAMP)\n" + " {\n" + " bodies[i].m_angVel += solverBodies[i].m_deltaAngularVelocity;\n" + " } \n" + " }\n" + "}\n" + "__kernel void getInfo2Kernel(__global b3SolverConstraint* solverConstraintRows, \n" + " __global unsigned int* infos, \n" + " __global unsigned int* constraintRowOffsets, \n" + " __global b3GpuGenericConstraint* constraints, \n" + " __global b3BatchConstraint* batchConstraints, \n" + " __global b3RigidBodyCL* bodies,\n" + " __global BodyInertia* inertias,\n" + " __global b3GpuSolverBody* 
solverBodies,\n" + " float timeStep,\n" + " float globalErp,\n" + " float globalCfm,\n" + " float globalDamping,\n" + " int globalNumIterations,\n" + " int numConstraints)\n" + "{\n" + " int i = get_global_id(0);\n" + " if (i>=numConstraints)\n" + " return;\n" + " \n" + " //for now, always initialize the batch info\n" + " int info1 = infos[i];\n" + " \n" + " __global b3SolverConstraint* currentConstraintRow = &solverConstraintRows[constraintRowOffsets[i]];\n" + " __global b3GpuGenericConstraint* constraint = &constraints[i];\n" + " __global b3RigidBodyCL* rbA = &bodies[ constraint->m_rbA];\n" + " __global b3RigidBodyCL* rbB = &bodies[ constraint->m_rbB];\n" + " int solverBodyIdA = constraint->m_rbA;\n" + " int solverBodyIdB = constraint->m_rbB;\n" + " __global b3GpuSolverBody* bodyAPtr = &solverBodies[solverBodyIdA];\n" + " __global b3GpuSolverBody* bodyBPtr = &solverBodies[solverBodyIdB];\n" + " if (rbA->m_invMass)\n" + " {\n" + " batchConstraints[i].m_bodyAPtrAndSignBit = solverBodyIdA;\n" + " } else\n" + " {\n" + "// if (!solverBodyIdA)\n" + "// m_staticIdx = 0;\n" + " batchConstraints[i].m_bodyAPtrAndSignBit = -solverBodyIdA;\n" + " }\n" + " if (rbB->m_invMass)\n" + " {\n" + " batchConstraints[i].m_bodyBPtrAndSignBit = solverBodyIdB;\n" + " } else\n" + " {\n" + "// if (!solverBodyIdB)\n" + "// m_staticIdx = 0;\n" + " batchConstraints[i].m_bodyBPtrAndSignBit = -solverBodyIdB;\n" + " }\n" + " if (info1)\n" + " {\n" + " int overrideNumSolverIterations = 0;//constraint->getOverrideNumSolverIterations() > 0 ? 
constraint->getOverrideNumSolverIterations() : infoGlobal.m_numIterations;\n" + "// if (overrideNumSolverIterations>m_maxOverrideNumSolverIterations)\n" + " // m_maxOverrideNumSolverIterations = overrideNumSolverIterations;\n" + " int j;\n" + " for ( j=0;j<info1;j++)\n" + " {\n" + "// memset(&currentConstraintRow[j],0,sizeof(b3SolverConstraint));\n" + " currentConstraintRow[j].m_angularComponentA = (float4)(0,0,0,0);\n" + " currentConstraintRow[j].m_angularComponentB = (float4)(0,0,0,0);\n" + " currentConstraintRow[j].m_appliedImpulse = 0.f;\n" + " currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" + " currentConstraintRow[j].m_cfm = 0.f;\n" + " currentConstraintRow[j].m_contactNormal = (float4)(0,0,0,0);\n" + " currentConstraintRow[j].m_friction = 0.f;\n" + " currentConstraintRow[j].m_frictionIndex = 0;\n" + " currentConstraintRow[j].m_jacDiagABInv = 0.f;\n" + " currentConstraintRow[j].m_lowerLimit = 0.f;\n" + " currentConstraintRow[j].m_upperLimit = 0.f;\n" + " currentConstraintRow[j].m_originalConstraint = i;\n" + " currentConstraintRow[j].m_overrideNumSolverIterations = 0;\n" + " currentConstraintRow[j].m_relpos1CrossNormal = (float4)(0,0,0,0);\n" + " currentConstraintRow[j].m_relpos2CrossNormal = (float4)(0,0,0,0);\n" + " currentConstraintRow[j].m_rhs = 0.f;\n" + " currentConstraintRow[j].m_rhsPenetration = 0.f;\n" + " currentConstraintRow[j].m_solverBodyIdA = 0;\n" + " currentConstraintRow[j].m_solverBodyIdB = 0;\n" + " \n" + " currentConstraintRow[j].m_lowerLimit = -B3_INFINITY;\n" + " currentConstraintRow[j].m_upperLimit = B3_INFINITY;\n" + " currentConstraintRow[j].m_appliedImpulse = 0.f;\n" + " currentConstraintRow[j].m_appliedPushImpulse = 0.f;\n" + " currentConstraintRow[j].m_solverBodyIdA = solverBodyIdA;\n" + " currentConstraintRow[j].m_solverBodyIdB = solverBodyIdB;\n" + " currentConstraintRow[j].m_overrideNumSolverIterations = overrideNumSolverIterations; \n" + " }\n" + " bodyAPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" + " 
bodyAPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" + " bodyAPtr->m_pushVelocity = (float4)(0,0,0,0);\n" + " bodyAPtr->m_turnVelocity = (float4)(0,0,0,0);\n" + " bodyBPtr->m_deltaLinearVelocity = (float4)(0,0,0,0);\n" + " bodyBPtr->m_deltaAngularVelocity = (float4)(0,0,0,0);\n" + " bodyBPtr->m_pushVelocity = (float4)(0,0,0,0);\n" + " bodyBPtr->m_turnVelocity = (float4)(0,0,0,0);\n" + " int rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" + " \n" + " b3GpuConstraintInfo2 info2;\n" + " info2.fps = 1.f/timeStep;\n" + " info2.erp = globalErp;\n" + " info2.m_J1linearAxisFloat4 = &currentConstraintRow->m_contactNormal;\n" + " info2.m_J1angularAxisFloat4 = &currentConstraintRow->m_relpos1CrossNormal;\n" + " info2.m_J2linearAxisFloat4 = 0;\n" + " info2.m_J2angularAxisFloat4 = &currentConstraintRow->m_relpos2CrossNormal;\n" + " info2.rowskip = sizeof(b3SolverConstraint)/sizeof(float);//check this\n" + " ///the size of b3SolverConstraint needs be a multiple of float\n" + "// b3Assert(info2.rowskip*sizeof(float)== sizeof(b3SolverConstraint));\n" + " info2.m_constraintError = &currentConstraintRow->m_rhs;\n" + " currentConstraintRow->m_cfm = globalCfm;\n" + " info2.m_damping = globalDamping;\n" + " info2.cfm = &currentConstraintRow->m_cfm;\n" + " info2.m_lowerLimit = &currentConstraintRow->m_lowerLimit;\n" + " info2.m_upperLimit = &currentConstraintRow->m_upperLimit;\n" + " info2.m_numIterations = globalNumIterations;\n" + " switch (constraint->m_constraintType)\n" + " {\n" + " case B3_GPU_POINT2POINT_CONSTRAINT_TYPE:\n" + " {\n" + " getInfo2Point2Point(constraint,&info2,bodies);\n" + " break;\n" + " }\n" + " case B3_GPU_FIXED_CONSTRAINT_TYPE:\n" + " {\n" + " getInfo2Point2Point(constraint,&info2,bodies);\n" + " getInfo2FixedOrientation(constraint,&info2,bodies,3);\n" + " break;\n" + " }\n" + " default:\n" + " {\n" + " }\n" + " }\n" + " ///finalize the constraint setup\n" + " for ( j=0;j<info1;j++)\n" + " {\n" + " __global b3SolverConstraint* solverConstraint = &currentConstraintRow[j];\n" + " if 
(solverConstraint->m_upperLimit>=constraint->m_breakingImpulseThreshold)\n" + " {\n" + " solverConstraint->m_upperLimit = constraint->m_breakingImpulseThreshold;\n" + " }\n" + " if (solverConstraint->m_lowerLimit<=-constraint->m_breakingImpulseThreshold)\n" + " {\n" + " solverConstraint->m_lowerLimit = -constraint->m_breakingImpulseThreshold;\n" + " }\n" + "// solverConstraint->m_originalContactPoint = constraint;\n" + " \n" + " Matrix3x3 invInertiaWorldA= inertias[constraint->m_rbA].m_invInertiaWorld;\n" + " {\n" + " //float4 angularFactorA(1,1,1);\n" + " float4 ftorqueAxis1 = solverConstraint->m_relpos1CrossNormal;\n" + " solverConstraint->m_angularComponentA = mtMul1(invInertiaWorldA,ftorqueAxis1);//*angularFactorA;\n" + " }\n" + " \n" + " Matrix3x3 invInertiaWorldB= inertias[constraint->m_rbB].m_invInertiaWorld;\n" + " {\n" + " float4 ftorqueAxis2 = solverConstraint->m_relpos2CrossNormal;\n" + " solverConstraint->m_angularComponentB = mtMul1(invInertiaWorldB,ftorqueAxis2);//*constraint->m_rbB.getAngularFactor();\n" + " }\n" + " {\n" + " //it is ok to use solverConstraint->m_contactNormal instead of -solverConstraint->m_contactNormal\n" + " //because it gets multiplied iMJlB\n" + " float4 iMJlA = solverConstraint->m_contactNormal*rbA->m_invMass;\n" + " float4 iMJaA = mtMul3(solverConstraint->m_relpos1CrossNormal,invInertiaWorldA);\n" + " float4 iMJlB = solverConstraint->m_contactNormal*rbB->m_invMass;//sign of normal?\n" + " float4 iMJaB = mtMul3(solverConstraint->m_relpos2CrossNormal,invInertiaWorldB);\n" + " float sum = dot3F4(iMJlA,solverConstraint->m_contactNormal);\n" + " sum += dot3F4(iMJaA,solverConstraint->m_relpos1CrossNormal);\n" + " sum += dot3F4(iMJlB,solverConstraint->m_contactNormal);\n" + " sum += dot3F4(iMJaB,solverConstraint->m_relpos2CrossNormal);\n" + " float fsum = fabs(sum);\n" + " if (fsum>FLT_EPSILON)\n" + " {\n" + " solverConstraint->m_jacDiagABInv = 1.f/sum;\n" + " } else\n" + " {\n" + " solverConstraint->m_jacDiagABInv = 0.f;\n" + " 
}\n" + " }\n" + " ///fix rhs\n" + " ///todo: add force/torque accelerators\n" + " {\n" + " float rel_vel;\n" + " float vel1Dotn = dot3F4(solverConstraint->m_contactNormal,rbA->m_linVel) + dot3F4(solverConstraint->m_relpos1CrossNormal,rbA->m_angVel);\n" + " float vel2Dotn = -dot3F4(solverConstraint->m_contactNormal,rbB->m_linVel) + dot3F4(solverConstraint->m_relpos2CrossNormal,rbB->m_angVel);\n" + " rel_vel = vel1Dotn+vel2Dotn;\n" + " float restitution = 0.f;\n" + " float positionalError = solverConstraint->m_rhs;//already filled in by getConstraintInfo2\n" + " float velocityError = restitution - rel_vel * info2.m_damping;\n" + " float penetrationImpulse = positionalError*solverConstraint->m_jacDiagABInv;\n" + " float velocityImpulse = velocityError *solverConstraint->m_jacDiagABInv;\n" + " solverConstraint->m_rhs = penetrationImpulse+velocityImpulse;\n" + " solverConstraint->m_appliedImpulse = 0.f;\n" + " }\n" + " }\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h index 15a049992b..6e14ad51fc 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveContact.h @@ -1,393 +1,392 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveContactCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. 
The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define mymake_float4 (float4)\n" -"//#define make_float2 (float2)\n" -"//#define make_uint4 (uint4)\n" -"//#define make_int4 (int4)\n" 
-"//#define make_uint2 (uint2)\n" -"//#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" 
-" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = mymake_float4(-n.xyz,0.f);\n" -" *angular0 = -cross3(r0, n);\n" -" *angular1 = cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"float 
calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" -"{\n" -" float minRambdaDt = 0;\n" -" float maxRambdaDt = FLT_MAX;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = cs->m_worldPos[ic] - posA;\n" -" float4 r1 = cs->m_worldPos[ic] - posB;\n" -" setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" -" *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" -" rambdaDt *= cs->m_jacCoeffInv[ic];\n" -" {\n" -" float prevSum = cs->m_appliedRambdaDt[ic];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt );\n" -" updated = min2( updated, maxRambdaDt );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_appliedRambdaDt[ic] = updated;\n" -" }\n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 
angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" *linVelA += linImp0;\n" -" *angVelA += angImp0;\n" -" *linVelB += linImp1;\n" -" *angVelB += angImp1;\n" -" }\n" -"}\n" -"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" -" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n[0].z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n[0].z*k;\n" -" p[0].z = n[0].y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n[0].x*p[0].z;\n" -" q[0].z = n[0].x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n[0].y*k;\n" -" p[0].y = n[0].x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n[0].z*p[0].y;\n" -" q[0].y = n[0].z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" -"{\n" -" //float frictionCoeff = ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -" posB, &linVelB, &angVelB, invMassB, invInertiaB );\n" -" if 
(gBodies[aIdx].m_invMass)\n" -" {\n" -" gBodies[aIdx].m_linVel = linVelA;\n" -" gBodies[aIdx].m_angVel = angVelA;\n" -" } else\n" -" {\n" -" gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" \n" -" }\n" -" if (gBodies[bIdx].m_invMass)\n" -" {\n" -" gBodies[bIdx].m_linVel = linVelB;\n" -" gBodies[bIdx].m_angVel = angVelB;\n" -" } else\n" -" {\n" -" gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" \n" -" }\n" -"}\n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void BatchSolveKernelContact(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" __global int* gN,\n" -" __global int* gOffsets,\n" -" __global int* batchSizes,\n" -" int maxBatch1,\n" -" int cellBatch,\n" -" int4 nSplit\n" -" )\n" -"{\n" -" //__local int ldsBatchIdx[WG_SIZE+1];\n" -" __local int ldsCurBatch;\n" -" __local int ldsNextBatch;\n" -" __local int ldsStart;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int wgIdx = GET_GROUP_IDX;\n" -"// int gIdx = GET_GLOBAL_IDX;\n" -"// debugInfo[gIdx].m_valInt0 = gIdx;\n" -" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -" \n" -" \n" -" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" -" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" -" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" -" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" -" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -" //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" -" //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" -" //int cellIdx = xIdx+yIdx*nSplit;\n" -" \n" -" if( gN[cellIdx] == 0 ) \n" -" return;\n" -" int maxBatch = 
batchSizes[cellIdx];\n" -" \n" -" \n" -" const int start = gOffsets[cellIdx];\n" -" const int end = start + gN[cellIdx];\n" -" \n" -" \n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch = 0;\n" -" ldsNextBatch = 0;\n" -" ldsStart = start;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" int idx=ldsStart+lIdx;\n" -" while (ldsCurBatch < maxBatch)\n" -" {\n" -" for(; idx<end; )\n" -" {\n" -" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" -" {\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" idx+=64;\n" -" } else\n" -" {\n" -" break;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch++;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" \n" -" \n" -"}\n" -"__kernel void solveSingleContactKernel(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" int cellIdx,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int index = get_global_id(0);\n" -" if (index < numConstraintsInBatch)\n" -" {\n" -" int idx=batchOffset+index;\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" } \n" -"}\n" -; +static const char* solveContactCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. 
Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Takahiro Harada\n" + "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile global int*\n" + "#endif\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define mymake_float4 (float4)\n" + "//#define make_float2 (float2)\n" + "//#define make_uint4 (uint4)\n" + "//#define make_int4 (int4)\n" + "//#define make_uint2 (uint2)\n" + "//#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "///////////////////////////////////////\n" + "// Vector\n" + "///////////////////////////////////////\n" 
+ "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "float4 cross3(float4 a, float4 b)\n" + "{\n" + " return cross(a,b);\n" + "}\n" + "__inline\n" + "float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = mymake_float4(a.xyz,0.f);\n" + " float4 b1 = mymake_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "__inline\n" + "float4 normalize3(const float4 a)\n" + "{\n" + " float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" + " return fastNormalize4( n );\n" + "// float length = sqrtf(dot3F4(a, a));\n" + "// return 1.f/length * a;\n" + "}\n" + "///////////////////////////////////////\n" + "// Matrix3x3\n" + "///////////////////////////////////////\n" + "typedef struct\n" + "{\n" + " float4 m_row[3];\n" + "}Matrix3x3;\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b);\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b);\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b)\n" + "{\n" + " float4 ans;\n" + " ans.x = dot3F4( a.m_row[0], b );\n" + " ans.y = dot3F4( a.m_row[1], b );\n" + " ans.z = dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b)\n" + "{\n" + " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " float4 ans;\n" + " ans.x = dot3F4( a, colx );\n" + " ans.y = dot3F4( a, coly );\n" + " ans.z = dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "///////////////////////////////////////\n" + "// Quaternion\n" + "///////////////////////////////////////\n" + "typedef float4 Quaternion;\n" + "#define WG_SIZE 64\n" + "typedef struct\n" + "{\n" + " float4 m_pos;\n" + " Quaternion m_quat;\n" + " float4 m_linVel;\n" + " float4 m_angVel;\n" + " u32 m_shapeIdx;\n" + " float m_invMass;\n" + " float 
m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "} Body;\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_invInertia;\n" + " Matrix3x3 m_initInvInertia;\n" + "} Shape;\n" + "typedef struct\n" + "{\n" + " float4 m_linear;\n" + " float4 m_worldPos[4];\n" + " float4 m_center; \n" + " float m_jacCoeffInv[4];\n" + " float m_b[4];\n" + " float m_appliedRambdaDt[4];\n" + " float m_fJacCoeffInv[2]; \n" + " float m_fAppliedRambdaDt[2]; \n" + " u32 m_bodyA;\n" + " u32 m_bodyB;\n" + " int m_batchIdx;\n" + " u32 m_paddings[1];\n" + "} Constraint4;\n" + "typedef struct\n" + "{\n" + " int m_nConstraints;\n" + " int m_start;\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct\n" + "{\n" + " int m_solveFriction;\n" + " int m_maxBatch; // long batch really kills the performance\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBufferBatchSolve;\n" + "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" + "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" + "{\n" + " *linear = mymake_float4(-n.xyz,0.f);\n" + " *angular0 = -cross3(r0, n);\n" + " *angular1 = cross3(r1, n);\n" + "}\n" + "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" + "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" + "{\n" + " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" + "}\n" + "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" + " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" + "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, 
const float4 angular1,\n" + " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" + "{\n" + " // linear0,1 are normlized\n" + " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" + " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" + " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" + " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" + " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" + "}\n" + "void solveContact(__global Constraint4* cs,\n" + " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" + " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB);\n" + "void solveContact(__global Constraint4* cs,\n" + " float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" + " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB)\n" + "{\n" + " float minRambdaDt = 0;\n" + " float maxRambdaDt = FLT_MAX;\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" + " float4 angular0, angular1, linear;\n" + " float4 r0 = cs->m_worldPos[ic] - posA;\n" + " float4 r1 = cs->m_worldPos[ic] - posB;\n" + " setLinearAndAngular( -cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" + " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" + " *linVelA, *angVelA, *linVelB, *angVelB ) + cs->m_b[ic];\n" + " rambdaDt *= cs->m_jacCoeffInv[ic];\n" + " {\n" + " float prevSum = cs->m_appliedRambdaDt[ic];\n" + " float updated = prevSum;\n" + " updated += rambdaDt;\n" + " updated = max2( updated, minRambdaDt );\n" + " updated = min2( updated, maxRambdaDt );\n" + " rambdaDt = updated - prevSum;\n" + " cs->m_appliedRambdaDt[ic] = updated;\n" + " }\n" + " float4 linImp0 = invMassA*linear*rambdaDt;\n" + " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" + " float4 angImp0 = mtMul1(invInertiaA, 
angular0)*rambdaDt;\n" + " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" + " *linVelA += linImp0;\n" + " *angVelA += angImp0;\n" + " *linVelB += linImp1;\n" + " *angVelB += angImp1;\n" + " }\n" + "}\n" + "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" + " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" + "{\n" + " if (fabs(n[0].z) > 0.70710678f) {\n" + " // choose p in y-z plane\n" + " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = 0;\n" + " p[0].y = -n[0].z*k;\n" + " p[0].z = n[0].y*k;\n" + " // set q = n x p\n" + " q[0].x = a*k;\n" + " q[0].y = -n[0].x*p[0].z;\n" + " q[0].z = n[0].x*p[0].y;\n" + " }\n" + " else {\n" + " // choose p in x-y plane\n" + " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = -n[0].y*k;\n" + " p[0].y = n[0].x*k;\n" + " p[0].z = 0;\n" + " // set q = n x p\n" + " q[0].x = -n[0].z*p[0].y;\n" + " q[0].y = n[0].z*p[0].x;\n" + " q[0].z = a*k;\n" + " }\n" + "}\n" + "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" + "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" + "{\n" + " //float frictionCoeff = ldsCs[0].m_linear.w;\n" + " int aIdx = ldsCs[0].m_bodyA;\n" + " int bIdx = ldsCs[0].m_bodyB;\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" + " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" + " posB, &linVelB, &angVelB, invMassB, invInertiaB 
);\n" + " if (gBodies[aIdx].m_invMass)\n" + " {\n" + " gBodies[aIdx].m_linVel = linVelA;\n" + " gBodies[aIdx].m_angVel = angVelA;\n" + " } else\n" + " {\n" + " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" + " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" + " \n" + " }\n" + " if (gBodies[bIdx].m_invMass)\n" + " {\n" + " gBodies[bIdx].m_linVel = linVelB;\n" + " gBodies[bIdx].m_angVel = angVelB;\n" + " } else\n" + " {\n" + " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" + " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" + " \n" + " }\n" + "}\n" + "typedef struct \n" + "{\n" + " int m_valInt0;\n" + " int m_valInt1;\n" + " int m_valInt2;\n" + " int m_valInt3;\n" + " float m_val0;\n" + " float m_val1;\n" + " float m_val2;\n" + " float m_val3;\n" + "} SolverDebugInfo;\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void BatchSolveKernelContact(__global Body* gBodies,\n" + " __global Shape* gShapes,\n" + " __global Constraint4* gConstraints,\n" + " __global int* gN,\n" + " __global int* gOffsets,\n" + " __global int* batchSizes,\n" + " int maxBatch1,\n" + " int cellBatch,\n" + " int4 nSplit\n" + " )\n" + "{\n" + " //__local int ldsBatchIdx[WG_SIZE+1];\n" + " __local int ldsCurBatch;\n" + " __local int ldsNextBatch;\n" + " __local int ldsStart;\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int wgIdx = GET_GROUP_IDX;\n" + "// int gIdx = GET_GLOBAL_IDX;\n" + "// debugInfo[gIdx].m_valInt0 = gIdx;\n" + " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" + " \n" + " \n" + " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" + " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" + " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" + " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" + " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" + " //int xIdx = (wgIdx/(nSplit/2))*2 + (bIdx&1);\n" + " //int yIdx = (wgIdx%(nSplit/2))*2 + (bIdx>>1);\n" + " //int cellIdx = xIdx+yIdx*nSplit;\n" + " \n" + 
" if( gN[cellIdx] == 0 ) \n" + " return;\n" + " int maxBatch = batchSizes[cellIdx];\n" + " \n" + " \n" + " const int start = gOffsets[cellIdx];\n" + " const int end = start + gN[cellIdx];\n" + " \n" + " \n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " ldsCurBatch = 0;\n" + " ldsNextBatch = 0;\n" + " ldsStart = start;\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " int idx=ldsStart+lIdx;\n" + " while (ldsCurBatch < maxBatch)\n" + " {\n" + " for(; idx<end; )\n" + " {\n" + " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" + " {\n" + " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" + " idx+=64;\n" + " } else\n" + " {\n" + " break;\n" + " }\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " ldsCurBatch++;\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " }\n" + " \n" + " \n" + "}\n" + "__kernel void solveSingleContactKernel(__global Body* gBodies,\n" + " __global Shape* gShapes,\n" + " __global Constraint4* gConstraints,\n" + " int cellIdx,\n" + " int batchOffset,\n" + " int numConstraintsInBatch\n" + " )\n" + "{\n" + " int index = get_global_id(0);\n" + " if (index < numConstraintsInBatch)\n" + " {\n" + " int idx=batchOffset+index;\n" + " solveContactConstraint( gBodies, gShapes, &gConstraints[idx] );\n" + " } \n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h index eb58674f22..9707cdb25d 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solveFriction.h @@ -1,421 +1,420 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solveFrictionCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. 
\n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = 
atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define mymake_float4 (float4)\n" -"//#define make_float2 (float2)\n" -"//#define make_uint4 (uint4)\n" -"//#define make_int4 (int4)\n" -"//#define make_uint2 (uint2)\n" -"//#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = mymake_float4(a.xyz,0.f);\n" -" float4 b1 = mymake_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = 
mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = mymake_float4(-n.xyz,0.f);\n" -" *angular0 = -cross3(r0, n);\n" -" *angular1 = cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 
a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" -" void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n[0].z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n[0].z*k;\n" -" p[0].z = n[0].y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n[0].x*p[0].z;\n" -" q[0].z = n[0].x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n[0].y*k;\n" -" p[0].y = n[0].x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n[0].z*p[0].y;\n" -" q[0].y = n[0].z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs);\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" -"{\n" -" float frictionCoeff = 
ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" {\n" -" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" -" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -" float sum = 0;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" -" }\n" -" frictionCoeff = 0.7f;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" maxRambdaDt[j] = frictionCoeff*sum;\n" -" minRambdaDt[j] = -maxRambdaDt[j];\n" -" }\n" -" \n" -"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" -" \n" -" \n" -" {\n" -" \n" -" __global Constraint4* cs = ldsCs;\n" -" \n" -" if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" -" const float4 center = cs->m_center;\n" -" \n" -" float4 n = -cs->m_linear;\n" -" \n" -" float4 tangent[2];\n" -" btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = center - posA;\n" -" float4 r1 = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB );\n" -" rambdaDt *= cs->m_fJacCoeffInv[i];\n" -" \n" -" {\n" -" float prevSum = cs->m_fAppliedRambdaDt[i];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt[i] 
);\n" -" updated = min2( updated, maxRambdaDt[i] );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_fAppliedRambdaDt[i] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" linVelA += linImp0;\n" -" angVelA += angImp0;\n" -" linVelB += linImp1;\n" -" angVelB += angImp1;\n" -" }\n" -" { // angular damping for point constraint\n" -" float4 ab = normalize3( posB - posA );\n" -" float4 ac = normalize3( center - posA );\n" -" if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" -" {\n" -" float angNA = dot3F4( n, angVelA );\n" -" float angNB = dot3F4( n, angVelB );\n" -" \n" -" angVelA -= (angNA*0.1f)*n;\n" -" angVelB -= (angNB*0.1f)*n;\n" -" }\n" -" }\n" -" }\n" -" \n" -" \n" -" }\n" -" if (gBodies[aIdx].m_invMass)\n" -" {\n" -" gBodies[aIdx].m_linVel = linVelA;\n" -" gBodies[aIdx].m_angVel = angVelA;\n" -" } else\n" -" {\n" -" gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" }\n" -" if (gBodies[bIdx].m_invMass)\n" -" {\n" -" gBodies[bIdx].m_linVel = linVelB;\n" -" gBodies[bIdx].m_angVel = angVelB;\n" -" } else\n" -" {\n" -" gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" -" gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" -" }\n" -" \n" -"}\n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void BatchSolveKernelFriction(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" __global int* gN,\n" -" __global int* gOffsets,\n" -" __global int* batchSizes,\n" -" int maxBatch1,\n" -" int 
cellBatch,\n" -" int4 nSplit\n" -" )\n" -"{\n" -" //__local int ldsBatchIdx[WG_SIZE+1];\n" -" __local int ldsCurBatch;\n" -" __local int ldsNextBatch;\n" -" __local int ldsStart;\n" -" int lIdx = GET_LOCAL_IDX;\n" -" int wgIdx = GET_GROUP_IDX;\n" -"// int gIdx = GET_GLOBAL_IDX;\n" -"// debugInfo[gIdx].m_valInt0 = gIdx;\n" -" //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" -" int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" -" int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" -" int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" -" int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" -" int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" -" \n" -" if( gN[cellIdx] == 0 ) \n" -" return;\n" -" int maxBatch = batchSizes[cellIdx];\n" -" const int start = gOffsets[cellIdx];\n" -" const int end = start + gN[cellIdx];\n" -" \n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch = 0;\n" -" ldsNextBatch = 0;\n" -" ldsStart = start;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" int idx=ldsStart+lIdx;\n" -" while (ldsCurBatch < maxBatch)\n" -" {\n" -" for(; idx<end; )\n" -" {\n" -" if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" -" {\n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" idx+=64;\n" -" } else\n" -" {\n" -" break;\n" -" }\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" if( lIdx == 0 )\n" -" {\n" -" ldsCurBatch++;\n" -" }\n" -" GROUP_LDS_BARRIER;\n" -" }\n" -" \n" -" \n" -"}\n" -"__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n" -" __global Shape* gShapes,\n" -" __global Constraint4* gConstraints,\n" -" int cellIdx,\n" -" int batchOffset,\n" -" int numConstraintsInBatch\n" -" )\n" -"{\n" -" int index = get_global_id(0);\n" -" if (index < numConstraintsInBatch)\n" -" {\n" -" \n" -" int idx=batchOffset+index;\n" -" \n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" -" } \n" -"}\n" -; +static const char* solveFrictionCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. 
\n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Takahiro Harada\n" + "//#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile global int*\n" + "#endif\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define 
AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define mymake_float4 (float4)\n" + "//#define make_float2 (float2)\n" + "//#define make_uint4 (uint4)\n" + "//#define make_int4 (int4)\n" + "//#define make_uint2 (uint2)\n" + "//#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "///////////////////////////////////////\n" + "// Vector\n" + "///////////////////////////////////////\n" + "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "float4 cross3(float4 a, float4 b)\n" + "{\n" + " return cross(a,b);\n" + "}\n" + "__inline\n" + "float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = mymake_float4(a.xyz,0.f);\n" + " float4 b1 = mymake_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "__inline\n" + "float4 normalize3(const float4 a)\n" + "{\n" + " float4 n = mymake_float4(a.x, a.y, a.z, 0.f);\n" + " return fastNormalize4( n );\n" + "// float length = sqrtf(dot3F4(a, a));\n" + "// return 1.f/length * a;\n" + "}\n" + "///////////////////////////////////////\n" + "// Matrix3x3\n" + "///////////////////////////////////////\n" + "typedef struct\n" + "{\n" + " float4 m_row[3];\n" + "}Matrix3x3;\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b);\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b);\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b)\n" + "{\n" + " float4 ans;\n" + " ans.x = dot3F4( a.m_row[0], b );\n" + " ans.y = dot3F4( a.m_row[1], b );\n" + " ans.z = dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b)\n" + "{\n" + " float4 colx = mymake_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " float4 coly = 
mymake_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " float4 colz = mymake_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " float4 ans;\n" + " ans.x = dot3F4( a, colx );\n" + " ans.y = dot3F4( a, coly );\n" + " ans.z = dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "///////////////////////////////////////\n" + "// Quaternion\n" + "///////////////////////////////////////\n" + "typedef float4 Quaternion;\n" + "#define WG_SIZE 64\n" + "typedef struct\n" + "{\n" + " float4 m_pos;\n" + " Quaternion m_quat;\n" + " float4 m_linVel;\n" + " float4 m_angVel;\n" + " u32 m_shapeIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "} Body;\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_invInertia;\n" + " Matrix3x3 m_initInvInertia;\n" + "} Shape;\n" + "typedef struct\n" + "{\n" + " float4 m_linear;\n" + " float4 m_worldPos[4];\n" + " float4 m_center; \n" + " float m_jacCoeffInv[4];\n" + " float m_b[4];\n" + " float m_appliedRambdaDt[4];\n" + " float m_fJacCoeffInv[2]; \n" + " float m_fAppliedRambdaDt[2]; \n" + " u32 m_bodyA;\n" + " u32 m_bodyB;\n" + " int m_batchIdx;\n" + " u32 m_paddings[1];\n" + "} Constraint4;\n" + "typedef struct\n" + "{\n" + " int m_nConstraints;\n" + " int m_start;\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct\n" + "{\n" + " int m_solveFriction;\n" + " int m_maxBatch; // long batch really kills the performance\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBufferBatchSolve;\n" + "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1);\n" + "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" + "{\n" + " *linear = mymake_float4(-n.xyz,0.f);\n" + " *angular0 = -cross3(r0, n);\n" + " *angular1 = cross3(r1, n);\n" + "}\n" + "float calcRelVel( float4 l0, float4 l1, 
float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 );\n" + "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" + "{\n" + " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" + "}\n" + "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" + " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1);\n" + "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" + " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1)\n" + "{\n" + " // linear0,1 are normlized\n" + " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" + " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" + " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" + " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" + " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" + "}\n" + "void btPlaneSpace1 (const float4* n, float4* p, float4* q);\n" + " void btPlaneSpace1 (const float4* n, float4* p, float4* q)\n" + "{\n" + " if (fabs(n[0].z) > 0.70710678f) {\n" + " // choose p in y-z plane\n" + " float a = n[0].y*n[0].y + n[0].z*n[0].z;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = 0;\n" + " p[0].y = -n[0].z*k;\n" + " p[0].z = n[0].y*k;\n" + " // set q = n x p\n" + " q[0].x = a*k;\n" + " q[0].y = -n[0].x*p[0].z;\n" + " q[0].z = n[0].x*p[0].y;\n" + " }\n" + " else {\n" + " // choose p in x-y plane\n" + " float a = n[0].x*n[0].x + n[0].y*n[0].y;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = -n[0].y*k;\n" + " p[0].y = n[0].x*k;\n" + " p[0].z = 0;\n" + " // set q = n x p\n" + " q[0].x = -n[0].z*p[0].y;\n" + " q[0].y = n[0].z*p[0].x;\n" + " q[0].z = a*k;\n" + " }\n" + "}\n" + "void solveFrictionConstraint(__global Body* gBodies, __global 
Shape* gShapes, __global Constraint4* ldsCs);\n" + "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs)\n" + "{\n" + " float frictionCoeff = ldsCs[0].m_linear.w;\n" + " int aIdx = ldsCs[0].m_bodyA;\n" + " int bIdx = ldsCs[0].m_bodyB;\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" + " \n" + " {\n" + " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" + " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" + " float sum = 0;\n" + " for(int j=0; j<4; j++)\n" + " {\n" + " sum +=ldsCs[0].m_appliedRambdaDt[j];\n" + " }\n" + " frictionCoeff = 0.7f;\n" + " for(int j=0; j<4; j++)\n" + " {\n" + " maxRambdaDt[j] = frictionCoeff*sum;\n" + " minRambdaDt[j] = -maxRambdaDt[j];\n" + " }\n" + " \n" + "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" + "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" + " \n" + " \n" + " {\n" + " \n" + " __global Constraint4* cs = ldsCs;\n" + " \n" + " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" + " const float4 center = cs->m_center;\n" + " \n" + " float4 n = -cs->m_linear;\n" + " \n" + " float4 tangent[2];\n" + " btPlaneSpace1(&n,&tangent[0],&tangent[1]);\n" + " float4 angular0, angular1, linear;\n" + " float4 r0 = center - posA;\n" + " float4 r1 = center - posB;\n" + " for(int i=0; i<2; i++)\n" + " {\n" + " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" + " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" + " 
linVelA, angVelA, linVelB, angVelB );\n" + " rambdaDt *= cs->m_fJacCoeffInv[i];\n" + " \n" + " {\n" + " float prevSum = cs->m_fAppliedRambdaDt[i];\n" + " float updated = prevSum;\n" + " updated += rambdaDt;\n" + " updated = max2( updated, minRambdaDt[i] );\n" + " updated = min2( updated, maxRambdaDt[i] );\n" + " rambdaDt = updated - prevSum;\n" + " cs->m_fAppliedRambdaDt[i] = updated;\n" + " }\n" + " \n" + " float4 linImp0 = invMassA*linear*rambdaDt;\n" + " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" + " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" + " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" + " \n" + " linVelA += linImp0;\n" + " angVelA += angImp0;\n" + " linVelB += linImp1;\n" + " angVelB += angImp1;\n" + " }\n" + " { // angular damping for point constraint\n" + " float4 ab = normalize3( posB - posA );\n" + " float4 ac = normalize3( center - posA );\n" + " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" + " {\n" + " float angNA = dot3F4( n, angVelA );\n" + " float angNB = dot3F4( n, angVelB );\n" + " \n" + " angVelA -= (angNA*0.1f)*n;\n" + " angVelB -= (angNB*0.1f)*n;\n" + " }\n" + " }\n" + " }\n" + " \n" + " \n" + " }\n" + " if (gBodies[aIdx].m_invMass)\n" + " {\n" + " gBodies[aIdx].m_linVel = linVelA;\n" + " gBodies[aIdx].m_angVel = angVelA;\n" + " } else\n" + " {\n" + " gBodies[aIdx].m_linVel = mymake_float4(0,0,0,0);\n" + " gBodies[aIdx].m_angVel = mymake_float4(0,0,0,0);\n" + " }\n" + " if (gBodies[bIdx].m_invMass)\n" + " {\n" + " gBodies[bIdx].m_linVel = linVelB;\n" + " gBodies[bIdx].m_angVel = angVelB;\n" + " } else\n" + " {\n" + " gBodies[bIdx].m_linVel = mymake_float4(0,0,0,0);\n" + " gBodies[bIdx].m_angVel = mymake_float4(0,0,0,0);\n" + " }\n" + " \n" + "}\n" + "typedef struct \n" + "{\n" + " int m_valInt0;\n" + " int m_valInt1;\n" + " int m_valInt2;\n" + " int m_valInt3;\n" + " float m_val0;\n" + " float m_val1;\n" + " float m_val2;\n" + " float m_val3;\n" + "} SolverDebugInfo;\n" + 
"__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void BatchSolveKernelFriction(__global Body* gBodies,\n" + " __global Shape* gShapes,\n" + " __global Constraint4* gConstraints,\n" + " __global int* gN,\n" + " __global int* gOffsets,\n" + " __global int* batchSizes,\n" + " int maxBatch1,\n" + " int cellBatch,\n" + " int4 nSplit\n" + " )\n" + "{\n" + " //__local int ldsBatchIdx[WG_SIZE+1];\n" + " __local int ldsCurBatch;\n" + " __local int ldsNextBatch;\n" + " __local int ldsStart;\n" + " int lIdx = GET_LOCAL_IDX;\n" + " int wgIdx = GET_GROUP_IDX;\n" + "// int gIdx = GET_GLOBAL_IDX;\n" + "// debugInfo[gIdx].m_valInt0 = gIdx;\n" + " //debugInfo[gIdx].m_valInt1 = GET_GROUP_SIZE;\n" + " int zIdx = (wgIdx/((nSplit.x*nSplit.y)/4))*2+((cellBatch&4)>>2);\n" + " int remain= (wgIdx%((nSplit.x*nSplit.y)/4));\n" + " int yIdx = (remain/(nSplit.x/2))*2 + ((cellBatch&2)>>1);\n" + " int xIdx = (remain%(nSplit.x/2))*2 + (cellBatch&1);\n" + " int cellIdx = xIdx+yIdx*nSplit.x+zIdx*(nSplit.x*nSplit.y);\n" + " \n" + " if( gN[cellIdx] == 0 ) \n" + " return;\n" + " int maxBatch = batchSizes[cellIdx];\n" + " const int start = gOffsets[cellIdx];\n" + " const int end = start + gN[cellIdx];\n" + " \n" + " if( lIdx == 0 )\n" + " {\n" + " ldsCurBatch = 0;\n" + " ldsNextBatch = 0;\n" + " ldsStart = start;\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " int idx=ldsStart+lIdx;\n" + " while (ldsCurBatch < maxBatch)\n" + " {\n" + " for(; idx<end; )\n" + " {\n" + " if (gConstraints[idx].m_batchIdx == ldsCurBatch)\n" + " {\n" + " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" + " idx+=64;\n" + " } else\n" + " {\n" + " break;\n" + " }\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " if( lIdx == 0 )\n" + " {\n" + " ldsCurBatch++;\n" + " }\n" + " GROUP_LDS_BARRIER;\n" + " }\n" + " \n" + " \n" + "}\n" + "__kernel void solveSingleFrictionKernel(__global Body* gBodies,\n" + " __global Shape* gShapes,\n" + " __global Constraint4* gConstraints,\n" + " int cellIdx,\n" + " int 
batchOffset,\n" + " int numConstraintsInBatch\n" + " )\n" + "{\n" + " int index = get_global_id(0);\n" + " if (index < numConstraintsInBatch)\n" + " {\n" + " \n" + " int idx=batchOffset+index;\n" + " \n" + " solveFrictionConstraint( gBodies, gShapes, &gConstraints[idx] );\n" + " } \n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h index eb1834ee00..d53db03181 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup.h @@ -1,703 +1,702 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetupCL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. 
This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = 
dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#ifndef B3_CONTACT_CONSTRAINT5_H\n" -"#define B3_CONTACT_CONSTRAINT5_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" -"struct b3ContactConstraint4\n" -"{\n" -" b3Float4 m_linear;//normal?\n" -" b3Float4 m_worldPos[4];\n" -" b3Float4 m_center; // friction\n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; // friction\n" -" float m_fAppliedRambdaDt[2]; // friction\n" -" unsigned int m_bodyA;\n" -" unsigned int m_bodyB;\n" -" int m_batchIdx;\n" -" unsigned int m_paddings;\n" -"};\n" -"//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" -"inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" -"{\n" -" return constraint->m_linear.w; \n" -"}\n" -"#endif //B3_CONTACT_CONSTRAINT5_H\n" -"#ifndef 
B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 
b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" 
-"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( 
a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" -" void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" -"{\n" -" if (b3Fabs(n.z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n.y*n.y + n.z*n.z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n.z*k;\n" -" p[0].z = n.y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n.x*p[0].z;\n" -" q[0].z = n.x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n.x*n.x + n.y*n.y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n.y*k;\n" -" p[0].y = n.x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n.z*p[0].y;\n" -" q[0].y = n.z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -" \n" -"void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" -"{\n" -" *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" -" *angular0 = b3Cross3(r0, n);\n" -" *angular1 = -b3Cross3(r1, n);\n" -"}\n" -"float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" -" b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" -"{\n" -" return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(b3Float4ConstArg linear0, 
b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" -" float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" -"}\n" -"void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" -" b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" -" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" -" b3ContactConstraint4_t* dstC )\n" -"{\n" -" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" -" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -" float dtInv = 1.f/dt;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -" dstC->m_linear = src->m_worldNormalOnB;\n" -" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" -" b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" -" if( ic >= src->m_worldNormalOnB.w )//npoints\n" -" {\n" -" dstC->m_jacCoeffInv[ic] = 0.f;\n" -" continue;\n" -" }\n" -" float relVelN;\n" -" {\n" -" b3Float4 linear, angular0, angular1;\n" -" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" -" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, 
angVelA, linVelB, angVelB);\n" -" float e = 0.f;//src->getRestituitionCoeff();\n" -" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -" dstC->m_b[ic] = e*relVelN;\n" -" //float penetration = src->m_worldPosB[ic].w;\n" -" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" }\n" -" if( src->m_worldNormalOnB.w > 0 )//npoints\n" -" { // prepare friction\n" -" b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" -" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" -" center += src->m_worldPosB[i];\n" -" center /= (float)src->m_worldNormalOnB.w;\n" -" b3Float4 tangent[2];\n" -" b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" -" \n" -" b3Float4 r[2];\n" -" r[0] = center - posA;\n" -" r[1] = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" b3Float4 linear, angular0, angular1;\n" -" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB );\n" -" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" -" }\n" -" dstC->m_center = center;\n" -" }\n" -" for(int i=0; i<4; i++)\n" -" {\n" -" if( i<src->m_worldNormalOnB.w )\n" -" {\n" -" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" -" }\n" -" else\n" -" {\n" -" dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" -" }\n" -" }\n" -"}\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" 
-"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 
b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -" \n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"typedef struct\n" -"{\n" -" int m_nContacts;\n" -" float m_dt;\n" -" float m_positionDrift;\n" -" float m_positionConstraintCoeff;\n" -"} ConstBufferCTC;\n" -"__kernel\n" 
-"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" -"int nContacts,\n" -"float dt,\n" -"float positionDrift,\n" -"float positionConstraintCoeff\n" -")\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" -" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" -" b3ContactConstraint4_t cs;\n" -" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" -" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" -" &cs );\n" -" \n" -" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -" gConstraintOut[gIdx] = cs;\n" -" }\n" -"}\n" -; +static const char* solverSetupCL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Takahiro Harada\n" + "#ifndef B3_CONTACT4DATA_H\n" + "#define B3_CONTACT4DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int 
b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3Contact4Data b3Contact4Data_t;\n" + "struct b3Contact4Data\n" + "{\n" + " b3Float4 m_worldPosB[4];\n" + "// b3Float4 m_localPosA[4];\n" + "// b3Float4 m_localPosB[4];\n" + " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" + " unsigned short m_restituitionCoeffCmp;\n" + " unsigned short m_frictionCoeffCmp;\n" + " int m_batchIdx;\n" + " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_childIndexA;\n" + " int m_childIndexB;\n" + " int m_unused1;\n" + " int m_unused2;\n" + "};\n" + "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" + "{\n" + " return (int)contact->m_worldNormalOnB.w;\n" + "};\n" + "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" + "{\n" + " contact->m_worldNormalOnB.w = (float)numPoints;\n" + "};\n" + "#endif //B3_CONTACT4DATA_H\n" + "#ifndef B3_CONTACT_CONSTRAINT5_H\n" + "#define B3_CONTACT_CONSTRAINT5_H\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3ContactConstraint4 b3ContactConstraint4_t;\n" + "struct b3ContactConstraint4\n" + "{\n" + " b3Float4 m_linear;//normal?\n" + " b3Float4 m_worldPos[4];\n" + " b3Float4 m_center; // friction\n" + " float m_jacCoeffInv[4];\n" + " float m_b[4];\n" + " float m_appliedRambdaDt[4];\n" + " float m_fJacCoeffInv[2]; // friction\n" + " float 
m_fAppliedRambdaDt[2]; // friction\n" + " unsigned int m_bodyA;\n" + " unsigned int m_bodyB;\n" + " int m_batchIdx;\n" + " unsigned int m_paddings;\n" + "};\n" + "//inline void setFrictionCoeff(float value) { m_linear[3] = value; }\n" + "inline float b3GetFrictionCoeff(b3ContactConstraint4_t* constraint) \n" + "{\n" + " return constraint->m_linear.w; \n" + "}\n" + "#endif //B3_CONTACT_CONSTRAINT5_H\n" + "#ifndef B3_RIGIDBODY_DATA_H\n" + "#define B3_RIGIDBODY_DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifndef B3_QUAT_H\n" + "#define B3_QUAT_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif\n" + "#endif\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Quat;\n" + " #define b3QuatConstArg const b3Quat\n" + " \n" + " \n" + "inline float4 b3FastNormalize4(float4 v)\n" + "{\n" + " v = (float4)(v.xyz,0.f);\n" + " return fast_normalize(v);\n" + "}\n" + " \n" + "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" + "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" + "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" + "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" + "{\n" + " b3Quat ans;\n" + " ans = b3Cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + " ans.w = a.w*b.w - b3Dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" + "{\n" + " b3Quat q;\n" + " q=in;\n" + " //return b3FastNormalize4(in);\n" + " float len = native_sqrt(dot(q, q));\n" + " if(len > 0.f)\n" + " {\n" + " q *= 1.f / len;\n" + " }\n" + " else\n" + " {\n" + " q.x = q.y = q.z = 0.f;\n" + " q.w = 1.f;\n" + " }\n" + " return q;\n" + "}\n" + 
"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " b3Quat qInv = b3QuatInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " return b3QuatRotate( b3QuatInvert( q ), vec );\n" + "}\n" + "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" + "{\n" + " return b3QuatRotate( orientation, point ) + (translation);\n" + "}\n" + " \n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifndef B3_MAT3x3_H\n" + "#define B3_MAT3x3_H\n" + "#ifndef B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "typedef struct\n" + "{\n" + " b3Float4 m_row[3];\n" + "}b3Mat3x3;\n" + "#define b3Mat3x3ConstArg const b3Mat3x3\n" + "#define b3GetRow(m,row) (m.m_row[row])\n" + "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" + "{\n" + " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" + " b3Mat3x3 out;\n" + " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" + " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" + " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" + " out.m_row[0].w = 0.f;\n" + " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" + " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" + " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" + " out.m_row[1].w = 0.f;\n" + " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" + " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" + " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" + " out.m_row[2].w = 0.f;\n" + " return out;\n" + "}\n" + "inline b3Mat3x3 
b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = fabs(matIn.m_row[0]);\n" + " out.m_row[1] = fabs(matIn.m_row[1]);\n" + " out.m_row[2] = fabs(matIn.m_row[2]);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtZero();\n" + "__inline\n" + "b3Mat3x3 mtIdentity();\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Mat3x3 mtZero()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(0.f);\n" + " m.m_row[1] = (b3Float4)(0.f);\n" + " m.m_row[2] = (b3Float4)(0.f);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtIdentity()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(1,0,0,0);\n" + " m.m_row[1] = (b3Float4)(0,1,0,0);\n" + " m.m_row[2] = (b3Float4)(0,0,1,0);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" + " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" + " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" + "{\n" + " b3Mat3x3 transB;\n" + " transB = mtTranspose( b );\n" + " b3Mat3x3 ans;\n" + " // why this doesn't run when 0ing in the for{}\n" + " a.m_row[0].w = 0.f;\n" + " a.m_row[1].w = 0.f;\n" + " a.m_row[2].w = 0.f;\n" + " for(int i=0; i<3; i++)\n" + " {\n" + "// a.m_row[i].w = 0.f;\n" + " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" + " ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" + " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" + " ans.m_row[i].w = 0.f;\n" + " }\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" + "{\n" 
+ " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a.m_row[0], b );\n" + " ans.y = b3Dot3F4( a.m_row[1], b );\n" + " ans.z = b3Dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" + "{\n" + " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a, colx );\n" + " ans.y = b3Dot3F4( a, coly );\n" + " ans.z = b3Dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "#endif\n" + "#endif //B3_MAT3x3_H\n" + "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" + "struct b3RigidBodyData\n" + "{\n" + " b3Float4 m_pos;\n" + " b3Quat m_quat;\n" + " b3Float4 m_linVel;\n" + " b3Float4 m_angVel;\n" + " int m_collidableIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "};\n" + "typedef struct b3InertiaData b3InertiaData_t;\n" + "struct b3InertiaData\n" + "{\n" + " b3Mat3x3 m_invInertiaWorld;\n" + " b3Mat3x3 m_initInvInertia;\n" + "};\n" + "#endif //B3_RIGIDBODY_DATA_H\n" + " \n" + "void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q);\n" + " void b3PlaneSpace1 (b3Float4ConstArg n, b3Float4* p, b3Float4* q)\n" + "{\n" + " if (b3Fabs(n.z) > 0.70710678f) {\n" + " // choose p in y-z plane\n" + " float a = n.y*n.y + n.z*n.z;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = 0;\n" + " p[0].y = -n.z*k;\n" + " p[0].z = n.y*k;\n" + " // set q = n x p\n" + " q[0].x = a*k;\n" + " q[0].y = -n.x*p[0].z;\n" + " q[0].z = n.x*p[0].y;\n" + " }\n" + " else {\n" + " // choose p in x-y plane\n" + " float a = n.x*n.x + n.y*n.y;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = -n.y*k;\n" + " p[0].y = n.x*k;\n" + " p[0].z = 0;\n" + " // set q = n x p\n" + " q[0].x = -n.z*p[0].y;\n" + " q[0].y = n.z*p[0].x;\n" + " q[0].z = a*k;\n" + " }\n" + "}\n" + " 
\n" + "void setLinearAndAngular( b3Float4ConstArg n, b3Float4ConstArg r0, b3Float4ConstArg r1, b3Float4* linear, b3Float4* angular0, b3Float4* angular1)\n" + "{\n" + " *linear = b3MakeFloat4(n.x,n.y,n.z,0.f);\n" + " *angular0 = b3Cross3(r0, n);\n" + " *angular1 = -b3Cross3(r1, n);\n" + "}\n" + "float calcRelVel( b3Float4ConstArg l0, b3Float4ConstArg l1, b3Float4ConstArg a0, b3Float4ConstArg a1, b3Float4ConstArg linVel0,\n" + " b3Float4ConstArg angVel0, b3Float4ConstArg linVel1, b3Float4ConstArg angVel1 )\n" + "{\n" + " return b3Dot3F4(l0, linVel0) + b3Dot3F4(a0, angVel0) + b3Dot3F4(l1, linVel1) + b3Dot3F4(a1, angVel1);\n" + "}\n" + "float calcJacCoeff(b3Float4ConstArg linear0, b3Float4ConstArg linear1, b3Float4ConstArg angular0, b3Float4ConstArg angular1,\n" + " float invMass0, const b3Mat3x3* invInertia0, float invMass1, const b3Mat3x3* invInertia1)\n" + "{\n" + " // linear0,1 are normlized\n" + " float jmj0 = invMass0;//b3Dot3F4(linear0, linear0)*invMass0;\n" + " float jmj1 = b3Dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" + " float jmj2 = invMass1;//b3Dot3F4(linear1, linear1)*invMass1;\n" + " float jmj3 = b3Dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" + " return -1.f/(jmj0+jmj1+jmj2+jmj3);\n" + "}\n" + "void setConstraint4( b3Float4ConstArg posA, b3Float4ConstArg linVelA, b3Float4ConstArg angVelA, float invMassA, b3Mat3x3ConstArg invInertiaA,\n" + " b3Float4ConstArg posB, b3Float4ConstArg linVelB, b3Float4ConstArg angVelB, float invMassB, b3Mat3x3ConstArg invInertiaB, \n" + " __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,\n" + " b3ContactConstraint4_t* dstC )\n" + "{\n" + " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" + " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" + " float dtInv = 1.f/dt;\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " dstC->m_appliedRambdaDt[ic] = 0.f;\n" + " }\n" + " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" + " dstC->m_linear = 
src->m_worldNormalOnB;\n" + " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " b3Float4 r0 = src->m_worldPosB[ic] - posA;\n" + " b3Float4 r1 = src->m_worldPosB[ic] - posB;\n" + " if( ic >= src->m_worldNormalOnB.w )//npoints\n" + " {\n" + " dstC->m_jacCoeffInv[ic] = 0.f;\n" + " continue;\n" + " }\n" + " float relVelN;\n" + " {\n" + " b3Float4 linear, angular0, angular1;\n" + " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" + " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" + " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" + " relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" + " linVelA, angVelA, linVelB, angVelB);\n" + " float e = 0.f;//src->getRestituitionCoeff();\n" + " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" + " dstC->m_b[ic] = e*relVelN;\n" + " //float penetration = src->m_worldPosB[ic].w;\n" + " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" + " dstC->m_appliedRambdaDt[ic] = 0.f;\n" + " }\n" + " }\n" + " if( src->m_worldNormalOnB.w > 0 )//npoints\n" + " { // prepare friction\n" + " b3Float4 center = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" + " for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" + " center += src->m_worldPosB[i];\n" + " center /= (float)src->m_worldNormalOnB.w;\n" + " b3Float4 tangent[2];\n" + " b3PlaneSpace1(src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" + " \n" + " b3Float4 r[2];\n" + " r[0] = center - posA;\n" + " r[1] = center - posB;\n" + " for(int i=0; i<2; i++)\n" + " {\n" + " b3Float4 linear, angular0, angular1;\n" + " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" + " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" + " invMassA, &invInertiaA, invMassB, &invInertiaB );\n" + " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" + " }\n" + " dstC->m_center = center;\n" + " }\n" + " for(int i=0; i<4; i++)\n" + 
" {\n" + " if( i<src->m_worldNormalOnB.w )\n" + " {\n" + " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" + " }\n" + " else\n" + " {\n" + " dstC->m_worldPos[i] = b3MakeFloat4(0.f,0.f,0.f,0.f);\n" + " }\n" + " }\n" + "}\n" + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile global int*\n" + "#endif\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define make_float4 (float4)\n" + "#define make_float2 (float2)\n" + "#define make_uint4 (uint4)\n" + "#define make_int4 (int4)\n" + "#define make_uint2 (uint2)\n" + "#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "///////////////////////////////////////\n" + "// Vector\n" + "///////////////////////////////////////\n" + "__inline\n" + "float fastDiv(float numerator, 
float denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "// return numerator/denominator; \n" + "}\n" + "__inline\n" + "float4 fastDiv4(float4 numerator, float4 denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "}\n" + "__inline\n" + "float fastSqrtf(float f2)\n" + "{\n" + " return native_sqrt(f2);\n" + "// return sqrt(f2);\n" + "}\n" + "__inline\n" + "float fastRSqrt(float f2)\n" + "{\n" + " return native_rsqrt(f2);\n" + "}\n" + "__inline\n" + "float fastLength4(float4 v)\n" + "{\n" + " return fast_length(v);\n" + "}\n" + "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "float sqrtf(float a)\n" + "{\n" + "// return sqrt(a);\n" + " return native_sqrt(a);\n" + "}\n" + "__inline\n" + "float4 cross3(float4 a, float4 b)\n" + "{\n" + " return cross(a,b);\n" + "}\n" + "__inline\n" + "float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = make_float4(a.xyz,0.f);\n" + " float4 b1 = make_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "__inline\n" + "float length3(const float4 a)\n" + "{\n" + " return sqrtf(dot3F4(a,a));\n" + "}\n" + "__inline\n" + "float dot4(const float4 a, const float4 b)\n" + "{\n" + " return dot( a, b );\n" + "}\n" + "// for height\n" + "__inline\n" + "float dot3w1(const float4 point, const float4 eqn)\n" + "{\n" + " return dot3F4(point,eqn) + eqn.w;\n" + "}\n" + "__inline\n" + "float4 normalize3(const float4 a)\n" + "{\n" + " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" + " return fastNormalize4( n );\n" + "// float length = sqrtf(dot3F4(a, a));\n" + "// return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 normalize4(const float4 a)\n" + "{\n" + " float length = sqrtf(dot4(a, a));\n" + " return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" + "{\n" + " float4 eqn;\n" + " float4 ab = b-a;\n" + " float4 ac = c-a;\n" + " eqn = 
normalize3( cross3(ab, ac) );\n" + " eqn.w = -dot3F4(eqn,a);\n" + " return eqn;\n" + "}\n" + "#define WG_SIZE 64\n" + "typedef struct\n" + "{\n" + " int m_nConstraints;\n" + " int m_start;\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct\n" + "{\n" + " int m_solveFriction;\n" + " int m_maxBatch; // long batch really kills the performance\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBufferBatchSolve;\n" + " \n" + "typedef struct \n" + "{\n" + " int m_valInt0;\n" + " int m_valInt1;\n" + " int m_valInt2;\n" + " int m_valInt3;\n" + " float m_val0;\n" + " float m_val1;\n" + " float m_val2;\n" + " float m_val3;\n" + "} SolverDebugInfo;\n" + "typedef struct\n" + "{\n" + " int m_nContacts;\n" + " float m_dt;\n" + " float m_positionDrift;\n" + " float m_positionConstraintCoeff;\n" + "} ConstBufferCTC;\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void ContactToConstraintKernel(__global struct b3Contact4Data* gContact, __global b3RigidBodyData_t* gBodies, __global b3InertiaData_t* gShapes, __global b3ContactConstraint4_t* gConstraintOut, \n" + "int nContacts,\n" + "float dt,\n" + "float positionDrift,\n" + "float positionConstraintCoeff\n" + ")\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " \n" + " if( gIdx < nContacts )\n" + " {\n" + " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" + " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia;\n" 
+ " b3ContactConstraint4_t cs;\n" + " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" + " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,\n" + " &cs );\n" + " \n" + " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" + " gConstraintOut[gIdx] = cs;\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h index 1b5819f6cf..1e6e3579b6 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverSetup2.h @@ -1,601 +1,600 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverSetup2CL= \ -"/*\n" -"Copyright (c) 2012 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. 
This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Takahiro Harada\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = 
dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE 
mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a, float4 b)\n" -"{\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 
b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"Matrix3x3 mtZero();\n" -"__inline\n" -"Matrix3x3 mtIdentity();\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"Matrix3x3 mtZero()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(0.f);\n" -" m.m_row[1] = (float4)(0.f);\n" -" m.m_row[2] = (float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtIdentity()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(1,0,0,0);\n" -" m.m_row[1] = (float4)(0,1,0,0);\n" -" m.m_row[2] = (float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 
0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" -"{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" 
-"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings[1];\n" -"} Constraint4;\n" -"typedef struct\n" -"{\n" -" int m_nConstraints;\n" -" int m_start;\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBuffer;\n" -"typedef struct\n" -"{\n" -" int m_solveFriction;\n" -" int m_maxBatch; // long batch really kills the performance\n" -" int m_batchIdx;\n" -" int m_nSplit;\n" -"// int m_paddings[1];\n" -"} ConstBufferBatchSolve;\n" -" \n" -"typedef struct \n" -"{\n" -" int m_valInt0;\n" -" int m_valInt1;\n" -" int m_valInt2;\n" -" int m_valInt3;\n" -" float m_val0;\n" -" float m_val1;\n" -" float m_val2;\n" -" float m_val3;\n" -"} SolverDebugInfo;\n" -"// others\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void 
ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" -"{\n" -" int nContacts = cb.x;\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int srcIdx = sortData[gIdx].y;\n" -" out[gIdx] = in[srcIdx];\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sd;\n" -" sd.x = contactsIn[gIdx].m_childIndexB;\n" -" sd.y = gIdx;\n" -" sortDataOut[gIdx] = sd;\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" sdOut.x = contactsIn[sdIn.y].m_childIndexA;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < nContacts )\n" -" {\n" -" int2 sdIn;\n" -" sdIn = sortDataInOut[gIdx];\n" -" int2 sdOut;\n" -" 
sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n" -" sdOut.y = sdIn.y;\n" -" sortDataInOut[gIdx] = sdOut;\n" -" }\n" -"}\n" -"typedef struct\n" -"{\n" -" int m_nContacts;\n" -" int m_staticIdx;\n" -" float m_scale;\n" -" int m_nSplit;\n" -"} ConstBufferSSD;\n" -"__constant const int gridTable4x4[] = \n" -"{\n" -" 0,1,17,16,\n" -" 1,2,18,19,\n" -" 17,18,32,3,\n" -" 16,19,3,34\n" -"};\n" -"__constant const int gridTable8x8[] = \n" -"{\n" -" 0, 2, 3, 16, 17, 18, 19, 1,\n" -" 66, 64, 80, 67, 82, 81, 65, 83,\n" -" 131,144,128,130,147,129,145,146,\n" -" 208,195,194,192,193,211,210,209,\n" -" 21, 22, 23, 5, 4, 6, 7, 20,\n" -" 86, 85, 69, 87, 70, 68, 84, 71,\n" -" 151,133,149,150,135,148,132,134,\n" -" 197,27,214,213,212,199,198,196\n" -" \n" -"};\n" -"#define USE_SPATIAL_BATCHING 1\n" -"#define USE_4x4_GRID 1\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" -"int nContacts,float scale,int4 nSplit,int staticIdx)\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aPtrAndSignBit = gContact[gIdx].m_bodyAPtrAndSignBit;\n" -" int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" -" int aIdx = abs(aPtrAndSignBit );\n" -" int bIdx = abs(bPtrAndSignBit);\n" -" bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" -" bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" -"#if USE_SPATIAL_BATCHING \n" -" int idx = (aStatic)? 
bIdx: aIdx;\n" -" float4 p = gBodies[idx].m_pos;\n" -" int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n" -" int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n" -" int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n" -" int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n" -" \n" -"#else//USE_SPATIAL_BATCHING\n" -" #if USE_4x4_GRID\n" -" int aa = aIdx&3;\n" -" int bb = bIdx&3;\n" -" if (aStatic)\n" -" aa = bb;\n" -" if (bStatic)\n" -" bb = aa;\n" -" int gridIndex = aa + bb*4;\n" -" int newIndex = gridTable4x4[gridIndex];\n" -" #else//USE_4x4_GRID\n" -" int aa = aIdx&7;\n" -" int bb = bIdx&7;\n" -" if (aStatic)\n" -" aa = bb;\n" -" if (bStatic)\n" -" bb = aa;\n" -" int gridIndex = aa + bb*8;\n" -" int newIndex = gridTable8x8[gridIndex];\n" -" #endif//USE_4x4_GRID\n" -"#endif//USE_SPATIAL_BATCHING\n" -" gSortDataOut[gIdx].x = newIndex;\n" -" gSortDataOut[gIdx].y = gIdx;\n" -" }\n" -" else\n" -" {\n" -" gSortDataOut[gIdx].x = 0xffffffff;\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" if( gIdx < cb.x )\n" -" {\n" -" gOut[gIdx] = gIn[gIdx];\n" -" }\n" -"}\n" -; +static const char* solverSetup2CL = + "/*\n" + "Copyright (c) 2012 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. 
If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Takahiro Harada\n" + "#ifndef B3_CONTACT4DATA_H\n" + "#define B3_CONTACT4DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int 
b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3Contact4Data b3Contact4Data_t;\n" + "struct b3Contact4Data\n" + "{\n" + " b3Float4 m_worldPosB[4];\n" + "// b3Float4 m_localPosA[4];\n" + "// b3Float4 m_localPosB[4];\n" + " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" + " unsigned short m_restituitionCoeffCmp;\n" + " unsigned short m_frictionCoeffCmp;\n" + " int m_batchIdx;\n" + " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_childIndexA;\n" + " int m_childIndexB;\n" + " int m_unused1;\n" + " int m_unused2;\n" + "};\n" + "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" + "{\n" + " return (int)contact->m_worldNormalOnB.w;\n" + "};\n" + "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" + "{\n" + " contact->m_worldNormalOnB.w = (float)numPoints;\n" + "};\n" + "#endif //B3_CONTACT4DATA_H\n" + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile global int*\n" + "#endif\n" + "typedef unsigned int 
u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define make_float4 (float4)\n" + "#define make_float2 (float2)\n" + "#define make_uint4 (uint4)\n" + "#define make_int4 (int4)\n" + "#define make_uint2 (uint2)\n" + "#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "///////////////////////////////////////\n" + "// Vector\n" + "///////////////////////////////////////\n" + "__inline\n" + "float fastDiv(float numerator, float denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "// return numerator/denominator; \n" + "}\n" + "__inline\n" + "float4 fastDiv4(float4 numerator, float4 denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "}\n" + "__inline\n" + "float fastSqrtf(float f2)\n" + "{\n" + " return native_sqrt(f2);\n" + "// return sqrt(f2);\n" + "}\n" + "__inline\n" + "float fastRSqrt(float f2)\n" + "{\n" + " return native_rsqrt(f2);\n" + "}\n" + "__inline\n" + "float fastLength4(float4 v)\n" + "{\n" + " return fast_length(v);\n" + "}\n" + "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "float sqrtf(float a)\n" + "{\n" + "// return sqrt(a);\n" + " return 
native_sqrt(a);\n" + "}\n" + "__inline\n" + "float4 cross3(float4 a, float4 b)\n" + "{\n" + " return cross(a,b);\n" + "}\n" + "__inline\n" + "float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = make_float4(a.xyz,0.f);\n" + " float4 b1 = make_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "__inline\n" + "float length3(const float4 a)\n" + "{\n" + " return sqrtf(dot3F4(a,a));\n" + "}\n" + "__inline\n" + "float dot4(const float4 a, const float4 b)\n" + "{\n" + " return dot( a, b );\n" + "}\n" + "// for height\n" + "__inline\n" + "float dot3w1(const float4 point, const float4 eqn)\n" + "{\n" + " return dot3F4(point,eqn) + eqn.w;\n" + "}\n" + "__inline\n" + "float4 normalize3(const float4 a)\n" + "{\n" + " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" + " return fastNormalize4( n );\n" + "// float length = sqrtf(dot3F4(a, a));\n" + "// return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 normalize4(const float4 a)\n" + "{\n" + " float length = sqrtf(dot4(a, a));\n" + " return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" + "{\n" + " float4 eqn;\n" + " float4 ab = b-a;\n" + " float4 ac = c-a;\n" + " eqn = normalize3( cross3(ab, ac) );\n" + " eqn.w = -dot3F4(eqn,a);\n" + " return eqn;\n" + "}\n" + "///////////////////////////////////////\n" + "// Matrix3x3\n" + "///////////////////////////////////////\n" + "typedef struct\n" + "{\n" + " float4 m_row[3];\n" + "}Matrix3x3;\n" + "__inline\n" + "Matrix3x3 mtZero();\n" + "__inline\n" + "Matrix3x3 mtIdentity();\n" + "__inline\n" + "Matrix3x3 mtTranspose(Matrix3x3 m);\n" + "__inline\n" + "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b);\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b);\n" + "__inline\n" + "Matrix3x3 mtZero()\n" + "{\n" + " Matrix3x3 m;\n" + " m.m_row[0] = (float4)(0.f);\n" + " m.m_row[1] = (float4)(0.f);\n" + " m.m_row[2] = (float4)(0.f);\n" + " return 
m;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtIdentity()\n" + "{\n" + " Matrix3x3 m;\n" + " m.m_row[0] = (float4)(1,0,0,0);\n" + " m.m_row[1] = (float4)(0,1,0,0);\n" + " m.m_row[2] = (float4)(0,0,1,0);\n" + " return m;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtTranspose(Matrix3x3 m)\n" + "{\n" + " Matrix3x3 out;\n" + " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" + " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" + " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" + " return out;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" + "{\n" + " Matrix3x3 transB;\n" + " transB = mtTranspose( b );\n" + " Matrix3x3 ans;\n" + " // why this doesn't run when 0ing in the for{}\n" + " a.m_row[0].w = 0.f;\n" + " a.m_row[1].w = 0.f;\n" + " a.m_row[2].w = 0.f;\n" + " for(int i=0; i<3; i++)\n" + " {\n" + "// a.m_row[i].w = 0.f;\n" + " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" + " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" + " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" + " ans.m_row[i].w = 0.f;\n" + " }\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b)\n" + "{\n" + " float4 ans;\n" + " ans.x = dot3F4( a.m_row[0], b );\n" + " ans.y = dot3F4( a.m_row[1], b );\n" + " ans.z = dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b)\n" + "{\n" + " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " float4 ans;\n" + " ans.x = dot3F4( a, colx );\n" + " ans.y = dot3F4( a, coly );\n" + " ans.z = dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "///////////////////////////////////////\n" + "// Quaternion\n" + "///////////////////////////////////////\n" + 
"typedef float4 Quaternion;\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b);\n" + "__inline\n" + "Quaternion qtNormalize(Quaternion in);\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec);\n" + "__inline\n" + "Quaternion qtInvert(Quaternion q);\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b)\n" + "{\n" + " Quaternion ans;\n" + " ans = cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + " ans.w = a.w*b.w - dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "__inline\n" + "Quaternion qtNormalize(Quaternion in)\n" + "{\n" + " return fastNormalize4(in);\n" + "// in /= length( in );\n" + "// return in;\n" + "}\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec)\n" + "{\n" + " Quaternion qInv = qtInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "__inline\n" + "Quaternion qtInvert(Quaternion q)\n" + "{\n" + " return (Quaternion)(-q.xyz, q.w);\n" + "}\n" + "__inline\n" + "float4 qtInvRotate(const Quaternion q, float4 vec)\n" + "{\n" + " return qtRotate( qtInvert( q ), vec );\n" + "}\n" + "#define WG_SIZE 64\n" + "typedef struct\n" + "{\n" + " float4 m_pos;\n" + " Quaternion m_quat;\n" + " float4 m_linVel;\n" + " float4 m_angVel;\n" + " u32 m_shapeIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "} Body;\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_invInertia;\n" + " Matrix3x3 m_initInvInertia;\n" + "} Shape;\n" + "typedef struct\n" + "{\n" + " float4 m_linear;\n" + " float4 m_worldPos[4];\n" + " float4 m_center; \n" + " float m_jacCoeffInv[4];\n" + " float m_b[4];\n" + " float m_appliedRambdaDt[4];\n" + " float m_fJacCoeffInv[2]; \n" + " float m_fAppliedRambdaDt[2]; \n" + " u32 m_bodyA;\n" + " u32 m_bodyB;\n" + " int m_batchIdx;\n" + " u32 m_paddings[1];\n" + "} Constraint4;\n" + "typedef struct\n" + "{\n" + " int 
m_nConstraints;\n" + " int m_start;\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBuffer;\n" + "typedef struct\n" + "{\n" + " int m_solveFriction;\n" + " int m_maxBatch; // long batch really kills the performance\n" + " int m_batchIdx;\n" + " int m_nSplit;\n" + "// int m_paddings[1];\n" + "} ConstBufferBatchSolve;\n" + " \n" + "typedef struct \n" + "{\n" + " int m_valInt0;\n" + " int m_valInt1;\n" + " int m_valInt2;\n" + " int m_valInt3;\n" + " float m_val0;\n" + " float m_val1;\n" + " float m_val2;\n" + " float m_val3;\n" + "} SolverDebugInfo;\n" + "// others\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void ReorderContactKernel(__global struct b3Contact4Data* in, __global struct b3Contact4Data* out, __global int2* sortData, int4 cb )\n" + "{\n" + " int nContacts = cb.x;\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int srcIdx = sortData[gIdx].y;\n" + " out[gIdx] = in[srcIdx];\n" + " }\n" + "}\n" + "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void SetDeterminismSortDataChildShapeB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataOut, int nContacts)\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int2 sd;\n" + " sd.x = contactsIn[gIdx].m_childIndexB;\n" + " sd.y = gIdx;\n" + " sortDataOut[gIdx] = sd;\n" + " }\n" + "}\n" + "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void SetDeterminismSortDataChildShapeA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int2 sdIn;\n" + " sdIn = sortDataInOut[gIdx];\n" + " int2 sdOut;\n" + " sdOut.x = contactsIn[sdIn.y].m_childIndexA;\n" + " sdOut.y = sdIn.y;\n" + " sortDataInOut[gIdx] = sdOut;\n" + " }\n" + "}\n" + "__kernel __attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void 
SetDeterminismSortDataBodyA(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int2 sdIn;\n" + " sdIn = sortDataInOut[gIdx];\n" + " int2 sdOut;\n" + " sdOut.x = contactsIn[sdIn.y].m_bodyAPtrAndSignBit;\n" + " sdOut.y = sdIn.y;\n" + " sortDataInOut[gIdx] = sdOut;\n" + " }\n" + "}\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void SetDeterminismSortDataBodyB(__global struct b3Contact4Data* contactsIn, __global int2* sortDataInOut, int nContacts)\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < nContacts )\n" + " {\n" + " int2 sdIn;\n" + " sdIn = sortDataInOut[gIdx];\n" + " int2 sdOut;\n" + " sdOut.x = contactsIn[sdIn.y].m_bodyBPtrAndSignBit;\n" + " sdOut.y = sdIn.y;\n" + " sortDataInOut[gIdx] = sdOut;\n" + " }\n" + "}\n" + "typedef struct\n" + "{\n" + " int m_nContacts;\n" + " int m_staticIdx;\n" + " float m_scale;\n" + " int m_nSplit;\n" + "} ConstBufferSSD;\n" + "__constant const int gridTable4x4[] = \n" + "{\n" + " 0,1,17,16,\n" + " 1,2,18,19,\n" + " 17,18,32,3,\n" + " 16,19,3,34\n" + "};\n" + "__constant const int gridTable8x8[] = \n" + "{\n" + " 0, 2, 3, 16, 17, 18, 19, 1,\n" + " 66, 64, 80, 67, 82, 81, 65, 83,\n" + " 131,144,128,130,147,129,145,146,\n" + " 208,195,194,192,193,211,210,209,\n" + " 21, 22, 23, 5, 4, 6, 7, 20,\n" + " 86, 85, 69, 87, 70, 68, 84, 71,\n" + " 151,133,149,150,135,148,132,134,\n" + " 197,27,214,213,212,199,198,196\n" + " \n" + "};\n" + "#define USE_SPATIAL_BATCHING 1\n" + "#define USE_4x4_GRID 1\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void SetSortDataKernel(__global struct b3Contact4Data* gContact, __global Body* gBodies, __global int2* gSortDataOut, \n" + "int nContacts,float scale,int4 nSplit,int staticIdx)\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " \n" + " if( gIdx < nContacts )\n" + " {\n" + " int aPtrAndSignBit = 
gContact[gIdx].m_bodyAPtrAndSignBit;\n" + " int bPtrAndSignBit = gContact[gIdx].m_bodyBPtrAndSignBit;\n" + " int aIdx = abs(aPtrAndSignBit );\n" + " int bIdx = abs(bPtrAndSignBit);\n" + " bool aStatic = (aPtrAndSignBit<0) ||(aPtrAndSignBit==staticIdx);\n" + " bool bStatic = (bPtrAndSignBit<0) ||(bPtrAndSignBit==staticIdx);\n" + "#if USE_SPATIAL_BATCHING \n" + " int idx = (aStatic)? bIdx: aIdx;\n" + " float4 p = gBodies[idx].m_pos;\n" + " int xIdx = (int)((p.x-((p.x<0.f)?1.f:0.f))*scale) & (nSplit.x-1);\n" + " int yIdx = (int)((p.y-((p.y<0.f)?1.f:0.f))*scale) & (nSplit.y-1);\n" + " int zIdx = (int)((p.z-((p.z<0.f)?1.f:0.f))*scale) & (nSplit.z-1);\n" + " int newIndex = (xIdx+yIdx*nSplit.x+zIdx*nSplit.x*nSplit.y);\n" + " \n" + "#else//USE_SPATIAL_BATCHING\n" + " #if USE_4x4_GRID\n" + " int aa = aIdx&3;\n" + " int bb = bIdx&3;\n" + " if (aStatic)\n" + " aa = bb;\n" + " if (bStatic)\n" + " bb = aa;\n" + " int gridIndex = aa + bb*4;\n" + " int newIndex = gridTable4x4[gridIndex];\n" + " #else//USE_4x4_GRID\n" + " int aa = aIdx&7;\n" + " int bb = bIdx&7;\n" + " if (aStatic)\n" + " aa = bb;\n" + " if (bStatic)\n" + " bb = aa;\n" + " int gridIndex = aa + bb*8;\n" + " int newIndex = gridTable8x8[gridIndex];\n" + " #endif//USE_4x4_GRID\n" + "#endif//USE_SPATIAL_BATCHING\n" + " gSortDataOut[gIdx].x = newIndex;\n" + " gSortDataOut[gIdx].y = gIdx;\n" + " }\n" + " else\n" + " {\n" + " gSortDataOut[gIdx].x = 0xffffffff;\n" + " }\n" + "}\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void CopyConstraintKernel(__global struct b3Contact4Data* gIn, __global struct b3Contact4Data* gOut, int4 cb )\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " if( gIdx < cb.x )\n" + " {\n" + " gOut[gIdx] = gIn[gIdx];\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h index c0173ad9f4..f4d98d9941 100644 --- 
a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/solverUtils.h @@ -1,909 +1,908 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* solverUtilsCL= \ -"/*\n" -"Copyright (c) 2013 Advanced Micro Devices, Inc. \n" -"This software is provided 'as-is', without any express or implied warranty.\n" -"In no event will the authors be held liable for any damages arising from the use of this software.\n" -"Permission is granted to anyone to use this software for any purpose, \n" -"including commercial applications, and to alter it and redistribute it freely, \n" -"subject to the following restrictions:\n" -"1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" -"2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" -"3. 
This notice may not be removed or altered from any source distribution.\n" -"*/\n" -"//Originally written by Erwin Coumans\n" -"#ifndef B3_CONTACT4DATA_H\n" -"#define B3_CONTACT4DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = 
dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"typedef struct b3Contact4Data b3Contact4Data_t;\n" -"struct b3Contact4Data\n" -"{\n" -" b3Float4 m_worldPosB[4];\n" -"// b3Float4 m_localPosA[4];\n" -"// b3Float4 m_localPosB[4];\n" -" b3Float4 m_worldNormalOnB; // w: m_nPoints\n" -" unsigned short m_restituitionCoeffCmp;\n" -" unsigned short m_frictionCoeffCmp;\n" -" int m_batchIdx;\n" -" int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" -" int m_bodyBPtrAndSignBit;\n" -" int m_childIndexA;\n" -" int m_childIndexB;\n" -" int m_unused1;\n" -" int m_unused2;\n" -"};\n" -"inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" -"{\n" -" return (int)contact->m_worldNormalOnB.w;\n" -"};\n" -"inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" -"{\n" -" contact->m_worldNormalOnB.w = (float)numPoints;\n" -"};\n" -"#endif //B3_CONTACT4DATA_H\n" -"#pragma OPENCL EXTENSION cl_amd_printf : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" -"#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" -"#ifdef cl_ext_atomic_counters_32\n" -"#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" -"#else\n" -"#define counter32_t volatile global int*\n" -"#endif\n" -"typedef unsigned int u32;\n" -"typedef unsigned short u16;\n" -"typedef unsigned char u8;\n" -"#define GET_GROUP_IDX get_group_id(0)\n" -"#define GET_LOCAL_IDX get_local_id(0)\n" -"#define GET_GLOBAL_IDX get_global_id(0)\n" -"#define GET_GROUP_SIZE get_local_size(0)\n" -"#define GET_NUM_GROUPS get_num_groups(0)\n" -"#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" -"#define GROUP_MEM_FENCE 
mem_fence(CLK_LOCAL_MEM_FENCE)\n" -"#define AtomInc(x) atom_inc(&(x))\n" -"#define AtomInc1(x, out) out = atom_inc(&(x))\n" -"#define AppendInc(x, out) out = atomic_inc(x)\n" -"#define AtomAdd(x, value) atom_add(&(x), value)\n" -"#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" -"#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" -"#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" -"#define make_float4 (float4)\n" -"#define make_float2 (float2)\n" -"#define make_uint4 (uint4)\n" -"#define make_int4 (int4)\n" -"#define make_uint2 (uint2)\n" -"#define make_int2 (int2)\n" -"#define max2 max\n" -"#define min2 min\n" -"///////////////////////////////////////\n" -"// Vector\n" -"///////////////////////////////////////\n" -"__inline\n" -"float fastDiv(float numerator, float denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"// return numerator/denominator; \n" -"}\n" -"__inline\n" -"float4 fastDiv4(float4 numerator, float4 denominator)\n" -"{\n" -" return native_divide(numerator, denominator); \n" -"}\n" -"__inline\n" -"float fastSqrtf(float f2)\n" -"{\n" -" return native_sqrt(f2);\n" -"// return sqrt(f2);\n" -"}\n" -"__inline\n" -"float fastRSqrt(float f2)\n" -"{\n" -" return native_rsqrt(f2);\n" -"}\n" -"__inline\n" -"float fastLength4(float4 v)\n" -"{\n" -" return fast_length(v);\n" -"}\n" -"__inline\n" -"float4 fastNormalize4(float4 v)\n" -"{\n" -" return fast_normalize(v);\n" -"}\n" -"__inline\n" -"float sqrtf(float a)\n" -"{\n" -"// return sqrt(a);\n" -" return native_sqrt(a);\n" -"}\n" -"__inline\n" -"float4 cross3(float4 a1, float4 b1)\n" -"{\n" -" float4 a=make_float4(a1.xyz,0.f);\n" -" float4 b=make_float4(b1.xyz,0.f);\n" -" //float4 a=a1;\n" -" //float4 b=b1;\n" -" return cross(a,b);\n" -"}\n" -"__inline\n" -"float dot3F4(float4 a, float4 b)\n" -"{\n" -" float4 a1 = make_float4(a.xyz,0.f);\n" -" float4 b1 = make_float4(b.xyz,0.f);\n" -" return dot(a1, b1);\n" -"}\n" -"__inline\n" -"float 
length3(const float4 a)\n" -"{\n" -" return sqrtf(dot3F4(a,a));\n" -"}\n" -"__inline\n" -"float dot4(const float4 a, const float4 b)\n" -"{\n" -" return dot( a, b );\n" -"}\n" -"// for height\n" -"__inline\n" -"float dot3w1(const float4 point, const float4 eqn)\n" -"{\n" -" return dot3F4(point,eqn) + eqn.w;\n" -"}\n" -"__inline\n" -"float4 normalize3(const float4 a)\n" -"{\n" -" float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" -" return fastNormalize4( n );\n" -"// float length = sqrtf(dot3F4(a, a));\n" -"// return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 normalize4(const float4 a)\n" -"{\n" -" float length = sqrtf(dot4(a, a));\n" -" return 1.f/length * a;\n" -"}\n" -"__inline\n" -"float4 createEquation(const float4 a, const float4 b, const float4 c)\n" -"{\n" -" float4 eqn;\n" -" float4 ab = b-a;\n" -" float4 ac = c-a;\n" -" eqn = normalize3( cross3(ab, ac) );\n" -" eqn.w = -dot3F4(eqn,a);\n" -" return eqn;\n" -"}\n" -"///////////////////////////////////////\n" -"// Matrix3x3\n" -"///////////////////////////////////////\n" -"typedef struct\n" -"{\n" -" float4 m_row[3];\n" -"}Matrix3x3;\n" -"__inline\n" -"Matrix3x3 mtZero();\n" -"__inline\n" -"Matrix3x3 mtIdentity();\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m);\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b);\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b);\n" -"__inline\n" -"Matrix3x3 mtZero()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(0.f);\n" -" m.m_row[1] = (float4)(0.f);\n" -" m.m_row[2] = (float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtIdentity()\n" -"{\n" -" Matrix3x3 m;\n" -" m.m_row[0] = (float4)(1,0,0,0);\n" -" m.m_row[1] = (float4)(0,1,0,0);\n" -" m.m_row[2] = (float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtTranspose(Matrix3x3 m)\n" -"{\n" -" Matrix3x3 out;\n" -" out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = 
(float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" -"{\n" -" Matrix3x3 transB;\n" -" transB = mtTranspose( b );\n" -" Matrix3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul1(Matrix3x3 a, float4 b)\n" -"{\n" -" float4 ans;\n" -" ans.x = dot3F4( a.m_row[0], b );\n" -" ans.y = dot3F4( a.m_row[1], b );\n" -" ans.z = dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"float4 mtMul3(float4 a, Matrix3x3 b)\n" -"{\n" -" float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" float4 ans;\n" -" ans.x = dot3F4( a, colx );\n" -" ans.y = dot3F4( a, coly );\n" -" ans.z = dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"///////////////////////////////////////\n" -"// Quaternion\n" -"///////////////////////////////////////\n" -"typedef float4 Quaternion;\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b);\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in);\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec);\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q);\n" -"__inline\n" -"Quaternion qtMul(Quaternion a, Quaternion b)\n" -"{\n" -" Quaternion ans;\n" -" ans = cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - 
(a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"__inline\n" -"Quaternion qtNormalize(Quaternion in)\n" -"{\n" -" return fastNormalize4(in);\n" -"// in /= length( in );\n" -"// return in;\n" -"}\n" -"__inline\n" -"float4 qtRotate(Quaternion q, float4 vec)\n" -"{\n" -" Quaternion qInv = qtInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = qtMul(qtMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"__inline\n" -"Quaternion qtInvert(Quaternion q)\n" -"{\n" -" return (Quaternion)(-q.xyz, q.w);\n" -"}\n" -"__inline\n" -"float4 qtInvRotate(const Quaternion q, float4 vec)\n" -"{\n" -" return qtRotate( qtInvert( q ), vec );\n" -"}\n" -"#define WG_SIZE 64\n" -"typedef struct\n" -"{\n" -" float4 m_pos;\n" -" Quaternion m_quat;\n" -" float4 m_linVel;\n" -" float4 m_angVel;\n" -" u32 m_shapeIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"} Body;\n" -"typedef struct\n" -"{\n" -" Matrix3x3 m_invInertia;\n" -" Matrix3x3 m_initInvInertia;\n" -"} Shape;\n" -"typedef struct\n" -"{\n" -" float4 m_linear;\n" -" float4 m_worldPos[4];\n" -" float4 m_center; \n" -" float m_jacCoeffInv[4];\n" -" float m_b[4];\n" -" float m_appliedRambdaDt[4];\n" -" float m_fJacCoeffInv[2]; \n" -" float m_fAppliedRambdaDt[2]; \n" -" u32 m_bodyA;\n" -" u32 m_bodyB;\n" -" int m_batchIdx;\n" -" u32 m_paddings;\n" -"} Constraint4;\n" -"__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, int numContactManifolds, int fixedBodyIndex)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" \n" -" if( i < numContactManifolds)\n" -" {\n" -" int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;\n" -" bool isFixedA = (pa <0) || (pa == fixedBodyIndex);\n" -" int bodyIndexA = abs(pa);\n" -" if (!isFixedA)\n" -" {\n" -" AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);\n" -" }\n" -" barrier(CLK_GLOBAL_MEM_FENCE);\n" 
-" int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;\n" -" bool isFixedB = (pb <0) || (pb == fixedBodyIndex);\n" -" int bodyIndexB = abs(pb);\n" -" if (!isFixedB)\n" -" {\n" -" AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);\n" -" } \n" -" }\n" -"}\n" -"__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" \n" -" if( i < numSplitBodies)\n" -" {\n" -" linearVelocities[i] = make_float4(0);\n" -" angularVelocities[i] = make_float4(0);\n" -" }\n" -"}\n" -"__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" -"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numBodies)\n" -" {\n" -" if (gBodies[i].m_invMass)\n" -" {\n" -" int bodyOffset = offsetSplitBodies[i];\n" -" int count = bodyCount[i];\n" -" float factor = 1.f/((float)count);\n" -" float4 averageLinVel = make_float4(0.f);\n" -" float4 averageAngVel = make_float4(0.f);\n" -" \n" -" for (int j=0;j<count;j++)\n" -" {\n" -" averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;\n" -" averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;\n" -" }\n" -" \n" -" for (int j=0;j<count;j++)\n" -" {\n" -" deltaLinearVelocities[bodyOffset+j] = averageLinVel;\n" -" deltaAngularVelocities[bodyOffset+j] = averageAngVel;\n" -" }\n" -" \n" -" }//bodies[i].m_invMass\n" -" }//i<numBodies\n" -"}\n" -"void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" -"{\n" -" *linear = make_float4(n.xyz,0.f);\n" -" *angular0 = cross3(r0, n);\n" -" *angular1 = -cross3(r1, n);\n" -"}\n" -"float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" -"{\n" -" return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + 
dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" -"}\n" -"float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" -" float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n" -"{\n" -" // linear0,1 are normlized\n" -" float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" -" float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" -" float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" -" float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" -" return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n" -"}\n" -"void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" -" void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" -"{\n" -" if (fabs(n.z) > 0.70710678f) {\n" -" // choose p in y-z plane\n" -" float a = n.y*n.y + n.z*n.z;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = 0;\n" -" p[0].y = -n.z*k;\n" -" p[0].z = n.y*k;\n" -" // set q = n x p\n" -" q[0].x = a*k;\n" -" q[0].y = -n.x*p[0].z;\n" -" q[0].z = n.x*p[0].y;\n" -" }\n" -" else {\n" -" // choose p in x-y plane\n" -" float a = n.x*n.x + n.y*n.y;\n" -" float k = 1.f/sqrt(a);\n" -" p[0].x = -n.y*k;\n" -" p[0].y = n.x*k;\n" -" p[0].z = 0;\n" -" // set q = n x p\n" -" q[0].x = -n.z*p[0].y;\n" -" q[0].y = n.z*p[0].x;\n" -" q[0].z = a*k;\n" -" }\n" -"}\n" -"void solveContact(__global Constraint4* cs,\n" -" float4 posA, float4* linVelA, float4* angVelA, float invMassA, Matrix3x3 invInertiaA,\n" -" float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" -" float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)\n" -"{\n" -" float minRambdaDt = 0;\n" -" float maxRambdaDt = FLT_MAX;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = cs->m_worldPos[ic] - posA;\n" -" float4 r1 = cs->m_worldPos[ic] - posB;\n" -" 
setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" -" \n" -" float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" -" *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" -" rambdaDt *= cs->m_jacCoeffInv[ic];\n" -" \n" -" {\n" -" float prevSum = cs->m_appliedRambdaDt[ic];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt );\n" -" updated = min2( updated, maxRambdaDt );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_appliedRambdaDt[ic] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" if (invMassA)\n" -" {\n" -" *dLinVelA += linImp0;\n" -" *dAngVelA += angImp0;\n" -" }\n" -" if (invMassB)\n" -" {\n" -" *dLinVelB += linImp1;\n" -" *dAngVelB += angImp1;\n" -" }\n" -" }\n" -"}\n" -"// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -"void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" -"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -"__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" -"{\n" -" //float frictionCoeff = ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = 
gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" float4 dLinVelA = make_float4(0,0,0,0);\n" -" float4 dAngVelA = make_float4(0,0,0,0);\n" -" float4 dLinVelB = make_float4(0,0,0,0);\n" -" float4 dAngVelB = make_float4(0,0,0,0);\n" -" \n" -" int bodyOffsetA = offsetSplitBodies[aIdx];\n" -" int constraintOffsetA = contactConstraintOffsets[0].x;\n" -" int splitIndexA = bodyOffsetA+constraintOffsetA;\n" -" \n" -" if (invMassA)\n" -" {\n" -" dLinVelA = deltaLinearVelocities[splitIndexA];\n" -" dAngVelA = deltaAngularVelocities[splitIndexA];\n" -" }\n" -" int bodyOffsetB = offsetSplitBodies[bIdx];\n" -" int constraintOffsetB = contactConstraintOffsets[0].y;\n" -" int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -" if (invMassB)\n" -" {\n" -" dLinVelB = deltaLinearVelocities[splitIndexB];\n" -" dAngVelB = deltaAngularVelocities[splitIndexB];\n" -" }\n" -" solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -" posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" -" if (invMassA)\n" -" {\n" -" deltaLinearVelocities[splitIndexA] = dLinVelA;\n" -" deltaAngularVelocities[splitIndexA] = dAngVelA;\n" -" } \n" -" if (invMassB)\n" -" {\n" -" deltaLinearVelocities[splitIndexB] = dLinVelB;\n" -" deltaAngularVelocities[splitIndexB] = dAngVelB;\n" -" }\n" -"}\n" -"__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" -"__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" -"float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" -")\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numManifolds)\n" -" {\n" -" solveContactConstraint( gBodies, gShapes, &gConstraints[i] 
,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -" }\n" -"}\n" -"void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n" -" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" -"{\n" -" float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n" -" int aIdx = ldsCs[0].m_bodyA;\n" -" int bIdx = ldsCs[0].m_bodyB;\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" \n" -" float4 dLinVelA = make_float4(0,0,0,0);\n" -" float4 dAngVelA = make_float4(0,0,0,0);\n" -" float4 dLinVelB = make_float4(0,0,0,0);\n" -" float4 dAngVelB = make_float4(0,0,0,0);\n" -" \n" -" int bodyOffsetA = offsetSplitBodies[aIdx];\n" -" int constraintOffsetA = contactConstraintOffsets[0].x;\n" -" int splitIndexA = bodyOffsetA+constraintOffsetA;\n" -" \n" -" if (invMassA)\n" -" {\n" -" dLinVelA = deltaLinearVelocities[splitIndexA];\n" -" dAngVelA = deltaAngularVelocities[splitIndexA];\n" -" }\n" -" int bodyOffsetB = offsetSplitBodies[bIdx];\n" -" int constraintOffsetB = contactConstraintOffsets[0].y;\n" -" int splitIndexB= bodyOffsetB+constraintOffsetB;\n" -" if (invMassB)\n" -" {\n" -" dLinVelB = deltaLinearVelocities[splitIndexB];\n" -" dAngVelB = deltaAngularVelocities[splitIndexB];\n" -" }\n" -" {\n" -" float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" -" float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" -" float sum = 0;\n" -" for(int j=0; j<4; 
j++)\n" -" {\n" -" sum +=ldsCs[0].m_appliedRambdaDt[j];\n" -" }\n" -" frictionCoeff = 0.7f;\n" -" for(int j=0; j<4; j++)\n" -" {\n" -" maxRambdaDt[j] = frictionCoeff*sum;\n" -" minRambdaDt[j] = -maxRambdaDt[j];\n" -" }\n" -" \n" -"// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" -"// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" -" \n" -" \n" -" {\n" -" \n" -" __global Constraint4* cs = ldsCs;\n" -" \n" -" if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" -" const float4 center = cs->m_center;\n" -" \n" -" float4 n = -cs->m_linear;\n" -" \n" -" float4 tangent[2];\n" -" btPlaneSpace1(n,&tangent[0],&tangent[1]);\n" -" float4 angular0, angular1, linear;\n" -" float4 r0 = center - posA;\n" -" float4 r1 = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" -" float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );\n" -" rambdaDt *= cs->m_fJacCoeffInv[i];\n" -" \n" -" {\n" -" float prevSum = cs->m_fAppliedRambdaDt[i];\n" -" float updated = prevSum;\n" -" updated += rambdaDt;\n" -" updated = max2( updated, minRambdaDt[i] );\n" -" updated = min2( updated, maxRambdaDt[i] );\n" -" rambdaDt = updated - prevSum;\n" -" cs->m_fAppliedRambdaDt[i] = updated;\n" -" }\n" -" \n" -" float4 linImp0 = invMassA*linear*rambdaDt;\n" -" float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" -" float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" -" float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" -" \n" -" dLinVelA += linImp0;\n" -" dAngVelA += angImp0;\n" -" dLinVelB += linImp1;\n" -" dAngVelB += angImp1;\n" -" }\n" -" { // angular damping for point constraint\n" -" float4 ab = normalize3( posB - posA );\n" -" float4 ac = normalize3( center - posA );\n" -" if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 
0.f))\n" -" {\n" -" float angNA = dot3F4( n, angVelA );\n" -" float angNB = dot3F4( n, angVelB );\n" -" \n" -" dAngVelA -= (angNA*0.1f)*n;\n" -" dAngVelB -= (angNB*0.1f)*n;\n" -" }\n" -" }\n" -" }\n" -" \n" -" \n" -" }\n" -" if (invMassA)\n" -" {\n" -" deltaLinearVelocities[splitIndexA] = dLinVelA;\n" -" deltaAngularVelocities[splitIndexA] = dAngVelA;\n" -" } \n" -" if (invMassB)\n" -" {\n" -" deltaLinearVelocities[splitIndexB] = dLinVelB;\n" -" deltaAngularVelocities[splitIndexB] = dAngVelB;\n" -" }\n" -" \n" -"}\n" -"__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" -" __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" -" float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" -")\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numManifolds)\n" -" {\n" -" solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" -" }\n" -"}\n" -"__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" -" __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" -"{\n" -" int i = GET_GLOBAL_IDX;\n" -" if (i<numBodies)\n" -" {\n" -" if (gBodies[i].m_invMass)\n" -" {\n" -" int bodyOffset = offsetSplitBodies[i];\n" -" int count = bodyCount[i];\n" -" if (count)\n" -" {\n" -" gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];\n" -" gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];\n" -" }\n" -" }\n" -" }\n" -"}\n" -"void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" -" const float4 posB, const float4 linVelB, const 
float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" -" __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" -" Constraint4* dstC )\n" -"{\n" -" dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" -" dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" -" float dtInv = 1.f/dt;\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" -" dstC->m_linear = src->m_worldNormalOnB;\n" -" dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" -" for(int ic=0; ic<4; ic++)\n" -" {\n" -" float4 r0 = src->m_worldPosB[ic] - posA;\n" -" float4 r1 = src->m_worldPosB[ic] - posB;\n" -" if( ic >= src->m_worldNormalOnB.w )//npoints\n" -" {\n" -" dstC->m_jacCoeffInv[ic] = 0.f;\n" -" continue;\n" -" }\n" -" float relVelN;\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" -" dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" -" relVelN = calcRelVel(linear, -linear, angular0, angular1,\n" -" linVelA, angVelA, linVelB, angVelB);\n" -" float e = 0.f;//src->getRestituitionCoeff();\n" -" if( relVelN*relVelN < 0.004f ) e = 0.f;\n" -" dstC->m_b[ic] = e*relVelN;\n" -" //float penetration = src->m_worldPosB[ic].w;\n" -" dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" -" dstC->m_appliedRambdaDt[ic] = 0.f;\n" -" }\n" -" }\n" -" if( src->m_worldNormalOnB.w > 0 )//npoints\n" -" { // prepare friction\n" -" float4 center = make_float4(0.f);\n" -" for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" -" center += src->m_worldPosB[i];\n" -" center /= (float)src->m_worldNormalOnB.w;\n" -" float4 tangent[2];\n" -" btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" -" \n" -" float4 
r[2];\n" -" r[0] = center - posA;\n" -" r[1] = center - posB;\n" -" for(int i=0; i<2; i++)\n" -" {\n" -" float4 linear, angular0, angular1;\n" -" setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" -" dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" -" invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" -" dstC->m_fAppliedRambdaDt[i] = 0.f;\n" -" }\n" -" dstC->m_center = center;\n" -" }\n" -" for(int i=0; i<4; i++)\n" -" {\n" -" if( i<src->m_worldNormalOnB.w )\n" -" {\n" -" dstC->m_worldPos[i] = src->m_worldPosB[i];\n" -" }\n" -" else\n" -" {\n" -" dstC->m_worldPos[i] = make_float4(0.f);\n" -" }\n" -" }\n" -"}\n" -"__kernel\n" -"__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" -"void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" -"__global const unsigned int* bodyCount,\n" -"int nContacts,\n" -"float dt,\n" -"float positionDrift,\n" -"float positionConstraintCoeff\n" -")\n" -"{\n" -" int gIdx = GET_GLOBAL_IDX;\n" -" \n" -" if( gIdx < nContacts )\n" -" {\n" -" int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" -" int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" -" float4 posA = gBodies[aIdx].m_pos;\n" -" float4 linVelA = gBodies[aIdx].m_linVel;\n" -" float4 angVelA = gBodies[aIdx].m_angVel;\n" -" float invMassA = gBodies[aIdx].m_invMass;\n" -" Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" -" float4 posB = gBodies[bIdx].m_pos;\n" -" float4 linVelB = gBodies[bIdx].m_linVel;\n" -" float4 angVelB = gBodies[bIdx].m_angVel;\n" -" float invMassB = gBodies[bIdx].m_invMass;\n" -" Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" -" Constraint4 cs;\n" -" float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;\n" -" float countB = invMassB != 0.f ? 
(float)bodyCount[bIdx] : 1;\n" -" setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" -" &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" -" &cs );\n" -" \n" -" cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" -" gConstraintOut[gIdx] = cs;\n" -" }\n" -"}\n" -; +static const char* solverUtilsCL = + "/*\n" + "Copyright (c) 2013 Advanced Micro Devices, Inc. \n" + "This software is provided 'as-is', without any express or implied warranty.\n" + "In no event will the authors be held liable for any damages arising from the use of this software.\n" + "Permission is granted to anyone to use this software for any purpose, \n" + "including commercial applications, and to alter it and redistribute it freely, \n" + "subject to the following restrictions:\n" + "1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.\n" + "2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.\n" + "3. 
This notice may not be removed or altered from any source distribution.\n" + "*/\n" + "//Originally written by Erwin Coumans\n" + "#ifndef B3_CONTACT4DATA_H\n" + "#define B3_CONTACT4DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = 
b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "typedef struct b3Contact4Data b3Contact4Data_t;\n" + "struct b3Contact4Data\n" + "{\n" + " b3Float4 m_worldPosB[4];\n" + "// b3Float4 m_localPosA[4];\n" + "// b3Float4 m_localPosB[4];\n" + " b3Float4 m_worldNormalOnB; // w: m_nPoints\n" + " unsigned short m_restituitionCoeffCmp;\n" + " unsigned short m_frictionCoeffCmp;\n" + " int m_batchIdx;\n" + " int m_bodyAPtrAndSignBit;//x:m_bodyAPtr, y:m_bodyBPtr\n" + " int m_bodyBPtrAndSignBit;\n" + " int m_childIndexA;\n" + " int m_childIndexB;\n" + " int m_unused1;\n" + " int m_unused2;\n" + "};\n" + "inline int b3Contact4Data_getNumPoints(const struct b3Contact4Data* contact)\n" + "{\n" + " return (int)contact->m_worldNormalOnB.w;\n" + "};\n" + "inline void b3Contact4Data_setNumPoints(struct b3Contact4Data* contact, int numPoints)\n" + "{\n" + " contact->m_worldNormalOnB.w = (float)numPoints;\n" + "};\n" + "#endif //B3_CONTACT4DATA_H\n" + "#pragma OPENCL EXTENSION cl_amd_printf : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable\n" + "#pragma OPENCL EXTENSION cl_khr_global_int32_extended_atomics : enable\n" + "#ifdef cl_ext_atomic_counters_32\n" + "#pragma OPENCL EXTENSION cl_ext_atomic_counters_32 : enable\n" + "#else\n" + "#define counter32_t volatile global int*\n" + "#endif\n" + "typedef unsigned int u32;\n" + "typedef unsigned short u16;\n" + "typedef unsigned char u8;\n" + "#define GET_GROUP_IDX get_group_id(0)\n" + "#define GET_LOCAL_IDX get_local_id(0)\n" + "#define GET_GLOBAL_IDX get_global_id(0)\n" + "#define GET_GROUP_SIZE 
get_local_size(0)\n" + "#define GET_NUM_GROUPS get_num_groups(0)\n" + "#define GROUP_LDS_BARRIER barrier(CLK_LOCAL_MEM_FENCE)\n" + "#define GROUP_MEM_FENCE mem_fence(CLK_LOCAL_MEM_FENCE)\n" + "#define AtomInc(x) atom_inc(&(x))\n" + "#define AtomInc1(x, out) out = atom_inc(&(x))\n" + "#define AppendInc(x, out) out = atomic_inc(x)\n" + "#define AtomAdd(x, value) atom_add(&(x), value)\n" + "#define AtomCmpxhg(x, cmp, value) atom_cmpxchg( &(x), cmp, value )\n" + "#define AtomXhg(x, value) atom_xchg ( &(x), value )\n" + "#define SELECT_UINT4( b, a, condition ) select( b,a,condition )\n" + "#define make_float4 (float4)\n" + "#define make_float2 (float2)\n" + "#define make_uint4 (uint4)\n" + "#define make_int4 (int4)\n" + "#define make_uint2 (uint2)\n" + "#define make_int2 (int2)\n" + "#define max2 max\n" + "#define min2 min\n" + "///////////////////////////////////////\n" + "// Vector\n" + "///////////////////////////////////////\n" + "__inline\n" + "float fastDiv(float numerator, float denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "// return numerator/denominator; \n" + "}\n" + "__inline\n" + "float4 fastDiv4(float4 numerator, float4 denominator)\n" + "{\n" + " return native_divide(numerator, denominator); \n" + "}\n" + "__inline\n" + "float fastSqrtf(float f2)\n" + "{\n" + " return native_sqrt(f2);\n" + "// return sqrt(f2);\n" + "}\n" + "__inline\n" + "float fastRSqrt(float f2)\n" + "{\n" + " return native_rsqrt(f2);\n" + "}\n" + "__inline\n" + "float fastLength4(float4 v)\n" + "{\n" + " return fast_length(v);\n" + "}\n" + "__inline\n" + "float4 fastNormalize4(float4 v)\n" + "{\n" + " return fast_normalize(v);\n" + "}\n" + "__inline\n" + "float sqrtf(float a)\n" + "{\n" + "// return sqrt(a);\n" + " return native_sqrt(a);\n" + "}\n" + "__inline\n" + "float4 cross3(float4 a1, float4 b1)\n" + "{\n" + " float4 a=make_float4(a1.xyz,0.f);\n" + " float4 b=make_float4(b1.xyz,0.f);\n" + " //float4 a=a1;\n" + " //float4 b=b1;\n" + " return 
cross(a,b);\n" + "}\n" + "__inline\n" + "float dot3F4(float4 a, float4 b)\n" + "{\n" + " float4 a1 = make_float4(a.xyz,0.f);\n" + " float4 b1 = make_float4(b.xyz,0.f);\n" + " return dot(a1, b1);\n" + "}\n" + "__inline\n" + "float length3(const float4 a)\n" + "{\n" + " return sqrtf(dot3F4(a,a));\n" + "}\n" + "__inline\n" + "float dot4(const float4 a, const float4 b)\n" + "{\n" + " return dot( a, b );\n" + "}\n" + "// for height\n" + "__inline\n" + "float dot3w1(const float4 point, const float4 eqn)\n" + "{\n" + " return dot3F4(point,eqn) + eqn.w;\n" + "}\n" + "__inline\n" + "float4 normalize3(const float4 a)\n" + "{\n" + " float4 n = make_float4(a.x, a.y, a.z, 0.f);\n" + " return fastNormalize4( n );\n" + "// float length = sqrtf(dot3F4(a, a));\n" + "// return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 normalize4(const float4 a)\n" + "{\n" + " float length = sqrtf(dot4(a, a));\n" + " return 1.f/length * a;\n" + "}\n" + "__inline\n" + "float4 createEquation(const float4 a, const float4 b, const float4 c)\n" + "{\n" + " float4 eqn;\n" + " float4 ab = b-a;\n" + " float4 ac = c-a;\n" + " eqn = normalize3( cross3(ab, ac) );\n" + " eqn.w = -dot3F4(eqn,a);\n" + " return eqn;\n" + "}\n" + "///////////////////////////////////////\n" + "// Matrix3x3\n" + "///////////////////////////////////////\n" + "typedef struct\n" + "{\n" + " float4 m_row[3];\n" + "}Matrix3x3;\n" + "__inline\n" + "Matrix3x3 mtZero();\n" + "__inline\n" + "Matrix3x3 mtIdentity();\n" + "__inline\n" + "Matrix3x3 mtTranspose(Matrix3x3 m);\n" + "__inline\n" + "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b);\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b);\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b);\n" + "__inline\n" + "Matrix3x3 mtZero()\n" + "{\n" + " Matrix3x3 m;\n" + " m.m_row[0] = (float4)(0.f);\n" + " m.m_row[1] = (float4)(0.f);\n" + " m.m_row[2] = (float4)(0.f);\n" + " return m;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtIdentity()\n" + "{\n" + " Matrix3x3 m;\n" + " m.m_row[0] = 
(float4)(1,0,0,0);\n" + " m.m_row[1] = (float4)(0,1,0,0);\n" + " m.m_row[2] = (float4)(0,0,1,0);\n" + " return m;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtTranspose(Matrix3x3 m)\n" + "{\n" + " Matrix3x3 out;\n" + " out.m_row[0] = (float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" + " out.m_row[1] = (float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" + " out.m_row[2] = (float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" + " return out;\n" + "}\n" + "__inline\n" + "Matrix3x3 mtMul(Matrix3x3 a, Matrix3x3 b)\n" + "{\n" + " Matrix3x3 transB;\n" + " transB = mtTranspose( b );\n" + " Matrix3x3 ans;\n" + " // why this doesn't run when 0ing in the for{}\n" + " a.m_row[0].w = 0.f;\n" + " a.m_row[1].w = 0.f;\n" + " a.m_row[2].w = 0.f;\n" + " for(int i=0; i<3; i++)\n" + " {\n" + "// a.m_row[i].w = 0.f;\n" + " ans.m_row[i].x = dot3F4(a.m_row[i],transB.m_row[0]);\n" + " ans.m_row[i].y = dot3F4(a.m_row[i],transB.m_row[1]);\n" + " ans.m_row[i].z = dot3F4(a.m_row[i],transB.m_row[2]);\n" + " ans.m_row[i].w = 0.f;\n" + " }\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul1(Matrix3x3 a, float4 b)\n" + "{\n" + " float4 ans;\n" + " ans.x = dot3F4( a.m_row[0], b );\n" + " ans.y = dot3F4( a.m_row[1], b );\n" + " ans.z = dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "float4 mtMul3(float4 a, Matrix3x3 b)\n" + "{\n" + " float4 colx = make_float4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " float4 coly = make_float4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " float4 colz = make_float4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " float4 ans;\n" + " ans.x = dot3F4( a, colx );\n" + " ans.y = dot3F4( a, coly );\n" + " ans.z = dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "///////////////////////////////////////\n" + "// Quaternion\n" + "///////////////////////////////////////\n" + "typedef float4 Quaternion;\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b);\n" + 
"__inline\n" + "Quaternion qtNormalize(Quaternion in);\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec);\n" + "__inline\n" + "Quaternion qtInvert(Quaternion q);\n" + "__inline\n" + "Quaternion qtMul(Quaternion a, Quaternion b)\n" + "{\n" + " Quaternion ans;\n" + " ans = cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + " ans.w = a.w*b.w - dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "__inline\n" + "Quaternion qtNormalize(Quaternion in)\n" + "{\n" + " return fastNormalize4(in);\n" + "// in /= length( in );\n" + "// return in;\n" + "}\n" + "__inline\n" + "float4 qtRotate(Quaternion q, float4 vec)\n" + "{\n" + " Quaternion qInv = qtInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = qtMul(qtMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "__inline\n" + "Quaternion qtInvert(Quaternion q)\n" + "{\n" + " return (Quaternion)(-q.xyz, q.w);\n" + "}\n" + "__inline\n" + "float4 qtInvRotate(const Quaternion q, float4 vec)\n" + "{\n" + " return qtRotate( qtInvert( q ), vec );\n" + "}\n" + "#define WG_SIZE 64\n" + "typedef struct\n" + "{\n" + " float4 m_pos;\n" + " Quaternion m_quat;\n" + " float4 m_linVel;\n" + " float4 m_angVel;\n" + " u32 m_shapeIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "} Body;\n" + "typedef struct\n" + "{\n" + " Matrix3x3 m_invInertia;\n" + " Matrix3x3 m_initInvInertia;\n" + "} Shape;\n" + "typedef struct\n" + "{\n" + " float4 m_linear;\n" + " float4 m_worldPos[4];\n" + " float4 m_center; \n" + " float m_jacCoeffInv[4];\n" + " float m_b[4];\n" + " float m_appliedRambdaDt[4];\n" + " float m_fJacCoeffInv[2]; \n" + " float m_fAppliedRambdaDt[2]; \n" + " u32 m_bodyA;\n" + " u32 m_bodyB;\n" + " int m_batchIdx;\n" + " u32 m_paddings;\n" + "} Constraint4;\n" + "__kernel void CountBodiesKernel(__global struct b3Contact4Data* manifoldPtr, __global unsigned int* bodyCount, __global int2* contactConstraintOffsets, 
int numContactManifolds, int fixedBodyIndex)\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " \n" + " if( i < numContactManifolds)\n" + " {\n" + " int pa = manifoldPtr[i].m_bodyAPtrAndSignBit;\n" + " bool isFixedA = (pa <0) || (pa == fixedBodyIndex);\n" + " int bodyIndexA = abs(pa);\n" + " if (!isFixedA)\n" + " {\n" + " AtomInc1(bodyCount[bodyIndexA],contactConstraintOffsets[i].x);\n" + " }\n" + " barrier(CLK_GLOBAL_MEM_FENCE);\n" + " int pb = manifoldPtr[i].m_bodyBPtrAndSignBit;\n" + " bool isFixedB = (pb <0) || (pb == fixedBodyIndex);\n" + " int bodyIndexB = abs(pb);\n" + " if (!isFixedB)\n" + " {\n" + " AtomInc1(bodyCount[bodyIndexB],contactConstraintOffsets[i].y);\n" + " } \n" + " }\n" + "}\n" + "__kernel void ClearVelocitiesKernel(__global float4* linearVelocities,__global float4* angularVelocities, int numSplitBodies)\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " \n" + " if( i < numSplitBodies)\n" + " {\n" + " linearVelocities[i] = make_float4(0);\n" + " angularVelocities[i] = make_float4(0);\n" + " }\n" + "}\n" + "__kernel void AverageVelocitiesKernel(__global Body* gBodies,__global int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" + "__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " if (i<numBodies)\n" + " {\n" + " if (gBodies[i].m_invMass)\n" + " {\n" + " int bodyOffset = offsetSplitBodies[i];\n" + " int count = bodyCount[i];\n" + " float factor = 1.f/((float)count);\n" + " float4 averageLinVel = make_float4(0.f);\n" + " float4 averageAngVel = make_float4(0.f);\n" + " \n" + " for (int j=0;j<count;j++)\n" + " {\n" + " averageLinVel += deltaLinearVelocities[bodyOffset+j]*factor;\n" + " averageAngVel += deltaAngularVelocities[bodyOffset+j]*factor;\n" + " }\n" + " \n" + " for (int j=0;j<count;j++)\n" + " {\n" + " deltaLinearVelocities[bodyOffset+j] = averageLinVel;\n" + " deltaAngularVelocities[bodyOffset+j] = averageAngVel;\n" + " }\n" + " \n" + " 
}//bodies[i].m_invMass\n" + " }//i<numBodies\n" + "}\n" + "void setLinearAndAngular( float4 n, float4 r0, float4 r1, float4* linear, float4* angular0, float4* angular1)\n" + "{\n" + " *linear = make_float4(n.xyz,0.f);\n" + " *angular0 = cross3(r0, n);\n" + " *angular1 = -cross3(r1, n);\n" + "}\n" + "float calcRelVel( float4 l0, float4 l1, float4 a0, float4 a1, float4 linVel0, float4 angVel0, float4 linVel1, float4 angVel1 )\n" + "{\n" + " return dot3F4(l0, linVel0) + dot3F4(a0, angVel0) + dot3F4(l1, linVel1) + dot3F4(a1, angVel1);\n" + "}\n" + "float calcJacCoeff(const float4 linear0, const float4 linear1, const float4 angular0, const float4 angular1,\n" + " float invMass0, const Matrix3x3* invInertia0, float invMass1, const Matrix3x3* invInertia1, float countA, float countB)\n" + "{\n" + " // linear0,1 are normlized\n" + " float jmj0 = invMass0;//dot3F4(linear0, linear0)*invMass0;\n" + " float jmj1 = dot3F4(mtMul3(angular0,*invInertia0), angular0);\n" + " float jmj2 = invMass1;//dot3F4(linear1, linear1)*invMass1;\n" + " float jmj3 = dot3F4(mtMul3(angular1,*invInertia1), angular1);\n" + " return -1.f/((jmj0+jmj1)*countA+(jmj2+jmj3)*countB);\n" + "}\n" + "void btPlaneSpace1 (float4 n, float4* p, float4* q);\n" + " void btPlaneSpace1 (float4 n, float4* p, float4* q)\n" + "{\n" + " if (fabs(n.z) > 0.70710678f) {\n" + " // choose p in y-z plane\n" + " float a = n.y*n.y + n.z*n.z;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = 0;\n" + " p[0].y = -n.z*k;\n" + " p[0].z = n.y*k;\n" + " // set q = n x p\n" + " q[0].x = a*k;\n" + " q[0].y = -n.x*p[0].z;\n" + " q[0].z = n.x*p[0].y;\n" + " }\n" + " else {\n" + " // choose p in x-y plane\n" + " float a = n.x*n.x + n.y*n.y;\n" + " float k = 1.f/sqrt(a);\n" + " p[0].x = -n.y*k;\n" + " p[0].y = n.x*k;\n" + " p[0].z = 0;\n" + " // set q = n x p\n" + " q[0].x = -n.z*p[0].y;\n" + " q[0].y = n.z*p[0].x;\n" + " q[0].z = a*k;\n" + " }\n" + "}\n" + "void solveContact(__global Constraint4* cs,\n" + " float4 posA, float4* linVelA, float4* 
angVelA, float invMassA, Matrix3x3 invInertiaA,\n" + " float4 posB, float4* linVelB, float4* angVelB, float invMassB, Matrix3x3 invInertiaB,\n" + " float4* dLinVelA, float4* dAngVelA, float4* dLinVelB, float4* dAngVelB)\n" + "{\n" + " float minRambdaDt = 0;\n" + " float maxRambdaDt = FLT_MAX;\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " if( cs->m_jacCoeffInv[ic] == 0.f ) continue;\n" + " float4 angular0, angular1, linear;\n" + " float4 r0 = cs->m_worldPos[ic] - posA;\n" + " float4 r1 = cs->m_worldPos[ic] - posB;\n" + " setLinearAndAngular( cs->m_linear, r0, r1, &linear, &angular0, &angular1 );\n" + " \n" + " float rambdaDt = calcRelVel( cs->m_linear, -cs->m_linear, angular0, angular1, \n" + " *linVelA+*dLinVelA, *angVelA+*dAngVelA, *linVelB+*dLinVelB, *angVelB+*dAngVelB ) + cs->m_b[ic];\n" + " rambdaDt *= cs->m_jacCoeffInv[ic];\n" + " \n" + " {\n" + " float prevSum = cs->m_appliedRambdaDt[ic];\n" + " float updated = prevSum;\n" + " updated += rambdaDt;\n" + " updated = max2( updated, minRambdaDt );\n" + " updated = min2( updated, maxRambdaDt );\n" + " rambdaDt = updated - prevSum;\n" + " cs->m_appliedRambdaDt[ic] = updated;\n" + " }\n" + " \n" + " float4 linImp0 = invMassA*linear*rambdaDt;\n" + " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" + " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" + " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" + " \n" + " if (invMassA)\n" + " {\n" + " *dLinVelA += linImp0;\n" + " *dAngVelA += angImp0;\n" + " }\n" + " if (invMassB)\n" + " {\n" + " *dLinVelB += linImp1;\n" + " *dAngVelB += angImp1;\n" + " }\n" + " }\n" + "}\n" + "// solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,contactConstraintOffsets,offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" + "void solveContactConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs, \n" + "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" + "__global float4* 
deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" + "{\n" + " //float frictionCoeff = ldsCs[0].m_linear.w;\n" + " int aIdx = ldsCs[0].m_bodyA;\n" + " int bIdx = ldsCs[0].m_bodyB;\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" + " \n" + " float4 dLinVelA = make_float4(0,0,0,0);\n" + " float4 dAngVelA = make_float4(0,0,0,0);\n" + " float4 dLinVelB = make_float4(0,0,0,0);\n" + " float4 dAngVelB = make_float4(0,0,0,0);\n" + " \n" + " int bodyOffsetA = offsetSplitBodies[aIdx];\n" + " int constraintOffsetA = contactConstraintOffsets[0].x;\n" + " int splitIndexA = bodyOffsetA+constraintOffsetA;\n" + " \n" + " if (invMassA)\n" + " {\n" + " dLinVelA = deltaLinearVelocities[splitIndexA];\n" + " dAngVelA = deltaAngularVelocities[splitIndexA];\n" + " }\n" + " int bodyOffsetB = offsetSplitBodies[bIdx];\n" + " int constraintOffsetB = contactConstraintOffsets[0].y;\n" + " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" + " if (invMassB)\n" + " {\n" + " dLinVelB = deltaLinearVelocities[splitIndexB];\n" + " dAngVelB = deltaAngularVelocities[splitIndexB];\n" + " }\n" + " solveContact( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" + " posB, &linVelB, &angVelB, invMassB, invInertiaB ,&dLinVelA, &dAngVelA, &dLinVelB, &dAngVelB);\n" + " if (invMassA)\n" + " {\n" + " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" + " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" + " } \n" + " if (invMassB)\n" + " {\n" + " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" + " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" 
+ " }\n" + "}\n" + "__kernel void SolveContactJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" + "__global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,__global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" + "float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" + ")\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " if (i<numManifolds)\n" + " {\n" + " solveContactConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" + " }\n" + "}\n" + "void solveFrictionConstraint(__global Body* gBodies, __global Shape* gShapes, __global Constraint4* ldsCs,\n" + " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" + " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities)\n" + "{\n" + " float frictionCoeff = 0.7f;//ldsCs[0].m_linear.w;\n" + " int aIdx = ldsCs[0].m_bodyA;\n" + " int bIdx = ldsCs[0].m_bodyB;\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" + " \n" + " float4 dLinVelA = make_float4(0,0,0,0);\n" + " float4 dAngVelA = make_float4(0,0,0,0);\n" + " float4 dLinVelB = make_float4(0,0,0,0);\n" + " float4 dAngVelB = make_float4(0,0,0,0);\n" + " \n" + " int bodyOffsetA = offsetSplitBodies[aIdx];\n" + " int constraintOffsetA = contactConstraintOffsets[0].x;\n" + " int splitIndexA = bodyOffsetA+constraintOffsetA;\n" + " \n" + " if 
(invMassA)\n" + " {\n" + " dLinVelA = deltaLinearVelocities[splitIndexA];\n" + " dAngVelA = deltaAngularVelocities[splitIndexA];\n" + " }\n" + " int bodyOffsetB = offsetSplitBodies[bIdx];\n" + " int constraintOffsetB = contactConstraintOffsets[0].y;\n" + " int splitIndexB= bodyOffsetB+constraintOffsetB;\n" + " if (invMassB)\n" + " {\n" + " dLinVelB = deltaLinearVelocities[splitIndexB];\n" + " dAngVelB = deltaAngularVelocities[splitIndexB];\n" + " }\n" + " {\n" + " float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};\n" + " float minRambdaDt[4] = {0.f,0.f,0.f,0.f};\n" + " float sum = 0;\n" + " for(int j=0; j<4; j++)\n" + " {\n" + " sum +=ldsCs[0].m_appliedRambdaDt[j];\n" + " }\n" + " frictionCoeff = 0.7f;\n" + " for(int j=0; j<4; j++)\n" + " {\n" + " maxRambdaDt[j] = frictionCoeff*sum;\n" + " minRambdaDt[j] = -maxRambdaDt[j];\n" + " }\n" + " \n" + "// solveFriction( ldsCs, posA, &linVelA, &angVelA, invMassA, invInertiaA,\n" + "// posB, &linVelB, &angVelB, invMassB, invInertiaB, maxRambdaDt, minRambdaDt );\n" + " \n" + " \n" + " {\n" + " \n" + " __global Constraint4* cs = ldsCs;\n" + " \n" + " if( cs->m_fJacCoeffInv[0] == 0 && cs->m_fJacCoeffInv[0] == 0 ) return;\n" + " const float4 center = cs->m_center;\n" + " \n" + " float4 n = -cs->m_linear;\n" + " \n" + " float4 tangent[2];\n" + " btPlaneSpace1(n,&tangent[0],&tangent[1]);\n" + " float4 angular0, angular1, linear;\n" + " float4 r0 = center - posA;\n" + " float4 r1 = center - posB;\n" + " for(int i=0; i<2; i++)\n" + " {\n" + " setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );\n" + " float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,\n" + " linVelA+dLinVelA, angVelA+dAngVelA, linVelB+dLinVelB, angVelB+dAngVelB );\n" + " rambdaDt *= cs->m_fJacCoeffInv[i];\n" + " \n" + " {\n" + " float prevSum = cs->m_fAppliedRambdaDt[i];\n" + " float updated = prevSum;\n" + " updated += rambdaDt;\n" + " updated = max2( updated, minRambdaDt[i] );\n" + " updated = min2( updated, 
maxRambdaDt[i] );\n" + " rambdaDt = updated - prevSum;\n" + " cs->m_fAppliedRambdaDt[i] = updated;\n" + " }\n" + " \n" + " float4 linImp0 = invMassA*linear*rambdaDt;\n" + " float4 linImp1 = invMassB*(-linear)*rambdaDt;\n" + " float4 angImp0 = mtMul1(invInertiaA, angular0)*rambdaDt;\n" + " float4 angImp1 = mtMul1(invInertiaB, angular1)*rambdaDt;\n" + " \n" + " dLinVelA += linImp0;\n" + " dAngVelA += angImp0;\n" + " dLinVelB += linImp1;\n" + " dAngVelB += angImp1;\n" + " }\n" + " { // angular damping for point constraint\n" + " float4 ab = normalize3( posB - posA );\n" + " float4 ac = normalize3( center - posA );\n" + " if( dot3F4( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))\n" + " {\n" + " float angNA = dot3F4( n, angVelA );\n" + " float angNB = dot3F4( n, angVelB );\n" + " \n" + " dAngVelA -= (angNA*0.1f)*n;\n" + " dAngVelB -= (angNB*0.1f)*n;\n" + " }\n" + " }\n" + " }\n" + " \n" + " \n" + " }\n" + " if (invMassA)\n" + " {\n" + " deltaLinearVelocities[splitIndexA] = dLinVelA;\n" + " deltaAngularVelocities[splitIndexA] = dAngVelA;\n" + " } \n" + " if (invMassB)\n" + " {\n" + " deltaLinearVelocities[splitIndexB] = dLinVelB;\n" + " deltaAngularVelocities[splitIndexB] = dAngVelB;\n" + " }\n" + " \n" + "}\n" + "__kernel void SolveFrictionJacobiKernel(__global Constraint4* gConstraints, __global Body* gBodies, __global Shape* gShapes ,\n" + " __global int2* contactConstraintOffsets,__global unsigned int* offsetSplitBodies,\n" + " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities,\n" + " float deltaTime, float positionDrift, float positionConstraintCoeff, int fixedBodyIndex, int numManifolds\n" + ")\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " if (i<numManifolds)\n" + " {\n" + " solveFrictionConstraint( gBodies, gShapes, &gConstraints[i] ,&contactConstraintOffsets[i],offsetSplitBodies, deltaLinearVelocities, deltaAngularVelocities);\n" + " }\n" + "}\n" + "__kernel void UpdateBodyVelocitiesKernel(__global Body* gBodies,__global 
int* offsetSplitBodies,__global const unsigned int* bodyCount,\n" + " __global float4* deltaLinearVelocities, __global float4* deltaAngularVelocities, int numBodies)\n" + "{\n" + " int i = GET_GLOBAL_IDX;\n" + " if (i<numBodies)\n" + " {\n" + " if (gBodies[i].m_invMass)\n" + " {\n" + " int bodyOffset = offsetSplitBodies[i];\n" + " int count = bodyCount[i];\n" + " if (count)\n" + " {\n" + " gBodies[i].m_linVel += deltaLinearVelocities[bodyOffset];\n" + " gBodies[i].m_angVel += deltaAngularVelocities[bodyOffset];\n" + " }\n" + " }\n" + " }\n" + "}\n" + "void setConstraint4( const float4 posA, const float4 linVelA, const float4 angVelA, float invMassA, const Matrix3x3 invInertiaA,\n" + " const float4 posB, const float4 linVelB, const float4 angVelB, float invMassB, const Matrix3x3 invInertiaB, \n" + " __global struct b3Contact4Data* src, float dt, float positionDrift, float positionConstraintCoeff,float countA, float countB,\n" + " Constraint4* dstC )\n" + "{\n" + " dstC->m_bodyA = abs(src->m_bodyAPtrAndSignBit);\n" + " dstC->m_bodyB = abs(src->m_bodyBPtrAndSignBit);\n" + " float dtInv = 1.f/dt;\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " dstC->m_appliedRambdaDt[ic] = 0.f;\n" + " }\n" + " dstC->m_fJacCoeffInv[0] = dstC->m_fJacCoeffInv[1] = 0.f;\n" + " dstC->m_linear = src->m_worldNormalOnB;\n" + " dstC->m_linear.w = 0.7f ;//src->getFrictionCoeff() );\n" + " for(int ic=0; ic<4; ic++)\n" + " {\n" + " float4 r0 = src->m_worldPosB[ic] - posA;\n" + " float4 r1 = src->m_worldPosB[ic] - posB;\n" + " if( ic >= src->m_worldNormalOnB.w )//npoints\n" + " {\n" + " dstC->m_jacCoeffInv[ic] = 0.f;\n" + " continue;\n" + " }\n" + " float relVelN;\n" + " {\n" + " float4 linear, angular0, angular1;\n" + " setLinearAndAngular(src->m_worldNormalOnB, r0, r1, &linear, &angular0, &angular1);\n" + " dstC->m_jacCoeffInv[ic] = calcJacCoeff(linear, -linear, angular0, angular1,\n" + " invMassA, &invInertiaA, invMassB, &invInertiaB , countA, countB);\n" + " relVelN = calcRelVel(linear, 
-linear, angular0, angular1,\n" + " linVelA, angVelA, linVelB, angVelB);\n" + " float e = 0.f;//src->getRestituitionCoeff();\n" + " if( relVelN*relVelN < 0.004f ) e = 0.f;\n" + " dstC->m_b[ic] = e*relVelN;\n" + " //float penetration = src->m_worldPosB[ic].w;\n" + " dstC->m_b[ic] += (src->m_worldPosB[ic].w + positionDrift)*positionConstraintCoeff*dtInv;\n" + " dstC->m_appliedRambdaDt[ic] = 0.f;\n" + " }\n" + " }\n" + " if( src->m_worldNormalOnB.w > 0 )//npoints\n" + " { // prepare friction\n" + " float4 center = make_float4(0.f);\n" + " for(int i=0; i<src->m_worldNormalOnB.w; i++) \n" + " center += src->m_worldPosB[i];\n" + " center /= (float)src->m_worldNormalOnB.w;\n" + " float4 tangent[2];\n" + " btPlaneSpace1(-src->m_worldNormalOnB,&tangent[0],&tangent[1]);\n" + " \n" + " float4 r[2];\n" + " r[0] = center - posA;\n" + " r[1] = center - posB;\n" + " for(int i=0; i<2; i++)\n" + " {\n" + " float4 linear, angular0, angular1;\n" + " setLinearAndAngular(tangent[i], r[0], r[1], &linear, &angular0, &angular1);\n" + " dstC->m_fJacCoeffInv[i] = calcJacCoeff(linear, -linear, angular0, angular1,\n" + " invMassA, &invInertiaA, invMassB, &invInertiaB ,countA, countB);\n" + " dstC->m_fAppliedRambdaDt[i] = 0.f;\n" + " }\n" + " dstC->m_center = center;\n" + " }\n" + " for(int i=0; i<4; i++)\n" + " {\n" + " if( i<src->m_worldNormalOnB.w )\n" + " {\n" + " dstC->m_worldPos[i] = src->m_worldPosB[i];\n" + " }\n" + " else\n" + " {\n" + " dstC->m_worldPos[i] = make_float4(0.f);\n" + " }\n" + " }\n" + "}\n" + "__kernel\n" + "__attribute__((reqd_work_group_size(WG_SIZE,1,1)))\n" + "void ContactToConstraintSplitKernel(__global const struct b3Contact4Data* gContact, __global const Body* gBodies, __global const Shape* gShapes, __global Constraint4* gConstraintOut, \n" + "__global const unsigned int* bodyCount,\n" + "int nContacts,\n" + "float dt,\n" + "float positionDrift,\n" + "float positionConstraintCoeff\n" + ")\n" + "{\n" + " int gIdx = GET_GLOBAL_IDX;\n" + " \n" + " if( gIdx < 
nContacts )\n" + " {\n" + " int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);\n" + " int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);\n" + " float4 posA = gBodies[aIdx].m_pos;\n" + " float4 linVelA = gBodies[aIdx].m_linVel;\n" + " float4 angVelA = gBodies[aIdx].m_angVel;\n" + " float invMassA = gBodies[aIdx].m_invMass;\n" + " Matrix3x3 invInertiaA = gShapes[aIdx].m_invInertia;\n" + " float4 posB = gBodies[bIdx].m_pos;\n" + " float4 linVelB = gBodies[bIdx].m_linVel;\n" + " float4 angVelB = gBodies[bIdx].m_angVel;\n" + " float invMassB = gBodies[bIdx].m_invMass;\n" + " Matrix3x3 invInertiaB = gShapes[bIdx].m_invInertia;\n" + " Constraint4 cs;\n" + " float countA = invMassA != 0.f ? (float)bodyCount[aIdx] : 1;\n" + " float countB = invMassB != 0.f ? (float)bodyCount[bIdx] : 1;\n" + " setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,\n" + " &gContact[gIdx], dt, positionDrift, positionConstraintCoeff,countA,countB,\n" + " &cs );\n" + " \n" + " cs.m_batchIdx = gContact[gIdx].m_batchIdx;\n" + " gConstraintOut[gIdx] = cs;\n" + " }\n" + "}\n"; diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h index d70e74017a..bb949b2027 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/kernels/updateAabbsKernel.h @@ -1,483 +1,482 @@ //this file is autogenerated using stringify.bat (premake --stringify) in the build folder of this project -static const char* updateAabbsKernelCL= \ -"#ifndef B3_UPDATE_AABBS_H\n" -"#define B3_UPDATE_AABBS_H\n" -"#ifndef B3_AABB_H\n" -"#define B3_AABB_H\n" -"#ifndef B3_FLOAT4_H\n" -"#define B3_FLOAT4_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#define B3_PLATFORM_DEFINITIONS_H\n" -"struct MyTest\n" -"{\n" -" int bla;\n" -"};\n" -"#ifdef __cplusplus\n" -"#else\n" -"//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < 
FLT_MAX\n" -"#define B3_LARGE_FLOAT 1e18f\n" -"#define B3_INFINITY 1e18f\n" -"#define b3Assert(a)\n" -"#define b3ConstArray(a) __global const a*\n" -"#define b3AtomicInc atomic_inc\n" -"#define b3AtomicAdd atomic_add\n" -"#define b3Fabs fabs\n" -"#define b3Sqrt native_sqrt\n" -"#define b3Sin native_sin\n" -"#define b3Cos native_cos\n" -"#define B3_STATIC\n" -"#endif\n" -"#endif\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Float4;\n" -" #define b3Float4ConstArg const b3Float4\n" -" #define b3MakeFloat4 (float4)\n" -" float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return dot(a1, b1);\n" -" }\n" -" b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" -" {\n" -" float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" -" float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" -" return cross(a1, b1);\n" -" }\n" -" #define b3MinFloat4 min\n" -" #define b3MaxFloat4 max\n" -" #define b3Normalized(a) normalize(a)\n" -"#endif \n" -" \n" -"inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" -"{\n" -" if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" -" return false;\n" -" return true;\n" -"}\n" -"inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" -"{\n" -" float maxDot = -B3_INFINITY;\n" -" int i = 0;\n" -" int ptIndex = -1;\n" -" for( i = 0; i < vecLen; i++ )\n" -" {\n" -" float dot = b3Dot3F4(vecArray[i],vec);\n" -" \n" -" if( dot > maxDot )\n" -" {\n" -" maxDot = dot;\n" -" ptIndex = i;\n" -" }\n" -" }\n" -" b3Assert(ptIndex>=0);\n" -" if (ptIndex<0)\n" -" {\n" -" ptIndex = 0;\n" -" }\n" -" *dotOut = maxDot;\n" -" return ptIndex;\n" -"}\n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_MAT3x3_H\n" -"#define B3_MAT3x3_H\n" -"#ifndef B3_QUAT_H\n" -"#define B3_QUAT_H\n" -"#ifndef B3_PLATFORM_DEFINITIONS_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef 
__cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -" typedef float4 b3Quat;\n" -" #define b3QuatConstArg const b3Quat\n" -" \n" -" \n" -"inline float4 b3FastNormalize4(float4 v)\n" -"{\n" -" v = (float4)(v.xyz,0.f);\n" -" return fast_normalize(v);\n" -"}\n" -" \n" -"inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" -"inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" -"inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" -"{\n" -" b3Quat ans;\n" -" ans = b3Cross3( a, b );\n" -" ans += a.w*b+b.w*a;\n" -"// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" -" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" -" return ans;\n" -"}\n" -"inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" -"{\n" -" b3Quat q;\n" -" q=in;\n" -" //return b3FastNormalize4(in);\n" -" float len = native_sqrt(dot(q, q));\n" -" if(len > 0.f)\n" -" {\n" -" q *= 1.f / len;\n" -" }\n" -" else\n" -" {\n" -" q.x = q.y = q.z = 0.f;\n" -" q.w = 1.f;\n" -" }\n" -" return q;\n" -"}\n" -"inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" b3Quat qInv = b3QuatInvert( q );\n" -" float4 vcpy = vec;\n" -" vcpy.w = 0.f;\n" -" float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" -" return out;\n" -"}\n" -"inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" -"{\n" -" return (b3Quat)(-q.xyz, q.w);\n" -"}\n" -"inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" -"{\n" -" return b3QuatRotate( b3QuatInvert( q ), vec );\n" -"}\n" -"inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" -"{\n" -" return b3QuatRotate( orientation, point ) + (translation);\n" -"}\n" -" \n" -"#endif \n" -"#endif 
//B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"typedef struct\n" -"{\n" -" b3Float4 m_row[3];\n" -"}b3Mat3x3;\n" -"#define b3Mat3x3ConstArg const b3Mat3x3\n" -"#define b3GetRow(m,row) (m.m_row[row])\n" -"inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" -"{\n" -" b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" -" b3Mat3x3 out;\n" -" out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" -" out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" -" out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" -" out.m_row[0].w = 0.f;\n" -" out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" -" out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" -" out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" -" out.m_row[1].w = 0.f;\n" -" out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" -" out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" -" out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" -" out.m_row[2].w = 0.f;\n" -" return out;\n" -"}\n" -"inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = fabs(matIn.m_row[0]);\n" -" out.m_row[1] = fabs(matIn.m_row[1]);\n" -" out.m_row[2] = fabs(matIn.m_row[2]);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtZero();\n" -"__inline\n" -"b3Mat3x3 mtIdentity();\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" -"__inline\n" -"b3Mat3x3 mtZero()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(0.f);\n" -" m.m_row[1] = (b3Float4)(0.f);\n" -" m.m_row[2] = (b3Float4)(0.f);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtIdentity()\n" -"{\n" -" b3Mat3x3 m;\n" -" m.m_row[0] = (b3Float4)(1,0,0,0);\n" -" m.m_row[1] = (b3Float4)(0,1,0,0);\n" -" m.m_row[2] = (b3Float4)(0,0,1,0);\n" -" return m;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" -"{\n" -" b3Mat3x3 out;\n" -" out.m_row[0] = 
(b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" -" out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" -" out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" -" return out;\n" -"}\n" -"__inline\n" -"b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" -"{\n" -" b3Mat3x3 transB;\n" -" transB = mtTranspose( b );\n" -" b3Mat3x3 ans;\n" -" // why this doesn't run when 0ing in the for{}\n" -" a.m_row[0].w = 0.f;\n" -" a.m_row[1].w = 0.f;\n" -" a.m_row[2].w = 0.f;\n" -" for(int i=0; i<3; i++)\n" -" {\n" -"// a.m_row[i].w = 0.f;\n" -" ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" -" ans.m_row[i].y = b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" -" ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" -" ans.m_row[i].w = 0.f;\n" -" }\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" -"{\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a.m_row[0], b );\n" -" ans.y = b3Dot3F4( a.m_row[1], b );\n" -" ans.z = b3Dot3F4( a.m_row[2], b );\n" -" ans.w = 0.f;\n" -" return ans;\n" -"}\n" -"__inline\n" -"b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" -"{\n" -" b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" -" b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" -" b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" -" b3Float4 ans;\n" -" ans.x = b3Dot3F4( a, colx );\n" -" ans.y = b3Dot3F4( a, coly );\n" -" ans.z = b3Dot3F4( a, colz );\n" -" return ans;\n" -"}\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3Aabb b3Aabb_t;\n" -"struct b3Aabb\n" -"{\n" -" union\n" -" {\n" -" float m_min[4];\n" -" b3Float4 m_minVec;\n" -" int m_minIndices[4];\n" -" };\n" -" union\n" -" {\n" -" float m_max[4];\n" -" b3Float4 m_maxVec;\n" -" int m_signedMaxIndices[4];\n" -" };\n" -"};\n" -"inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" -" b3Float4ConstArg pos,\n" 
-" b3QuatConstArg orn,\n" -" b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" -"{\n" -" b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" -" localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" -" b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" -" b3Mat3x3 m;\n" -" m = b3QuatGetRotationMatrix(orn);\n" -" b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" -" b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" -" \n" -" b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" -" b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" -" 0.f);\n" -" *aabbMinOut = center-extent;\n" -" *aabbMaxOut = center+extent;\n" -"}\n" -"/// conservative test for overlap between two aabbs\n" -"inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" -" b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" -"{\n" -" bool overlap = true;\n" -" overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" -" overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" -" overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? 
false : overlap;\n" -" return overlap;\n" -"}\n" -"#endif //B3_AABB_H\n" -"#ifndef B3_COLLIDABLE_H\n" -"#define B3_COLLIDABLE_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_QUAT_H\n" -"enum b3ShapeTypes\n" -"{\n" -" SHAPE_HEIGHT_FIELD=1,\n" -" SHAPE_CONVEX_HULL=3,\n" -" SHAPE_PLANE=4,\n" -" SHAPE_CONCAVE_TRIMESH=5,\n" -" SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" -" SHAPE_SPHERE=7,\n" -" MAX_NUM_SHAPE_TYPES,\n" -"};\n" -"typedef struct b3Collidable b3Collidable_t;\n" -"struct b3Collidable\n" -"{\n" -" union {\n" -" int m_numChildShapes;\n" -" int m_bvhIndex;\n" -" };\n" -" union\n" -" {\n" -" float m_radius;\n" -" int m_compoundBvhIndex;\n" -" };\n" -" int m_shapeType;\n" -" union\n" -" {\n" -" int m_shapeIndex;\n" -" float m_height;\n" -" };\n" -"};\n" -"typedef struct b3GpuChildShape b3GpuChildShape_t;\n" -"struct b3GpuChildShape\n" -"{\n" -" b3Float4 m_childPosition;\n" -" b3Quat m_childOrientation;\n" -" union\n" -" {\n" -" int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS\n" -" int m_capsuleAxis;\n" -" };\n" -" union \n" -" {\n" -" float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES\n" -" int m_numChildShapes;//used for compound shape\n" -" };\n" -" union \n" -" {\n" -" float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES\n" -" int m_collidableShapeIndex;\n" -" };\n" -" int m_shapeType;\n" -"};\n" -"struct b3CompoundOverlappingPair\n" -"{\n" -" int m_bodyIndexA;\n" -" int m_bodyIndexB;\n" -"// int m_pairType;\n" -" int m_childShapeIndexA;\n" -" int m_childShapeIndexB;\n" -"};\n" -"#endif //B3_COLLIDABLE_H\n" -"#ifndef B3_RIGIDBODY_DATA_H\n" -"#define B3_RIGIDBODY_DATA_H\n" -"#ifndef B3_FLOAT4_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif //B3_FLOAT4_H\n" -"#ifndef B3_QUAT_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif \n" -"#endif 
//B3_QUAT_H\n" -"#ifndef B3_MAT3x3_H\n" -"#ifdef __cplusplus\n" -"#else\n" -"#endif\n" -"#endif //B3_MAT3x3_H\n" -"typedef struct b3RigidBodyData b3RigidBodyData_t;\n" -"struct b3RigidBodyData\n" -"{\n" -" b3Float4 m_pos;\n" -" b3Quat m_quat;\n" -" b3Float4 m_linVel;\n" -" b3Float4 m_angVel;\n" -" int m_collidableIdx;\n" -" float m_invMass;\n" -" float m_restituitionCoeff;\n" -" float m_frictionCoeff;\n" -"};\n" -"typedef struct b3InertiaData b3InertiaData_t;\n" -"struct b3InertiaData\n" -"{\n" -" b3Mat3x3 m_invInertiaWorld;\n" -" b3Mat3x3 m_initInvInertia;\n" -"};\n" -"#endif //B3_RIGIDBODY_DATA_H\n" -" \n" -"void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" -"{\n" -" __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" -" b3Float4 position = body->m_pos;\n" -" b3Quat orientation = body->m_quat;\n" -" \n" -" int collidableIndex = body->m_collidableIdx;\n" -" int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" -" \n" -" if (shapeIndex>=0)\n" -" {\n" -" \n" -" b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" -" b3Aabb_t worldAabb;\n" -" \n" -" b3Float4 aabbAMinOut,aabbAMaxOut; \n" -" float margin = 0.f;\n" -" b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" -" \n" -" worldAabb.m_minVec =aabbAMinOut;\n" -" worldAabb.m_minIndices[3] = bodyId;\n" -" worldAabb.m_maxVec = aabbAMaxOut;\n" -" worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 
0 : 1;\n" -" worldAabbs[bodyId] = worldAabb;\n" -" }\n" -"}\n" -"#endif //B3_UPDATE_AABBS_H\n" -"__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" -"{\n" -" int nodeID = get_global_id(0);\n" -" if( nodeID < numNodes )\n" -" {\n" -" b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" -" }\n" -"}\n" -"__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs)\n" -"{\n" -" int pairId = get_global_id(0);\n" -" if( pairId< numPairs )\n" -" {\n" -" pairs[pairId].z = 0xffffffff;\n" -" }\n" -"}\n" -; +static const char* updateAabbsKernelCL = + "#ifndef B3_UPDATE_AABBS_H\n" + "#define B3_UPDATE_AABBS_H\n" + "#ifndef B3_AABB_H\n" + "#define B3_AABB_H\n" + "#ifndef B3_FLOAT4_H\n" + "#define B3_FLOAT4_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#define B3_PLATFORM_DEFINITIONS_H\n" + "struct MyTest\n" + "{\n" + " int bla;\n" + "};\n" + "#ifdef __cplusplus\n" + "#else\n" + "//keep B3_LARGE_FLOAT*B3_LARGE_FLOAT < FLT_MAX\n" + "#define B3_LARGE_FLOAT 1e18f\n" + "#define B3_INFINITY 1e18f\n" + "#define b3Assert(a)\n" + "#define b3ConstArray(a) __global const a*\n" + "#define b3AtomicInc atomic_inc\n" + "#define b3AtomicAdd atomic_add\n" + "#define b3Fabs fabs\n" + "#define b3Sqrt native_sqrt\n" + "#define b3Sin native_sin\n" + "#define b3Cos native_cos\n" + "#define B3_STATIC\n" + "#endif\n" + "#endif\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Float4;\n" + " #define b3Float4ConstArg const b3Float4\n" + " #define b3MakeFloat4 (float4)\n" + " float b3Dot3F4(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = b3MakeFloat4(v1.xyz,0.f);\n" + " return dot(a1, b1);\n" + " }\n" + " b3Float4 b3Cross3(b3Float4ConstArg v0,b3Float4ConstArg v1)\n" + " {\n" + " float4 a1 = b3MakeFloat4(v0.xyz,0.f);\n" + " float4 b1 = 
b3MakeFloat4(v1.xyz,0.f);\n" + " return cross(a1, b1);\n" + " }\n" + " #define b3MinFloat4 min\n" + " #define b3MaxFloat4 max\n" + " #define b3Normalized(a) normalize(a)\n" + "#endif \n" + " \n" + "inline bool b3IsAlmostZero(b3Float4ConstArg v)\n" + "{\n" + " if(b3Fabs(v.x)>1e-6 || b3Fabs(v.y)>1e-6 || b3Fabs(v.z)>1e-6) \n" + " return false;\n" + " return true;\n" + "}\n" + "inline int b3MaxDot( b3Float4ConstArg vec, __global const b3Float4* vecArray, int vecLen, float* dotOut )\n" + "{\n" + " float maxDot = -B3_INFINITY;\n" + " int i = 0;\n" + " int ptIndex = -1;\n" + " for( i = 0; i < vecLen; i++ )\n" + " {\n" + " float dot = b3Dot3F4(vecArray[i],vec);\n" + " \n" + " if( dot > maxDot )\n" + " {\n" + " maxDot = dot;\n" + " ptIndex = i;\n" + " }\n" + " }\n" + " b3Assert(ptIndex>=0);\n" + " if (ptIndex<0)\n" + " {\n" + " ptIndex = 0;\n" + " }\n" + " *dotOut = maxDot;\n" + " return ptIndex;\n" + "}\n" + "#endif //B3_FLOAT4_H\n" + "#ifndef B3_MAT3x3_H\n" + "#define B3_MAT3x3_H\n" + "#ifndef B3_QUAT_H\n" + "#define B3_QUAT_H\n" + "#ifndef B3_PLATFORM_DEFINITIONS_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif\n" + "#endif\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + " typedef float4 b3Quat;\n" + " #define b3QuatConstArg const b3Quat\n" + " \n" + " \n" + "inline float4 b3FastNormalize4(float4 v)\n" + "{\n" + " v = (float4)(v.xyz,0.f);\n" + " return fast_normalize(v);\n" + "}\n" + " \n" + "inline b3Quat b3QuatMul(b3Quat a, b3Quat b);\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in);\n" + "inline b3Quat b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec);\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q);\n" + "inline b3Quat b3QuatInverse(b3QuatConstArg q);\n" + "inline b3Quat b3QuatMul(b3QuatConstArg a, b3QuatConstArg b)\n" + "{\n" + " b3Quat ans;\n" + " ans = b3Cross3( a, b );\n" + " ans += a.w*b+b.w*a;\n" + "// ans.w = a.w*b.w - (a.x*b.x+a.y*b.y+a.z*b.z);\n" + 
" ans.w = a.w*b.w - b3Dot3F4(a, b);\n" + " return ans;\n" + "}\n" + "inline b3Quat b3QuatNormalized(b3QuatConstArg in)\n" + "{\n" + " b3Quat q;\n" + " q=in;\n" + " //return b3FastNormalize4(in);\n" + " float len = native_sqrt(dot(q, q));\n" + " if(len > 0.f)\n" + " {\n" + " q *= 1.f / len;\n" + " }\n" + " else\n" + " {\n" + " q.x = q.y = q.z = 0.f;\n" + " q.w = 1.f;\n" + " }\n" + " return q;\n" + "}\n" + "inline float4 b3QuatRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " b3Quat qInv = b3QuatInvert( q );\n" + " float4 vcpy = vec;\n" + " vcpy.w = 0.f;\n" + " float4 out = b3QuatMul(b3QuatMul(q,vcpy),qInv);\n" + " return out;\n" + "}\n" + "inline b3Quat b3QuatInverse(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline b3Quat b3QuatInvert(b3QuatConstArg q)\n" + "{\n" + " return (b3Quat)(-q.xyz, q.w);\n" + "}\n" + "inline float4 b3QuatInvRotate(b3QuatConstArg q, b3QuatConstArg vec)\n" + "{\n" + " return b3QuatRotate( b3QuatInvert( q ), vec );\n" + "}\n" + "inline b3Float4 b3TransformPoint(b3Float4ConstArg point, b3Float4ConstArg translation, b3QuatConstArg orientation)\n" + "{\n" + " return b3QuatRotate( orientation, point ) + (translation);\n" + "}\n" + " \n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "typedef struct\n" + "{\n" + " b3Float4 m_row[3];\n" + "}b3Mat3x3;\n" + "#define b3Mat3x3ConstArg const b3Mat3x3\n" + "#define b3GetRow(m,row) (m.m_row[row])\n" + "inline b3Mat3x3 b3QuatGetRotationMatrix(b3Quat quat)\n" + "{\n" + " b3Float4 quat2 = (b3Float4)(quat.x*quat.x, quat.y*quat.y, quat.z*quat.z, 0.f);\n" + " b3Mat3x3 out;\n" + " out.m_row[0].x=1-2*quat2.y-2*quat2.z;\n" + " out.m_row[0].y=2*quat.x*quat.y-2*quat.w*quat.z;\n" + " out.m_row[0].z=2*quat.x*quat.z+2*quat.w*quat.y;\n" + " out.m_row[0].w = 0.f;\n" + " out.m_row[1].x=2*quat.x*quat.y+2*quat.w*quat.z;\n" + " out.m_row[1].y=1-2*quat2.x-2*quat2.z;\n" + " out.m_row[1].z=2*quat.y*quat.z-2*quat.w*quat.x;\n" + " out.m_row[1].w = 0.f;\n" 
+ " out.m_row[2].x=2*quat.x*quat.z-2*quat.w*quat.y;\n" + " out.m_row[2].y=2*quat.y*quat.z+2*quat.w*quat.x;\n" + " out.m_row[2].z=1-2*quat2.x-2*quat2.y;\n" + " out.m_row[2].w = 0.f;\n" + " return out;\n" + "}\n" + "inline b3Mat3x3 b3AbsoluteMat3x3(b3Mat3x3ConstArg matIn)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = fabs(matIn.m_row[0]);\n" + " out.m_row[1] = fabs(matIn.m_row[1]);\n" + " out.m_row[2] = fabs(matIn.m_row[2]);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtZero();\n" + "__inline\n" + "b3Mat3x3 mtIdentity();\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m);\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b);\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b);\n" + "__inline\n" + "b3Mat3x3 mtZero()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(0.f);\n" + " m.m_row[1] = (b3Float4)(0.f);\n" + " m.m_row[2] = (b3Float4)(0.f);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtIdentity()\n" + "{\n" + " b3Mat3x3 m;\n" + " m.m_row[0] = (b3Float4)(1,0,0,0);\n" + " m.m_row[1] = (b3Float4)(0,1,0,0);\n" + " m.m_row[2] = (b3Float4)(0,0,1,0);\n" + " return m;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtTranspose(b3Mat3x3 m)\n" + "{\n" + " b3Mat3x3 out;\n" + " out.m_row[0] = (b3Float4)(m.m_row[0].x, m.m_row[1].x, m.m_row[2].x, 0.f);\n" + " out.m_row[1] = (b3Float4)(m.m_row[0].y, m.m_row[1].y, m.m_row[2].y, 0.f);\n" + " out.m_row[2] = (b3Float4)(m.m_row[0].z, m.m_row[1].z, m.m_row[2].z, 0.f);\n" + " return out;\n" + "}\n" + "__inline\n" + "b3Mat3x3 mtMul(b3Mat3x3 a, b3Mat3x3 b)\n" + "{\n" + " b3Mat3x3 transB;\n" + " transB = mtTranspose( b );\n" + " b3Mat3x3 ans;\n" + " // why this doesn't run when 0ing in the for{}\n" + " a.m_row[0].w = 0.f;\n" + " a.m_row[1].w = 0.f;\n" + " a.m_row[2].w = 0.f;\n" + " for(int i=0; i<3; i++)\n" + " {\n" + "// a.m_row[i].w = 0.f;\n" + " ans.m_row[i].x = b3Dot3F4(a.m_row[i],transB.m_row[0]);\n" + " ans.m_row[i].y = 
b3Dot3F4(a.m_row[i],transB.m_row[1]);\n" + " ans.m_row[i].z = b3Dot3F4(a.m_row[i],transB.m_row[2]);\n" + " ans.m_row[i].w = 0.f;\n" + " }\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul1(b3Mat3x3 a, b3Float4 b)\n" + "{\n" + " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a.m_row[0], b );\n" + " ans.y = b3Dot3F4( a.m_row[1], b );\n" + " ans.z = b3Dot3F4( a.m_row[2], b );\n" + " ans.w = 0.f;\n" + " return ans;\n" + "}\n" + "__inline\n" + "b3Float4 mtMul3(b3Float4 a, b3Mat3x3 b)\n" + "{\n" + " b3Float4 colx = b3MakeFloat4(b.m_row[0].x, b.m_row[1].x, b.m_row[2].x, 0);\n" + " b3Float4 coly = b3MakeFloat4(b.m_row[0].y, b.m_row[1].y, b.m_row[2].y, 0);\n" + " b3Float4 colz = b3MakeFloat4(b.m_row[0].z, b.m_row[1].z, b.m_row[2].z, 0);\n" + " b3Float4 ans;\n" + " ans.x = b3Dot3F4( a, colx );\n" + " ans.y = b3Dot3F4( a, coly );\n" + " ans.z = b3Dot3F4( a, colz );\n" + " return ans;\n" + "}\n" + "#endif\n" + "#endif //B3_MAT3x3_H\n" + "typedef struct b3Aabb b3Aabb_t;\n" + "struct b3Aabb\n" + "{\n" + " union\n" + " {\n" + " float m_min[4];\n" + " b3Float4 m_minVec;\n" + " int m_minIndices[4];\n" + " };\n" + " union\n" + " {\n" + " float m_max[4];\n" + " b3Float4 m_maxVec;\n" + " int m_signedMaxIndices[4];\n" + " };\n" + "};\n" + "inline void b3TransformAabb2(b3Float4ConstArg localAabbMin,b3Float4ConstArg localAabbMax, float margin,\n" + " b3Float4ConstArg pos,\n" + " b3QuatConstArg orn,\n" + " b3Float4* aabbMinOut,b3Float4* aabbMaxOut)\n" + "{\n" + " b3Float4 localHalfExtents = 0.5f*(localAabbMax-localAabbMin);\n" + " localHalfExtents+=b3MakeFloat4(margin,margin,margin,0.f);\n" + " b3Float4 localCenter = 0.5f*(localAabbMax+localAabbMin);\n" + " b3Mat3x3 m;\n" + " m = b3QuatGetRotationMatrix(orn);\n" + " b3Mat3x3 abs_b = b3AbsoluteMat3x3(m);\n" + " b3Float4 center = b3TransformPoint(localCenter,pos,orn);\n" + " \n" + " b3Float4 extent = b3MakeFloat4(b3Dot3F4(localHalfExtents,b3GetRow(abs_b,0)),\n" + " b3Dot3F4(localHalfExtents,b3GetRow(abs_b,1)),\n" + " 
b3Dot3F4(localHalfExtents,b3GetRow(abs_b,2)),\n" + " 0.f);\n" + " *aabbMinOut = center-extent;\n" + " *aabbMaxOut = center+extent;\n" + "}\n" + "/// conservative test for overlap between two aabbs\n" + "inline bool b3TestAabbAgainstAabb(b3Float4ConstArg aabbMin1,b3Float4ConstArg aabbMax1,\n" + " b3Float4ConstArg aabbMin2, b3Float4ConstArg aabbMax2)\n" + "{\n" + " bool overlap = true;\n" + " overlap = (aabbMin1.x > aabbMax2.x || aabbMax1.x < aabbMin2.x) ? false : overlap;\n" + " overlap = (aabbMin1.z > aabbMax2.z || aabbMax1.z < aabbMin2.z) ? false : overlap;\n" + " overlap = (aabbMin1.y > aabbMax2.y || aabbMax1.y < aabbMin2.y) ? false : overlap;\n" + " return overlap;\n" + "}\n" + "#endif //B3_AABB_H\n" + "#ifndef B3_COLLIDABLE_H\n" + "#define B3_COLLIDABLE_H\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifndef B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "enum b3ShapeTypes\n" + "{\n" + " SHAPE_HEIGHT_FIELD=1,\n" + " SHAPE_CONVEX_HULL=3,\n" + " SHAPE_PLANE=4,\n" + " SHAPE_CONCAVE_TRIMESH=5,\n" + " SHAPE_COMPOUND_OF_CONVEX_HULLS=6,\n" + " SHAPE_SPHERE=7,\n" + " MAX_NUM_SHAPE_TYPES,\n" + "};\n" + "typedef struct b3Collidable b3Collidable_t;\n" + "struct b3Collidable\n" + "{\n" + " union {\n" + " int m_numChildShapes;\n" + " int m_bvhIndex;\n" + " };\n" + " union\n" + " {\n" + " float m_radius;\n" + " int m_compoundBvhIndex;\n" + " };\n" + " int m_shapeType;\n" + " union\n" + " {\n" + " int m_shapeIndex;\n" + " float m_height;\n" + " };\n" + "};\n" + "typedef struct b3GpuChildShape b3GpuChildShape_t;\n" + "struct b3GpuChildShape\n" + "{\n" + " b3Float4 m_childPosition;\n" + " b3Quat m_childOrientation;\n" + " union\n" + " {\n" + " int m_shapeIndex;//used for SHAPE_COMPOUND_OF_CONVEX_HULLS\n" + " int m_capsuleAxis;\n" + " };\n" + " union \n" + " {\n" + " float m_radius;//used for childshape of SHAPE_COMPOUND_OF_SPHERES or SHAPE_COMPOUND_OF_CAPSULES\n" + " int 
m_numChildShapes;//used for compound shape\n" + " };\n" + " union \n" + " {\n" + " float m_height;//used for childshape of SHAPE_COMPOUND_OF_CAPSULES\n" + " int m_collidableShapeIndex;\n" + " };\n" + " int m_shapeType;\n" + "};\n" + "struct b3CompoundOverlappingPair\n" + "{\n" + " int m_bodyIndexA;\n" + " int m_bodyIndexB;\n" + "// int m_pairType;\n" + " int m_childShapeIndexA;\n" + " int m_childShapeIndexB;\n" + "};\n" + "#endif //B3_COLLIDABLE_H\n" + "#ifndef B3_RIGIDBODY_DATA_H\n" + "#define B3_RIGIDBODY_DATA_H\n" + "#ifndef B3_FLOAT4_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_FLOAT4_H\n" + "#ifndef B3_QUAT_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif \n" + "#endif //B3_QUAT_H\n" + "#ifndef B3_MAT3x3_H\n" + "#ifdef __cplusplus\n" + "#else\n" + "#endif\n" + "#endif //B3_MAT3x3_H\n" + "typedef struct b3RigidBodyData b3RigidBodyData_t;\n" + "struct b3RigidBodyData\n" + "{\n" + " b3Float4 m_pos;\n" + " b3Quat m_quat;\n" + " b3Float4 m_linVel;\n" + " b3Float4 m_angVel;\n" + " int m_collidableIdx;\n" + " float m_invMass;\n" + " float m_restituitionCoeff;\n" + " float m_frictionCoeff;\n" + "};\n" + "typedef struct b3InertiaData b3InertiaData_t;\n" + "struct b3InertiaData\n" + "{\n" + " b3Mat3x3 m_invInertiaWorld;\n" + " b3Mat3x3 m_initInvInertia;\n" + "};\n" + "#endif //B3_RIGIDBODY_DATA_H\n" + " \n" + "void b3ComputeWorldAabb( int bodyId, __global const b3RigidBodyData_t* bodies, __global const b3Collidable_t* collidables, __global const b3Aabb_t* localShapeAABB, __global b3Aabb_t* worldAabbs)\n" + "{\n" + " __global const b3RigidBodyData_t* body = &bodies[bodyId];\n" + " b3Float4 position = body->m_pos;\n" + " b3Quat orientation = body->m_quat;\n" + " \n" + " int collidableIndex = body->m_collidableIdx;\n" + " int shapeIndex = collidables[collidableIndex].m_shapeIndex;\n" + " \n" + " if (shapeIndex>=0)\n" + " {\n" + " \n" + " b3Aabb_t localAabb = localShapeAABB[collidableIndex];\n" + " b3Aabb_t worldAabb;\n" + " \n" + " b3Float4 
aabbAMinOut,aabbAMaxOut; \n" + " float margin = 0.f;\n" + " b3TransformAabb2(localAabb.m_minVec,localAabb.m_maxVec,margin,position,orientation,&aabbAMinOut,&aabbAMaxOut);\n" + " \n" + " worldAabb.m_minVec =aabbAMinOut;\n" + " worldAabb.m_minIndices[3] = bodyId;\n" + " worldAabb.m_maxVec = aabbAMaxOut;\n" + " worldAabb.m_signedMaxIndices[3] = body[bodyId].m_invMass==0.f? 0 : 1;\n" + " worldAabbs[bodyId] = worldAabb;\n" + " }\n" + "}\n" + "#endif //B3_UPDATE_AABBS_H\n" + "__kernel void initializeGpuAabbsFull( const int numNodes, __global b3RigidBodyData_t* gBodies,__global b3Collidable_t* collidables, __global b3Aabb_t* plocalShapeAABB, __global b3Aabb_t* pAABB)\n" + "{\n" + " int nodeID = get_global_id(0);\n" + " if( nodeID < numNodes )\n" + " {\n" + " b3ComputeWorldAabb(nodeID, gBodies, collidables, plocalShapeAABB,pAABB);\n" + " }\n" + "}\n" + "__kernel void clearOverlappingPairsKernel( __global int4* pairs, int numPairs)\n" + "{\n" + " int pairId = get_global_id(0);\n" + " if( pairId< numPairs )\n" + " {\n" + " pairs[pairId].z = 0xffffffff;\n" + " }\n" + "}\n"; |