summaryrefslogtreecommitdiff
path: root/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp')
-rw-r--r--thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp835
1 files changed, 369 insertions, 466 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp
index 20bf6d47c5..ccf67da1a8 100644
--- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp
+++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp
@@ -13,7 +13,6 @@ subject to the following restrictions:
*/
//Originally written by Takahiro Harada
-
#include "b3Solver.h"
///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments
@@ -38,7 +37,6 @@ bool gConvertConstraintOnCpu = false;
#include "kernels/batchingKernels.h"
#include "kernels/batchingKernelsNew.h"
-
#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h"
#include "Bullet3Common/b3Vector3.h"
@@ -48,7 +46,7 @@ struct SolverDebugInfo
int m_valInt1;
int m_valInt2;
int m_valInt3;
-
+
int m_valInt4;
int m_valInt5;
int m_valInt6;
@@ -59,11 +57,10 @@ struct SolverDebugInfo
int m_valInt10;
int m_valInt11;
- int m_valInt12;
- int m_valInt13;
- int m_valInt14;
- int m_valInt15;
-
+ int m_valInt12;
+ int m_valInt13;
+ int m_valInt14;
+ int m_valInt15;
float m_val0;
float m_val1;
@@ -71,9 +68,6 @@ struct SolverDebugInfo
float m_val3;
};
-
-
-
class SolverDeviceInl
{
public:
@@ -84,101 +78,89 @@ public:
};
};
-
-
b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity)
- :
- m_context(ctx),
- m_device(device),
- m_queue(queue),
- m_batchSizes(ctx,queue),
- m_nIterations(4)
+ : m_context(ctx),
+ m_device(device),
+ m_queue(queue),
+ m_batchSizes(ctx, queue),
+ m_nIterations(4)
{
- m_sort32 = new b3RadixSort32CL(ctx,device,queue);
- m_scan = new b3PrefixScanCL(ctx,device,queue,B3_SOLVER_N_CELLS);
- m_search = new b3BoundSearchCL(ctx,device,queue,B3_SOLVER_N_CELLS);
+ m_sort32 = new b3RadixSort32CL(ctx, device, queue);
+ m_scan = new b3PrefixScanCL(ctx, device, queue, B3_SOLVER_N_CELLS);
+ m_search = new b3BoundSearchCL(ctx, device, queue, B3_SOLVER_N_CELLS);
- const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 );
+ const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512);
- m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize);
- m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue);
+ m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx, queue, sortSize);
+ m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx, queue);
- m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,B3_SOLVER_N_CELLS );
+ m_numConstraints = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS);
m_numConstraints->resize(B3_SOLVER_N_CELLS);
- m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue,B3_SOLVER_N_CELLS);
+ m_offsets = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS);
m_offsets->resize(B3_SOLVER_N_CELLS);
const char* additionalMacros = "";
-// const char* srcFileNameForCaching="";
-
-
+ // const char* srcFileNameForCaching="";
cl_int pErrNum;
const char* batchKernelSource = batchingKernelsCL;
const char* batchKernelNewSource = batchingKernelsNewCL;
-
+
const char* solverSetupSource = solverSetupCL;
const char* solverSetup2Source = solverSetup2CL;
const char* solveContactSource = solveContactCL;
const char* solveFrictionSource = solveFrictionCL;
-
-
-
+
{
-
- cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH);
+ cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH);
b3Assert(solveContactProg);
-
- cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
+
+ cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH);
b3Assert(solveFrictionProg);
- cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
+ cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH);
b3Assert(solverSetup2Prog);
-
- cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH);
+ cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH);
b3Assert(solverSetupProg);
-
-
- m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros );
+
+ m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros);
b3Assert(m_solveFrictionKernel);
- m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros );
+ m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros);
b3Assert(m_solveContactKernel);
-
- m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros );
+
+ m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros);
b3Assert(m_contactToConstraintKernel);
-
- m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+
+ m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros);
b3Assert(m_setSortDataKernel);
-
- m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+
+ m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros);
b3Assert(m_reorderContactKernel);
-
- m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros );
+ m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros);
b3Assert(m_copyConstraintKernel);
-
}
{
- cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH);
+ cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH);
//cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true);
b3Assert(batchingProg);
-
- m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros );
+
+ m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros);
b3Assert(m_batchingKernel);
}
{
- cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH);
+ cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH);
b3Assert(batchingNewProg);
- m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros );
+ m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros);
//m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros );
b3Assert(m_batchingKernelNew);
}
}
-
+
b3Solver::~b3Solver()
{
delete m_offsets;
@@ -190,71 +172,68 @@ b3Solver::~b3Solver()
delete m_scan;
delete m_search;
-
clReleaseKernel(m_batchingKernel);
clReleaseKernel(m_batchingKernelNew);
-
- clReleaseKernel( m_solveContactKernel);
- clReleaseKernel( m_solveFrictionKernel);
-
- clReleaseKernel( m_contactToConstraintKernel);
- clReleaseKernel( m_setSortDataKernel);
- clReleaseKernel( m_reorderContactKernel);
- clReleaseKernel( m_copyConstraintKernel);
-
-}
+ clReleaseKernel(m_solveContactKernel);
+ clReleaseKernel(m_solveFrictionKernel);
-
+ clReleaseKernel(m_contactToConstraintKernel);
+ clReleaseKernel(m_setSortDataKernel);
+ clReleaseKernel(m_reorderContactKernel);
+ clReleaseKernel(m_copyConstraintKernel);
+}
-template<bool JACOBI>
-static
-__inline
-void solveContact(b3GpuConstraint4& cs,
- const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
- const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
- float maxRambdaDt[4], float minRambdaDt[4])
+template <bool JACOBI>
+static __inline void solveContact(b3GpuConstraint4& cs,
+ const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
+ const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
+ float maxRambdaDt[4], float minRambdaDt[4])
{
-
- b3Vector3 dLinVelA; dLinVelA.setZero();
- b3Vector3 dAngVelA; dAngVelA.setZero();
- b3Vector3 dLinVelB; dLinVelB.setZero();
- b3Vector3 dAngVelB; dAngVelB.setZero();
-
- for(int ic=0; ic<4; ic++)
+ b3Vector3 dLinVelA;
+ dLinVelA.setZero();
+ b3Vector3 dAngVelA;
+ dAngVelA.setZero();
+ b3Vector3 dLinVelB;
+ dLinVelB.setZero();
+ b3Vector3 dAngVelB;
+ dAngVelB.setZero();
+
+ for (int ic = 0; ic < 4; ic++)
{
// dont necessary because this makes change to 0
- if( cs.m_jacCoeffInv[ic] == 0.f ) continue;
+ if (cs.m_jacCoeffInv[ic] == 0.f) continue;
{
b3Vector3 angular0, angular1, linear;
b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA;
b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB;
- setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, &linear, &angular0, &angular1 );
+ setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, &linear, &angular0, &angular1);
- float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1,
- linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic];
+ float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1,
+ linVelA, angVelA, linVelB, angVelB) +
+ cs.m_b[ic];
rambdaDt *= cs.m_jacCoeffInv[ic];
{
float prevSum = cs.m_appliedRambdaDt[ic];
float updated = prevSum;
updated += rambdaDt;
- updated = b3Max( updated, minRambdaDt[ic] );
- updated = b3Min( updated, maxRambdaDt[ic] );
+ updated = b3Max(updated, minRambdaDt[ic]);
+ updated = b3Min(updated, maxRambdaDt[ic]);
rambdaDt = updated - prevSum;
cs.m_appliedRambdaDt[ic] = updated;
}
- b3Vector3 linImp0 = invMassA*linear*rambdaDt;
- b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
- b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
- b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
+ b3Vector3 linImp0 = invMassA * linear * rambdaDt;
+ b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
+ b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
+ b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
#ifdef _WIN32
- b3Assert(_finite(linImp0.getX()));
+ b3Assert(_finite(linImp0.getX()));
b3Assert(_finite(linImp1.getX()));
#endif
- if( JACOBI )
+ if (JACOBI)
{
dLinVelA += linImp0;
dAngVelA += angImp0;
@@ -271,92 +250,83 @@ void solveContact(b3GpuConstraint4& cs,
}
}
- if( JACOBI )
+ if (JACOBI)
{
linVelA += dLinVelA;
angVelA += dAngVelA;
linVelB += dLinVelB;
angVelB += dAngVelB;
}
-
}
+static __inline void solveFriction(b3GpuConstraint4& cs,
+ const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
+ const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
+ float maxRambdaDt[4], float minRambdaDt[4])
+{
+ if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return;
+ const b3Vector3& center = (const b3Vector3&)cs.m_center;
+ b3Vector3 n = -(const b3Vector3&)cs.m_linear;
-
-
- static
- __inline
- void solveFriction(b3GpuConstraint4& cs,
- const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA,
- const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB,
- float maxRambdaDt[4], float minRambdaDt[4])
- {
-
- if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return;
- const b3Vector3& center = (const b3Vector3&)cs.m_center;
-
- b3Vector3 n = -(const b3Vector3&)cs.m_linear;
-
- b3Vector3 tangent[2];
-#if 1
- b3PlaneSpace1 (n, tangent[0],tangent[1]);
+ b3Vector3 tangent[2];
+#if 1
+ b3PlaneSpace1(n, tangent[0], tangent[1]);
#else
- b3Vector3 r = cs.m_worldPos[0]-center;
- tangent[0] = cross3( n, r );
- tangent[1] = cross3( tangent[0], n );
- tangent[0] = normalize3( tangent[0] );
- tangent[1] = normalize3( tangent[1] );
+ b3Vector3 r = cs.m_worldPos[0] - center;
+ tangent[0] = cross3(n, r);
+ tangent[1] = cross3(tangent[0], n);
+ tangent[0] = normalize3(tangent[0]);
+ tangent[1] = normalize3(tangent[1]);
#endif
- b3Vector3 angular0, angular1, linear;
- b3Vector3 r0 = center - posA;
- b3Vector3 r1 = center - posB;
- for(int i=0; i<2; i++)
- {
- setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 );
- float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
- linVelA, angVelA, linVelB, angVelB );
- rambdaDt *= cs.m_fJacCoeffInv[i];
+ b3Vector3 angular0, angular1, linear;
+ b3Vector3 r0 = center - posA;
+ b3Vector3 r1 = center - posB;
+ for (int i = 0; i < 2; i++)
+ {
+ setLinearAndAngular(tangent[i], r0, r1, &linear, &angular0, &angular1);
+ float rambdaDt = calcRelVel(linear, -linear, angular0, angular1,
+ linVelA, angVelA, linVelB, angVelB);
+ rambdaDt *= cs.m_fJacCoeffInv[i];
- {
- float prevSum = cs.m_fAppliedRambdaDt[i];
- float updated = prevSum;
- updated += rambdaDt;
- updated = b3Max( updated, minRambdaDt[i] );
- updated = b3Min( updated, maxRambdaDt[i] );
- rambdaDt = updated - prevSum;
- cs.m_fAppliedRambdaDt[i] = updated;
- }
+ {
+ float prevSum = cs.m_fAppliedRambdaDt[i];
+ float updated = prevSum;
+ updated += rambdaDt;
+ updated = b3Max(updated, minRambdaDt[i]);
+ updated = b3Min(updated, maxRambdaDt[i]);
+ rambdaDt = updated - prevSum;
+ cs.m_fAppliedRambdaDt[i] = updated;
+ }
- b3Vector3 linImp0 = invMassA*linear*rambdaDt;
- b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt;
- b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt;
- b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt;
+ b3Vector3 linImp0 = invMassA * linear * rambdaDt;
+ b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt;
+ b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt;
+ b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt;
#ifdef _WIN32
- b3Assert(_finite(linImp0.getX()));
- b3Assert(_finite(linImp1.getX()));
+ b3Assert(_finite(linImp0.getX()));
+ b3Assert(_finite(linImp1.getX()));
#endif
- linVelA += linImp0;
- angVelA += angImp0;
- linVelB += linImp1;
- angVelB += angImp1;
- }
+ linVelA += linImp0;
+ angVelA += angImp0;
+ linVelB += linImp1;
+ angVelB += angImp1;
+ }
- { // angular damping for point constraint
- b3Vector3 ab = ( posB - posA ).normalized();
- b3Vector3 ac = ( center - posA ).normalized();
- if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
- {
- float angNA = b3Dot( n, angVelA );
- float angNB = b3Dot( n, angVelB );
+ { // angular damping for point constraint
+ b3Vector3 ab = (posB - posA).normalized();
+ b3Vector3 ac = (center - posA).normalized();
+ if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f))
+ {
+ float angNA = b3Dot(n, angVelA);
+ float angNB = b3Dot(n, angVelB);
- angVelA -= (angNA*0.1f)*n;
- angVelB -= (angNB*0.1f)*n;
- }
+ angVelA -= (angNA * 0.1f) * n;
+ angVelB -= (angNB * 0.1f) * n;
}
-
}
+}
/*
b3AlignedObjectArray<b3RigidBodyData>& m_bodies;
b3AlignedObjectArray<b3InertiaData>& m_shapes;
@@ -370,79 +340,69 @@ void solveContact(b3GpuConstraint4& cs,
int m_maxNumBatches;
*/
-struct SolveTask// : public ThreadPool::Task
+struct SolveTask // : public ThreadPool::Task
{
- SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
- int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex)
- : m_bodies( bodies ), m_shapes( shapes ),
- m_constraints( constraints ),
- m_batchSizes(batchSizes),
- m_cellIndex(cellIndex),
- m_curWgidx(curWgidx),
- m_start( start ),
- m_nConstraints( nConstraints ),
- m_solveFriction( true ),
- m_maxNumBatches(maxNumBatches)
- {}
-
- unsigned short int getType(){ return 0; }
+ SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints,
+ int start, int nConstraints, int maxNumBatches, b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex)
+ : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_batchSizes(batchSizes), m_cellIndex(cellIndex), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches)
+ {
+ }
+
+ unsigned short int getType() { return 0; }
void run(int tIdx)
{
int offset = 0;
- for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++)
+ for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++)
{
- int numInBatch = m_batchSizes->at(m_cellIndex*B3_MAX_NUM_BATCHES+ii);
+ int numInBatch = m_batchSizes->at(m_cellIndex * B3_MAX_NUM_BATCHES + ii);
if (!numInBatch)
break;
- for (int jj=0;jj<numInBatch;jj++)
+ for (int jj = 0; jj < numInBatch; jj++)
{
- int i = m_start + offset+jj;
+ int i = m_start + offset + jj;
int batchId = m_constraints[i].m_batchIdx;
- b3Assert(batchId==ii);
+ b3Assert(batchId == ii);
float frictionCoeff = m_constraints[i].getFrictionCoeff();
int aIdx = (int)m_constraints[i].m_bodyA;
int bIdx = (int)m_constraints[i].m_bodyB;
-// int localBatch = m_constraints[i].m_batchIdx;
+ // int localBatch = m_constraints[i].m_batchIdx;
b3RigidBodyData& bodyA = m_bodies[aIdx];
b3RigidBodyData& bodyB = m_bodies[bIdx];
- if( !m_solveFriction )
+ if (!m_solveFriction)
{
- float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
- float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
+ float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
+ float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
- solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld,
- (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld,
- maxRambdaDt, minRambdaDt );
+ solveContact<false>(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld,
+ (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld,
+ maxRambdaDt, minRambdaDt);
}
else
{
- float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX};
- float minRambdaDt[4] = {0.f,0.f,0.f,0.f};
+ float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX};
+ float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f};
float sum = 0;
- for(int j=0; j<4; j++)
+ for (int j = 0; j < 4; j++)
{
- sum +=m_constraints[i].m_appliedRambdaDt[j];
+ sum += m_constraints[i].m_appliedRambdaDt[j];
}
frictionCoeff = 0.7f;
- for(int j=0; j<4; j++)
+ for (int j = 0; j < 4; j++)
{
- maxRambdaDt[j] = frictionCoeff*sum;
+ maxRambdaDt[j] = frictionCoeff * sum;
minRambdaDt[j] = -maxRambdaDt[j];
}
- solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld,
- (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld,
- maxRambdaDt, minRambdaDt );
-
+ solveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld,
+ (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld,
+ maxRambdaDt, minRambdaDt);
}
}
- offset+=numInBatch;
-
-
+ offset += numInBatch;
}
-/* for (int bb=0;bb<m_maxNumBatches;bb++)
+ /* for (int bb=0;bb<m_maxNumBatches;bb++)
{
//for(int ic=m_nConstraints-1; ic>=0; ic--)
for(int ic=0; ic<m_nConstraints; ic++)
@@ -491,9 +451,6 @@ struct SolveTask// : public ThreadPool::Task
}
}
*/
-
-
-
}
b3AlignedObjectArray<b3RigidBodyData>& m_bodies;
@@ -508,11 +465,9 @@ struct SolveTask// : public ThreadPool::Task
int m_maxNumBatches;
};
-
-void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf,
- b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,b3AlignedObjectArray<int>* batchSizes)
+void b3Solver::solveContactConstraintHost(b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf,
+ b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray<int>* batchSizes)
{
-
#if 0
{
int nSplitX = B3_SOLVER_N_SPLIT_X;
@@ -571,114 +526,105 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* body
//printf("------------------------\n");
b3AlignedObjectArray<unsigned int> offsetsHost;
m_offsets->copyToHost(offsetsHost);
- static int frame=0;
- bool useBatches=true;
+ static int frame = 0;
+ bool useBatches = true;
if (useBatches)
{
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++)
+ for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++)
{
-
int nSplitX = B3_SOLVER_N_SPLIT_X;
int nSplitY = B3_SOLVER_N_SPLIT_Y;
- int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES;
+ int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES;
//printf("cell Batch %d\n",cellBatch);
b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS];
- for (int i=0;i<B3_SOLVER_N_CELLS;i++)
+ for (int i = 0; i < B3_SOLVER_N_CELLS; i++)
{
usedBodies[i].resize(0);
}
-
-
-
//for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--)
- for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
+ for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++)
{
- int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2);
- int remain= (wgIdx%((nSplitX*nSplitY)/4));
- int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1);
- int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1);
- int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
-
-
- if( numConstraintsHost[cellIdx] == 0 )
+ int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2);
+ int remain = (wgIdx % ((nSplitX * nSplitY) / 4));
+ int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1);
+ int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1);
+ int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY);
+
+ if (numConstraintsHost[cellIdx] == 0)
continue;
//printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch);
//printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]);
if (zIdx)
{
- //printf("?\n");
+ //printf("?\n");
}
- if (iter==0)
+ if (iter == 0)
{
//printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx);
//printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]);
}
const int start = offsetsHost[cellIdx];
int numConstraintsInCell = numConstraintsHost[cellIdx];
- // const int end = start + numConstraintsInCell;
+ // const int end = start + numConstraintsInCell;
- SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx,batchSizes,cellIdx);
+ SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, usedBodies, wgIdx, batchSizes, cellIdx);
task.m_solveFriction = false;
task.run(0);
-
}
}
}
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++)
+ for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++)
{
int nSplitX = B3_SOLVER_N_SPLIT_X;
int nSplitY = B3_SOLVER_N_SPLIT_Y;
-
- int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES;
+ int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES;
- for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
+ for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++)
{
- int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2);
- int remain= (wgIdx%((nSplitX*nSplitY)/4));
- int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1);
- int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1);
-
- int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
-
- if( numConstraintsHost[cellIdx] == 0 )
+ int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2);
+ int remain = (wgIdx % ((nSplitX * nSplitY) / 4));
+ int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1);
+ int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1);
+
+ int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY);
+
+ if (numConstraintsHost[cellIdx] == 0)
continue;
-
+
//printf("yIdx=%d\n",yIdx);
-
+
const int start = offsetsHost[cellIdx];
int numConstraintsInCell = numConstraintsHost[cellIdx];
- // const int end = start + numConstraintsInCell;
+ // const int end = start + numConstraintsInCell;
- SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0,batchSizes,cellIdx);
+ SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, 0, 0, batchSizes, cellIdx);
task.m_solveFriction = true;
task.run(0);
-
}
}
}
-
-
- } else
+ }
+ else
{
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0);
+ SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0);
task.m_solveFriction = false;
task.run(0);
}
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0);
+ SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0);
task.m_solveFriction = true;
task.run(0);
}
@@ -688,23 +634,21 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* body
shapeBuf->copyFromHost(shapeNative);
constraint->copyFromHost(constraintNative);
frame++;
-
}
void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
- const b3OpenCLArray<b3InertiaData>* shapeBuf,
- b3OpenCLArray<b3GpuConstraint4>* constraint,
- b3OpenCLArray<unsigned int>* m_numConstraints,
- b3OpenCLArray<unsigned int>* m_offsets,
- int batchId
- )
+ const b3OpenCLArray<b3InertiaData>* shapeBuf,
+ b3OpenCLArray<b3GpuConstraint4>* constraint,
+ b3OpenCLArray<unsigned int>* m_numConstraints,
+ b3OpenCLArray<unsigned int>* m_offsets,
+ int batchId)
{
-// b3BufferInfoCL( m_numConstraints->getBufferCL() ),
-// b3BufferInfoCL( m_offsets->getBufferCL() )
-
+ // b3BufferInfoCL( m_numConstraints->getBufferCL() ),
+ // b3BufferInfoCL( m_offsets->getBufferCL() )
+
int cellBatch = batchId;
const int nn = B3_SOLVER_N_CELLS;
-// int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
+ // int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
b3AlignedObjectArray<unsigned int> gN;
m_numConstraints->copyToHost(gN);
@@ -712,243 +656,220 @@ void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
m_offsets->copyToHost(gOffsets);
int nSplitX = B3_SOLVER_N_SPLIT_X;
int nSplitY = B3_SOLVER_N_SPLIT_Y;
-
-// int bIdx = batchId;
+
+ // int bIdx = batchId;
b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints;
constraint->copyToHost(cpuConstraints);
printf("batch = %d\n", batchId);
- int numWorkgroups = nn/B3_SOLVER_N_BATCHES;
+ int numWorkgroups = nn / B3_SOLVER_N_BATCHES;
b3AlignedObjectArray<int> usedBodies;
-
- for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++)
+ for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++)
{
printf("wgIdx = %d ", wgIdx);
- int zIdx = (wgIdx/((nSplitX*nSplitY))/2)*2+((cellBatch&4)>>2);
- int remain = wgIdx%((nSplitX*nSplitY));
- int yIdx = (remain%(nSplitX/2))*2 + ((cellBatch&2)>>1);
- int xIdx = (remain/(nSplitX/2))*2 + (cellBatch&1);
+ int zIdx = (wgIdx / ((nSplitX * nSplitY)) / 2) * 2 + ((cellBatch & 4) >> 2);
+ int remain = wgIdx % ((nSplitX * nSplitY));
+ int yIdx = (remain % (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1);
+ int xIdx = (remain / (nSplitX / 2)) * 2 + (cellBatch & 1);
-
- int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY);
- printf("cellIdx=%d\n",cellIdx);
- if( gN[cellIdx] == 0 )
+ int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY);
+ printf("cellIdx=%d\n", cellIdx);
+ if (gN[cellIdx] == 0)
continue;
const int start = gOffsets[cellIdx];
const int end = start + gN[cellIdx];
- for (int c=start;c<end;c++)
+ for (int c = start; c < end; c++)
{
b3GpuConstraint4& constraint = cpuConstraints[c];
//printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB);
- if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size())
+ if (usedBodies.findLinearSearch(constraint.m_bodyA) < usedBodies.size())
{
printf("error?\n");
}
- if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size())
+ if (usedBodies.findLinearSearch(constraint.m_bodyB) < usedBodies.size())
{
printf("error?\n");
}
}
- for (int c=start;c<end;c++)
+ for (int c = start; c < end; c++)
{
b3GpuConstraint4& constraint = cpuConstraints[c];
usedBodies.push_back(constraint.m_bodyA);
usedBodies.push_back(constraint.m_bodyB);
}
-
}
}
-static bool verify=false;
+static bool verify = false;
-void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf,
- b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches)
+void b3Solver::solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf,
+ b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches)
{
-
-
- b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 );
+ b3Int4 cdata = b3MakeInt4(n, 0, 0, 0);
{
-
const int nn = B3_SOLVER_N_CELLS;
cdata.x = 0;
- cdata.y = maxNumBatches;//250;
-
+ cdata.y = maxNumBatches; //250;
- int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES;
+ int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES;
#ifdef DEBUG_ME
- SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
- adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
+ SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
+ adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems);
#endif
-
-
{
-
B3_PROFILE("m_batchSolveKernel iterations");
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
+ for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++)
{
-
if (verify)
{
- checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib);
+ checkConstraintBatch(bodyBuf, shapeBuf, constraint, m_numConstraints, m_offsets, ib);
}
#ifdef DEBUG_ME
- memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
- gpuDebugInfo.write(debugInfo,numWorkItems);
+ memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems);
+ gpuDebugInfo.write(debugInfo, numWorkItems);
#endif
-
cdata.z = ib;
-
- b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel");
+ b3LauncherCL launcher(m_queue, m_solveContactKernel, "m_solveContactKernel");
#if 1
-
- b3BufferInfoCL bInfo[] = {
-
- b3BufferInfoCL( bodyBuf->getBufferCL() ),
- b3BufferInfoCL( shapeBuf->getBufferCL() ),
- b3BufferInfoCL( constraint->getBufferCL() ),
- b3BufferInfoCL( m_numConstraints->getBufferCL() ),
- b3BufferInfoCL( m_offsets->getBufferCL() )
+
+ b3BufferInfoCL bInfo[] = {
+
+ b3BufferInfoCL(bodyBuf->getBufferCL()),
+ b3BufferInfoCL(shapeBuf->getBufferCL()),
+ b3BufferInfoCL(constraint->getBufferCL()),
+ b3BufferInfoCL(m_numConstraints->getBufferCL()),
+ b3BufferInfoCL(m_offsets->getBufferCL())
#ifdef DEBUG_ME
- , b3BufferInfoCL(&gpuDebugInfo)
+ ,
+ b3BufferInfoCL(&gpuDebugInfo)
#endif
- };
-
-
+ };
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
//launcher.setConst( cdata.x );
- launcher.setConst( cdata.y );
- launcher.setConst( cdata.z );
- b3Int4 nSplit;
+ launcher.setConst(cdata.y);
+ launcher.setConst(cdata.z);
+ b3Int4 nSplit;
nSplit.x = B3_SOLVER_N_SPLIT_X;
nSplit.y = B3_SOLVER_N_SPLIT_Y;
nSplit.z = B3_SOLVER_N_SPLIT_Z;
- launcher.setConst( nSplit );
- launcher.launch1D( numWorkItems, 64 );
+ launcher.setConst(nSplit);
+ launcher.launch1D(numWorkItems, 64);
-
#else
- const char* fileName = "m_batchSolveKernel.bin";
- FILE* f = fopen(fileName,"rb");
- if (f)
- {
- int sizeInBytes=0;
- if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
- {
- printf("error, cannot get file size\n");
- exit(0);
- }
-
- unsigned char* buf = (unsigned char*) malloc(sizeInBytes);
- fread(buf,sizeInBytes,1,f);
- int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context);
- int num = *(int*)&buf[serializedBytes];
-
- launcher.launch1D( num);
-
- //this clFinish is for testing on errors
- clFinish(m_queue);
- }
+ const char* fileName = "m_batchSolveKernel.bin";
+ FILE* f = fopen(fileName, "rb");
+ if (f)
+ {
+ int sizeInBytes = 0;
+ if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET))
+ {
+ printf("error, cannot get file size\n");
+ exit(0);
+ }
+
+ unsigned char* buf = (unsigned char*)malloc(sizeInBytes);
+ fread(buf, sizeInBytes, 1, f);
+ int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context);
+ int num = *(int*)&buf[serializedBytes];
+
+ launcher.launch1D(num);
+
+ //this clFinish is for testing on errors
+ clFinish(m_queue);
+ }
#endif
-
#ifdef DEBUG_ME
clFinish(m_queue);
- gpuDebugInfo.read(debugInfo,numWorkItems);
+ gpuDebugInfo.read(debugInfo, numWorkItems);
clFinish(m_queue);
- for (int i=0;i<numWorkItems;i++)
+ for (int i = 0; i < numWorkItems; i++)
{
- if (debugInfo[i].m_valInt2>0)
+ if (debugInfo[i].m_valInt2 > 0)
{
- printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2);
+ printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2);
}
- if (debugInfo[i].m_valInt3>0)
+ if (debugInfo[i].m_valInt3 > 0)
{
- printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3);
+ printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3);
}
}
-#endif //DEBUG_ME
-
-
+#endif //DEBUG_ME
}
}
-
- clFinish(m_queue);
-
+ clFinish(m_queue);
}
cdata.x = 1;
- bool applyFriction=true;
+ bool applyFriction = true;
if (applyFriction)
- {
+ {
B3_PROFILE("m_batchSolveKernel iterations2");
- for(int iter=0; iter<m_nIterations; iter++)
+ for (int iter = 0; iter < m_nIterations; iter++)
{
- for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++)
+ for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++)
{
cdata.z = ib;
-
-
- b3BufferInfoCL bInfo[] = {
- b3BufferInfoCL( bodyBuf->getBufferCL() ),
- b3BufferInfoCL( shapeBuf->getBufferCL() ),
- b3BufferInfoCL( constraint->getBufferCL() ),
- b3BufferInfoCL( m_numConstraints->getBufferCL() ),
- b3BufferInfoCL( m_offsets->getBufferCL() )
+
+ b3BufferInfoCL bInfo[] = {
+ b3BufferInfoCL(bodyBuf->getBufferCL()),
+ b3BufferInfoCL(shapeBuf->getBufferCL()),
+ b3BufferInfoCL(constraint->getBufferCL()),
+ b3BufferInfoCL(m_numConstraints->getBufferCL()),
+ b3BufferInfoCL(m_offsets->getBufferCL())
#ifdef DEBUG_ME
- ,b3BufferInfoCL(&gpuDebugInfo)
-#endif //DEBUG_ME
+ ,
+ b3BufferInfoCL(&gpuDebugInfo)
+#endif //DEBUG_ME
};
- b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" );
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ b3LauncherCL launcher(m_queue, m_solveFrictionKernel, "m_solveFrictionKernel");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
//launcher.setConst( cdata.x );
- launcher.setConst( cdata.y );
- launcher.setConst( cdata.z );
- b3Int4 nSplit;
+ launcher.setConst(cdata.y);
+ launcher.setConst(cdata.z);
+ b3Int4 nSplit;
nSplit.x = B3_SOLVER_N_SPLIT_X;
nSplit.y = B3_SOLVER_N_SPLIT_Y;
nSplit.z = B3_SOLVER_N_SPLIT_Z;
- launcher.setConst( nSplit );
-
- launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 );
+ launcher.setConst(nSplit);
+
+ launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64);
}
}
clFinish(m_queue);
-
}
#ifdef DEBUG_ME
delete[] debugInfo;
-#endif //DEBUG_ME
+#endif //DEBUG_ME
}
-
-
}
-void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
- const b3OpenCLArray<b3InertiaData>* shapeBuf,
- b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
- int nContacts, const ConstraintCfg& cfg )
+void b3Solver::convertToConstraints(const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
+ const b3OpenCLArray<b3InertiaData>* shapeBuf,
+ b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData,
+ int nContacts, const ConstraintCfg& cfg)
{
-// b3OpenCLArray<b3GpuConstraint4>* constraintNative =0;
+ // b3OpenCLArray<b3GpuConstraint4>* constraintNative =0;
contactCOut->resize(nContacts);
struct CB
{
@@ -959,30 +880,28 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyB
};
{
-
CB cdata;
cdata.m_nContacts = nContacts;
cdata.m_dt = cfg.m_dt;
cdata.m_positionDrift = cfg.m_positionDrift;
cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff;
-
if (gConvertConstraintOnCpu)
{
b3AlignedObjectArray<b3RigidBodyData> gBodies;
- bodyBuf->copyToHost(gBodies);
+ bodyBuf->copyToHost(gBodies);
- b3AlignedObjectArray<b3Contact4> gContact;
- contactsIn->copyToHost(gContact);
+ b3AlignedObjectArray<b3Contact4> gContact;
+ contactsIn->copyToHost(gContact);
+
+ b3AlignedObjectArray<b3InertiaData> gShapes;
+ shapeBuf->copyToHost(gShapes);
+
+ b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut;
+ gConstraintOut.resize(nContacts);
- b3AlignedObjectArray<b3InertiaData> gShapes;
- shapeBuf->copyToHost(gShapes);
-
- b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut;
- gConstraintOut.resize(nContacts);
-
B3_PROFILE("cpu contactToConstraintKernel");
- for (int gIdx=0;gIdx<nContacts;gIdx++)
+ for (int gIdx = 0; gIdx < nContacts; gIdx++)
{
int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit);
int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit);
@@ -1001,40 +920,36 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyB
b3ContactConstraint4_t cs;
- setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
- &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff,
- &cs );
-
+ setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB,
+ &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff,
+ &cs);
+
cs.m_batchIdx = gContact[gIdx].m_batchIdx;
gConstraintOut[gIdx] = (b3GpuConstraint4&)cs;
}
contactCOut->copyFromHost(gConstraintOut);
-
- } else
+ }
+ else
{
B3_PROFILE("gpu m_contactToConstraintKernel");
-
- b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()),
- b3BufferInfoCL( contactCOut->getBufferCL() )};
- b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" );
- launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) );
+ b3BufferInfoCL bInfo[] = {b3BufferInfoCL(contactsIn->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(shapeBuf->getBufferCL()),
+ b3BufferInfoCL(contactCOut->getBufferCL())};
+ b3LauncherCL launcher(m_queue, m_contactToConstraintKernel, "m_contactToConstraintKernel");
+ launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL));
//launcher.setConst( cdata );
-
+
launcher.setConst(cdata.m_nContacts);
launcher.setConst(cdata.m_dt);
launcher.setConst(cdata.m_positionDrift);
launcher.setConst(cdata.m_positionConstraintCoeff);
-
- launcher.launch1D( nContacts, 64 );
- clFinish(m_queue);
+ launcher.launch1D(nContacts, 64);
+ clFinish(m_queue);
}
}
-
-
}
/*
@@ -1115,28 +1030,24 @@ void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyData>* bodyBuf,
}
*/
-void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx )
+void b3Solver::batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx)
{
-
- int numWorkItems = 64*B3_SOLVER_N_CELLS;
+ int numWorkItems = 64 * B3_SOLVER_N_CELLS;
{
B3_PROFILE("batch generation");
-
+
b3Int4 cdata;
cdata.x = nContacts;
cdata.y = 0;
cdata.z = staticIdx;
-
#ifdef BATCH_DEBUG
- SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
- adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems);
- memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems);
- gpuDebugInfo.write(debugInfo,numWorkItems);
+ SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
+ adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems);
+ memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems);
+ gpuDebugInfo.write(debugInfo, numWorkItems);
#endif
-
-
#if 0
b3BufferInfoCL bInfo[] = {
b3BufferInfoCL( contacts->getBufferCL() ),
@@ -1148,8 +1059,6 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
#endif
};
#endif
-
-
{
m_batchSizes.resize(nNative->size());
@@ -1157,22 +1066,21 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
//b3LauncherCL launcher( m_queue, m_batchingKernel);
cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;
- b3LauncherCL launcher( m_queue, k,"*batchingKernel");
- if (!useNewBatchingKernel )
+ b3LauncherCL launcher(m_queue, k, "*batchingKernel");
+ if (!useNewBatchingKernel)
{
- launcher.setBuffer( contacts->getBufferCL() );
+ launcher.setBuffer(contacts->getBufferCL());
}
- launcher.setBuffer( m_contactBuffer2->getBufferCL() );
- launcher.setBuffer( nNative->getBufferCL());
- launcher.setBuffer( offsetsNative->getBufferCL());
-
+ launcher.setBuffer(m_contactBuffer2->getBufferCL());
+ launcher.setBuffer(nNative->getBufferCL());
+ launcher.setBuffer(offsetsNative->getBufferCL());
+
launcher.setBuffer(m_batchSizes.getBufferCL());
-
//launcher.setConst( cdata );
- launcher.setConst(staticIdx);
-
- launcher.launch1D( numWorkItems, 64 );
+ launcher.setConst(staticIdx);
+
+ launcher.launch1D(numWorkItems, 64);
//clFinish(m_queue);
//b3AlignedObjectArray<int> batchSizesCPU;
//m_batchSizes.copyToHost(batchSizesCPU);
@@ -1180,46 +1088,41 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact
}
#ifdef BATCH_DEBUG
- aaaa
- b3Contact4* hostContacts = new b3Contact4[nContacts];
- m_contactBuffer->read(hostContacts,nContacts);
+ aaaa
+ b3Contact4* hostContacts = new b3Contact4[nContacts];
+ m_contactBuffer->read(hostContacts, nContacts);
clFinish(m_queue);
- gpuDebugInfo.read(debugInfo,numWorkItems);
+ gpuDebugInfo.read(debugInfo, numWorkItems);
clFinish(m_queue);
- for (int i=0;i<numWorkItems;i++)
+ for (int i = 0; i < numWorkItems; i++)
{
- if (debugInfo[i].m_valInt1>0)
+ if (debugInfo[i].m_valInt1 > 0)
{
printf("catch\n");
}
- if (debugInfo[i].m_valInt2>0)
+ if (debugInfo[i].m_valInt2 > 0)
{
printf("catch22\n");
}
- if (debugInfo[i].m_valInt3>0)
+ if (debugInfo[i].m_valInt3 > 0)
{
printf("catch666\n");
}
- if (debugInfo[i].m_valInt4>0)
+ if (debugInfo[i].m_valInt4 > 0)
{
printf("catch777\n");
}
}
delete[] debugInfo;
-#endif //BATCH_DEBUG
-
+#endif //BATCH_DEBUG
}
-// copy buffer to buffer
+ // copy buffer to buffer
//b3Assert(m_contactBuffer->size()==nContacts);
//contacts->copyFromOpenCLArray( *m_contactBuffer);
//clFinish(m_queue);//needed?
-
-
-
}
-