diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp | 835 |
1 files changed, 369 insertions, 466 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp index 20bf6d47c5..ccf67da1a8 100644 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp @@ -13,7 +13,6 @@ subject to the following restrictions: */ //Originally written by Takahiro Harada - #include "b3Solver.h" ///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments @@ -38,7 +37,6 @@ bool gConvertConstraintOnCpu = false; #include "kernels/batchingKernels.h" #include "kernels/batchingKernelsNew.h" - #include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" #include "Bullet3Common/b3Vector3.h" @@ -48,7 +46,7 @@ struct SolverDebugInfo int m_valInt1; int m_valInt2; int m_valInt3; - + int m_valInt4; int m_valInt5; int m_valInt6; @@ -59,11 +57,10 @@ struct SolverDebugInfo int m_valInt10; int m_valInt11; - int m_valInt12; - int m_valInt13; - int m_valInt14; - int m_valInt15; - + int m_valInt12; + int m_valInt13; + int m_valInt14; + int m_valInt15; float m_val0; float m_val1; @@ -71,9 +68,6 @@ struct SolverDebugInfo float m_val3; }; - - - class SolverDeviceInl { public: @@ -84,101 +78,89 @@ public: }; }; - - b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : - m_context(ctx), - m_device(device), - m_queue(queue), - m_batchSizes(ctx,queue), - m_nIterations(4) + : m_context(ctx), + m_device(device), + m_queue(queue), + m_batchSizes(ctx, queue), + m_nIterations(4) { - m_sort32 = new b3RadixSort32CL(ctx,device,queue); - m_scan = new b3PrefixScanCL(ctx,device,queue,B3_SOLVER_N_CELLS); - m_search = new b3BoundSearchCL(ctx,device,queue,B3_SOLVER_N_CELLS); + m_sort32 = new b3RadixSort32CL(ctx, device, queue); + m_scan = new b3PrefixScanCL(ctx, device, queue, B3_SOLVER_N_CELLS); + m_search = new b3BoundSearchCL(ctx, device, queue, B3_SOLVER_N_CELLS); - const int sortSize = B3NEXTMULTIPLEOF( pairCapacity, 512 ); + const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx,queue,sortSize); - m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx,queue); + m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx, queue, sortSize); + m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx, queue); - m_numConstraints = new b3OpenCLArray<unsigned int>(ctx,queue,B3_SOLVER_N_CELLS ); + m_numConstraints = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); m_numConstraints->resize(B3_SOLVER_N_CELLS); - m_offsets = new b3OpenCLArray<unsigned int>( ctx,queue,B3_SOLVER_N_CELLS); + m_offsets = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); m_offsets->resize(B3_SOLVER_N_CELLS); const char* additionalMacros = ""; -// const char* srcFileNameForCaching=""; - - + // const char* srcFileNameForCaching=""; cl_int pErrNum; const char* batchKernelSource = batchingKernelsCL; const char* batchKernelNewSource = batchingKernelsNewCL; - + const char* solverSetupSource = solverSetupCL; const char* solverSetup2Source = solverSetup2CL; const char* solveContactSource = solveContactCL; const char* solveFrictionSource = solveFrictionCL; - - - + { - - cl_program solveContactProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveContactSource, &pErrNum,additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); + cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); b3Assert(solveContactProg); - - cl_program solveFrictionProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solveFrictionSource, &pErrNum,additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); + + cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); b3Assert(solveFrictionProg); - cl_program solverSetup2Prog= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetup2Source, &pErrNum,additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); + cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); b3Assert(solverSetup2Prog); - - cl_program solverSetupProg= b3OpenCLUtils::compileCLProgramFromString( ctx, device, solverSetupSource, &pErrNum,additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); + cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); b3Assert(solverSetupProg); - - - m_solveFrictionKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg,additionalMacros ); + + m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); b3Assert(m_solveFrictionKernel); - m_solveContactKernel= b3OpenCLUtils::compileCLKernelFromString( ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg,additionalMacros ); + m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); b3Assert(m_solveContactKernel); - - m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg,additionalMacros ); + + m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); b3Assert(m_contactToConstraintKernel); - - m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog,additionalMacros ); + + m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); b3Assert(m_setSortDataKernel); - - m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog,additionalMacros ); + + m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); b3Assert(m_reorderContactKernel); - - m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog,additionalMacros ); + m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); b3Assert(m_copyConstraintKernel); - } { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelSource, &pErrNum,additionalMacros, B3_BATCHING_PATH); + cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); b3Assert(batchingProg); - - m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg,additionalMacros ); + + m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); b3Assert(m_batchingKernel); } { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, batchKernelNewSource, &pErrNum,additionalMacros, B3_BATCHING_NEW_PATH); + cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); b3Assert(batchingNewProg); - m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg,additionalMacros ); + m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); //m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros ); b3Assert(m_batchingKernelNew); } } - + b3Solver::~b3Solver() { delete m_offsets; @@ -190,71 +172,68 @@ b3Solver::~b3Solver() delete m_scan; delete m_search; - clReleaseKernel(m_batchingKernel); clReleaseKernel(m_batchingKernelNew); - - clReleaseKernel( m_solveContactKernel); - clReleaseKernel( m_solveFrictionKernel); - - clReleaseKernel( m_contactToConstraintKernel); - clReleaseKernel( m_setSortDataKernel); - clReleaseKernel( m_reorderContactKernel); - clReleaseKernel( m_copyConstraintKernel); - -} + clReleaseKernel(m_solveContactKernel); + clReleaseKernel(m_solveFrictionKernel); - + clReleaseKernel(m_contactToConstraintKernel); + clReleaseKernel(m_setSortDataKernel); + clReleaseKernel(m_reorderContactKernel); + clReleaseKernel(m_copyConstraintKernel); +} -template<bool JACOBI> -static -__inline -void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) +template <bool JACOBI> +static __inline void solveContact(b3GpuConstraint4& cs, + const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, + const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4]) { - - b3Vector3 dLinVelA; dLinVelA.setZero(); - b3Vector3 dAngVelA; dAngVelA.setZero(); - b3Vector3 dLinVelB; dLinVelB.setZero(); - b3Vector3 dAngVelB; dAngVelB.setZero(); - - for(int ic=0; ic<4; ic++) + b3Vector3 dLinVelA; + dLinVelA.setZero(); + b3Vector3 dAngVelA; + dAngVelA.setZero(); + b3Vector3 dLinVelB; + dLinVelB.setZero(); + b3Vector3 dAngVelB; + dAngVelB.setZero(); + + for (int ic = 0; ic < 4; ic++) { // dont necessary because this makes change to 0 - if( cs.m_jacCoeffInv[ic] == 0.f ) continue; + if (cs.m_jacCoeffInv[ic] == 0.f) continue; { b3Vector3 angular0, angular1, linear; b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular( (const b3Vector3 &)cs.m_linear, (const b3Vector3 &)r0, (const b3Vector3 &)r1, &linear, &angular0, &angular1 ); + setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, &linear, &angular0, &angular1); - float rambdaDt = calcRelVel((const b3Vector3 &)cs.m_linear,(const b3Vector3 &) -cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ) + cs.m_b[ic]; + float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB) + + cs.m_b[ic]; rambdaDt *= cs.m_jacCoeffInv[ic]; { float prevSum = cs.m_appliedRambdaDt[ic]; float updated = prevSum; updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[ic] ); - updated = b3Min( updated, maxRambdaDt[ic] ); + updated = b3Max(updated, minRambdaDt[ic]); + updated = b3Min(updated, maxRambdaDt[ic]); rambdaDt = updated - prevSum; cs.m_appliedRambdaDt[ic] = updated; } - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; + b3Vector3 linImp0 = invMassA * linear * rambdaDt; + b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; + b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; + b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; #ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); + b3Assert(_finite(linImp0.getX())); b3Assert(_finite(linImp1.getX())); #endif - if( JACOBI ) + if (JACOBI) { dLinVelA += linImp0; dAngVelA += angImp0; @@ -271,92 +250,83 @@ void solveContact(b3GpuConstraint4& cs, } } - if( JACOBI ) + if (JACOBI) { linVelA += dLinVelA; angVelA += dAngVelA; linVelB += dLinVelB; angVelB += dAngVelB; } - } +static __inline void solveFriction(b3GpuConstraint4& cs, + const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, + const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, + float maxRambdaDt[4], float minRambdaDt[4]) +{ + if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; + const b3Vector3& center = (const b3Vector3&)cs.m_center; + b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - - static - __inline - void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) - { - - if( cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0 ) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1 (n, tangent[0],tangent[1]); + b3Vector3 tangent[2]; +#if 1 + b3PlaneSpace1(n, tangent[0], tangent[1]); #else - b3Vector3 r = cs.m_worldPos[0]-center; - tangent[0] = cross3( n, r ); - tangent[1] = cross3( tangent[0], n ); - tangent[0] = normalize3( tangent[0] ); - tangent[1] = normalize3( tangent[1] ); + b3Vector3 r = cs.m_worldPos[0] - center; + tangent[0] = cross3(n, r); + tangent[1] = cross3(tangent[0], n); + tangent[0] = normalize3(tangent[0]); + tangent[1] = normalize3(tangent[1]); #endif - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for(int i=0; i<2; i++) - { - setLinearAndAngular( tangent[i], r0, r1, &linear, &angular0, &angular1 ); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB ); - rambdaDt *= cs.m_fJacCoeffInv[i]; + b3Vector3 angular0, angular1, linear; + b3Vector3 r0 = center - posA; + b3Vector3 r1 = center - posB; + for (int i = 0; i < 2; i++) + { + setLinearAndAngular(tangent[i], r0, r1, &linear, &angular0, &angular1); + float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, + linVelA, angVelA, linVelB, angVelB); + rambdaDt *= cs.m_fJacCoeffInv[i]; - { - float prevSum = cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max( updated, minRambdaDt[i] ); - updated = b3Min( updated, maxRambdaDt[i] ); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } + { + float prevSum = cs.m_fAppliedRambdaDt[i]; + float updated = prevSum; + updated += rambdaDt; + updated = b3Max(updated, minRambdaDt[i]); + updated = b3Min(updated, maxRambdaDt[i]); + rambdaDt = updated - prevSum; + cs.m_fAppliedRambdaDt[i] = updated; + } - b3Vector3 linImp0 = invMassA*linear*rambdaDt; - b3Vector3 linImp1 = invMassB*(-linear)*rambdaDt; - b3Vector3 angImp0 = (invInertiaA* angular0)*rambdaDt; - b3Vector3 angImp1 = (invInertiaB* angular1)*rambdaDt; + b3Vector3 linImp0 = invMassA * linear * rambdaDt; + b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; + b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; + b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; #ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); + b3Assert(_finite(linImp0.getX())); + b3Assert(_finite(linImp1.getX())); #endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } + linVelA += linImp0; + angVelA += angImp0; + linVelB += linImp1; + angVelB += angImp1; + } - { // angular damping for point constraint - b3Vector3 ab = ( posB - posA ).normalized(); - b3Vector3 ac = ( center - posA ).normalized(); - if( b3Dot( ab, ac ) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot( n, angVelA ); - float angNB = b3Dot( n, angVelB ); + { // angular damping for point constraint + b3Vector3 ab = (posB - posA).normalized(); + b3Vector3 ac = (center - posA).normalized(); + if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) + { + float angNA = b3Dot(n, angVelA); + float angNB = b3Dot(n, angVelB); - angVelA -= (angNA*0.1f)*n; - angVelB -= (angNB*0.1f)*n; - } + angVelA -= (angNA * 0.1f) * n; + angVelB -= (angNB * 0.1f) * n; } - } +} /* b3AlignedObjectArray<b3RigidBodyData>& m_bodies; b3AlignedObjectArray<b3InertiaData>& m_shapes; @@ -370,79 +340,69 @@ void solveContact(b3GpuConstraint4& cs, int m_maxNumBatches; */ -struct SolveTask// : public ThreadPool::Task +struct SolveTask // : public ThreadPool::Task { - SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints, - int start, int nConstraints,int maxNumBatches,b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex) - : m_bodies( bodies ), m_shapes( shapes ), - m_constraints( constraints ), - m_batchSizes(batchSizes), - m_cellIndex(cellIndex), - m_curWgidx(curWgidx), - m_start( start ), - m_nConstraints( nConstraints ), - m_solveFriction( true ), - m_maxNumBatches(maxNumBatches) - {} - - unsigned short int getType(){ return 0; } + SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints, + int start, int nConstraints, int maxNumBatches, b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex) + : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_batchSizes(batchSizes), m_cellIndex(cellIndex), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches) + { + } + + unsigned short int getType() { return 0; } void run(int tIdx) { int offset = 0; - for (int ii=0;ii<B3_MAX_NUM_BATCHES;ii++) + for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) { - int numInBatch = m_batchSizes->at(m_cellIndex*B3_MAX_NUM_BATCHES+ii); + int numInBatch = m_batchSizes->at(m_cellIndex * B3_MAX_NUM_BATCHES + ii); if (!numInBatch) break; - for (int jj=0;jj<numInBatch;jj++) + for (int jj = 0; jj < numInBatch; jj++) { - int i = m_start + offset+jj; + int i = m_start + offset + jj; int batchId = m_constraints[i].m_batchIdx; - b3Assert(batchId==ii); + b3Assert(batchId == ii); float frictionCoeff = m_constraints[i].getFrictionCoeff(); int aIdx = (int)m_constraints[i].m_bodyA; int bIdx = (int)m_constraints[i].m_bodyB; -// int localBatch = m_constraints[i].m_batchIdx; + // int localBatch = m_constraints[i].m_batchIdx; b3RigidBodyData& bodyA = m_bodies[aIdx]; b3RigidBodyData& bodyB = m_bodies[bIdx]; - if( !m_solveFriction ) + if (!m_solveFriction) { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; + float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); + solveContact<false>(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, + (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, + maxRambdaDt, minRambdaDt); } else { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; + float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; + float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; float sum = 0; - for(int j=0; j<4; j++) + for (int j = 0; j < 4; j++) { - sum +=m_constraints[i].m_appliedRambdaDt[j]; + sum += m_constraints[i].m_appliedRambdaDt[j]; } frictionCoeff = 0.7f; - for(int j=0; j<4; j++) + for (int j = 0; j < 4; j++) { - maxRambdaDt[j] = frictionCoeff*sum; + maxRambdaDt[j] = frictionCoeff * sum; minRambdaDt[j] = -maxRambdaDt[j]; } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - + solveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, + (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, + maxRambdaDt, minRambdaDt); } } - offset+=numInBatch; - - + offset += numInBatch; } -/* for (int bb=0;bb<m_maxNumBatches;bb++) + /* for (int bb=0;bb<m_maxNumBatches;bb++) { //for(int ic=m_nConstraints-1; ic>=0; ic--) for(int ic=0; ic<m_nConstraints; ic++) @@ -491,9 +451,6 @@ struct SolveTask// : public ThreadPool::Task } } */ - - - } b3AlignedObjectArray<b3RigidBodyData>& m_bodies; @@ -508,11 +465,9 @@ struct SolveTask// : public ThreadPool::Task int m_maxNumBatches; }; - -void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches,b3AlignedObjectArray<int>* batchSizes) +void b3Solver::solveContactConstraintHost(b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, + b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray<int>* batchSizes) { - #if 0 { int nSplitX = B3_SOLVER_N_SPLIT_X; @@ -571,114 +526,105 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* body //printf("------------------------\n"); b3AlignedObjectArray<unsigned int> offsetsHost; m_offsets->copyToHost(offsetsHost); - static int frame=0; - bool useBatches=true; + static int frame = 0; + bool useBatches = true; if (useBatches) { - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) + for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) { - int nSplitX = B3_SOLVER_N_SPLIT_X; int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; + int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; //printf("cell Batch %d\n",cellBatch); b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS]; - for (int i=0;i<B3_SOLVER_N_CELLS;i++) + for (int i = 0; i < B3_SOLVER_N_CELLS; i++) { usedBodies[i].resize(0); } - - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) + for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - - - if( numConstraintsHost[cellIdx] == 0 ) + int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); + int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); + int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); + int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); + int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); + + if (numConstraintsHost[cellIdx] == 0) continue; //printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); //printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]); if (zIdx) { - //printf("?\n"); + //printf("?\n"); } - if (iter==0) + if (iter == 0) { //printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx); //printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]); } const int start = offsetsHost[cellIdx]; int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; + // const int end = start + numConstraintsInCell; - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell ,maxNumBatches,usedBodies,wgIdx,batchSizes,cellIdx); + SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, usedBodies, wgIdx, batchSizes, cellIdx); task.m_solveFriction = false; task.run(0); - } } } - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) + for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) { int nSplitX = B3_SOLVER_N_SPLIT_X; int nSplitY = B3_SOLVER_N_SPLIT_Y; - - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; + int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) + for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - - if( numConstraintsHost[cellIdx] == 0 ) + int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); + int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); + int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); + int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); + + int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); + + if (numConstraintsHost[cellIdx] == 0) continue; - + //printf("yIdx=%d\n",yIdx); - + const int start = offsetsHost[cellIdx]; int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; + // const int end = start + numConstraintsInCell; - SolveTask task( bodyNative, shapeNative, constraintNative, start, numConstraintsInCell,maxNumBatches, 0,0,batchSizes,cellIdx); + SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, 0, 0, batchSizes, cellIdx); task.m_solveFriction = true; task.run(0); - } } } - - - } else + } + else { - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0); + SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); task.m_solveFriction = false; task.run(0); } - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - SolveTask task( bodyNative, shapeNative, constraintNative, 0, n ,maxNumBatches,0,0,0,0); + SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); task.m_solveFriction = true; task.run(0); } @@ -688,23 +634,21 @@ void b3Solver::solveContactConstraintHost( b3OpenCLArray<b3RigidBodyData>* body shapeBuf->copyFromHost(shapeNative); constraint->copyFromHost(constraintNative); frame++; - } void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, - b3OpenCLArray<unsigned int>* m_numConstraints, - b3OpenCLArray<unsigned int>* m_offsets, - int batchId - ) + const b3OpenCLArray<b3InertiaData>* shapeBuf, + b3OpenCLArray<b3GpuConstraint4>* constraint, + b3OpenCLArray<unsigned int>* m_numConstraints, + b3OpenCLArray<unsigned int>* m_offsets, + int batchId) { -// b3BufferInfoCL( m_numConstraints->getBufferCL() ), -// b3BufferInfoCL( m_offsets->getBufferCL() ) - + // b3BufferInfoCL( m_numConstraints->getBufferCL() ), + // b3BufferInfoCL( m_offsets->getBufferCL() ) + int cellBatch = batchId; const int nn = B3_SOLVER_N_CELLS; -// int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; + // int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; b3AlignedObjectArray<unsigned int> gN; m_numConstraints->copyToHost(gN); @@ -712,243 +656,220 @@ void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, m_offsets->copyToHost(gOffsets); int nSplitX = B3_SOLVER_N_SPLIT_X; int nSplitY = B3_SOLVER_N_SPLIT_Y; - -// int bIdx = batchId; + + // int bIdx = batchId; b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints; constraint->copyToHost(cpuConstraints); printf("batch = %d\n", batchId); - int numWorkgroups = nn/B3_SOLVER_N_BATCHES; + int numWorkgroups = nn / B3_SOLVER_N_BATCHES; b3AlignedObjectArray<int> usedBodies; - - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) + for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) { printf("wgIdx = %d ", wgIdx); - int zIdx = (wgIdx/((nSplitX*nSplitY))/2)*2+((cellBatch&4)>>2); - int remain = wgIdx%((nSplitX*nSplitY)); - int yIdx = (remain%(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain/(nSplitX/2))*2 + (cellBatch&1); + int zIdx = (wgIdx / ((nSplitX * nSplitY)) / 2) * 2 + ((cellBatch & 4) >> 2); + int remain = wgIdx % ((nSplitX * nSplitY)); + int yIdx = (remain % (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); + int xIdx = (remain / (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - printf("cellIdx=%d\n",cellIdx); - if( gN[cellIdx] == 0 ) + int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); + printf("cellIdx=%d\n", cellIdx); + if (gN[cellIdx] == 0) continue; const int start = gOffsets[cellIdx]; const int end = start + gN[cellIdx]; - for (int c=start;c<end;c++) + for (int c = start; c < end; c++) { b3GpuConstraint4& constraint = cpuConstraints[c]; //printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB); - if (usedBodies.findLinearSearch(constraint.m_bodyA)< usedBodies.size()) + if (usedBodies.findLinearSearch(constraint.m_bodyA) < usedBodies.size()) { printf("error?\n"); } - if (usedBodies.findLinearSearch(constraint.m_bodyB)< usedBodies.size()) + if (usedBodies.findLinearSearch(constraint.m_bodyB) < usedBodies.size()) { printf("error?\n"); } } - for (int c=start;c<end;c++) + for (int c = start; c < end; c++) { b3GpuConstraint4& constraint = cpuConstraints[c]; usedBodies.push_back(constraint.m_bodyA); usedBodies.push_back(constraint.m_bodyB); } - } } -static bool verify=false; +static bool verify = false; -void b3Solver::solveContactConstraint( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n ,int maxNumBatches) +void b3Solver::solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, + b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches) { - - - b3Int4 cdata = b3MakeInt4( n, 0, 0, 0 ); + b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); { - const int nn = B3_SOLVER_N_CELLS; cdata.x = 0; - cdata.y = maxNumBatches;//250; - + cdata.y = maxNumBatches; //250; - int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; + int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; #ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); + SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; + adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems); #endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) + for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) { - if (verify) { - checkConstraintBatch(bodyBuf,shapeBuf,constraint,m_numConstraints,m_offsets,ib); + checkConstraintBatch(bodyBuf, shapeBuf, constraint, m_numConstraints, m_offsets, ib); } #ifdef DEBUG_ME - memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); - gpuDebugInfo.write(debugInfo,numWorkItems); + memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); + gpuDebugInfo.write(debugInfo, numWorkItems); #endif - cdata.z = ib; - - b3LauncherCL launcher( m_queue, m_solveContactKernel ,"m_solveContactKernel"); + b3LauncherCL launcher(m_queue, m_solveContactKernel, "m_solveContactKernel"); #if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_offsets->getBufferCL() ) + + b3BufferInfoCL bInfo[] = { + + b3BufferInfoCL(bodyBuf->getBufferCL()), + b3BufferInfoCL(shapeBuf->getBufferCL()), + b3BufferInfoCL(constraint->getBufferCL()), + b3BufferInfoCL(m_numConstraints->getBufferCL()), + b3BufferInfoCL(m_offsets->getBufferCL()) #ifdef DEBUG_ME - , b3BufferInfoCL(&gpuDebugInfo) + , + b3BufferInfoCL(&gpuDebugInfo) #endif - }; - - + }; - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - b3Int4 nSplit; + launcher.setConst(cdata.y); + launcher.setConst(cdata.z); + b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; - launcher.setConst( nSplit ); - launcher.launch1D( numWorkItems, 64 ); + launcher.setConst(nSplit); + launcher.launch1D(numWorkItems, 64); - #else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName,"rb"); - if (f) - { - int sizeInBytes=0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*) malloc(sizeInBytes); - fread(buf,sizeInBytes,1,f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes,m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D( num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } + const char* fileName = "m_batchSolveKernel.bin"; + FILE* f = fopen(fileName, "rb"); + if (f) + { + int sizeInBytes = 0; + if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) + { + printf("error, cannot get file size\n"); + exit(0); + } + + unsigned char* buf = (unsigned char*)malloc(sizeInBytes); + fread(buf, sizeInBytes, 1, f); + int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, m_context); + int num = *(int*)&buf[serializedBytes]; + + launcher.launch1D(num); + + //this clFinish is for testing on errors + clFinish(m_queue); + } #endif - #ifdef DEBUG_ME clFinish(m_queue); - gpuDebugInfo.read(debugInfo,numWorkItems); + gpuDebugInfo.read(debugInfo, numWorkItems); clFinish(m_queue); - for (int i=0;i<numWorkItems;i++) + for (int i = 0; i < numWorkItems; i++) { - if (debugInfo[i].m_valInt2>0) + if (debugInfo[i].m_valInt2 > 0) { - printf("debugInfo[i].m_valInt2 = %d\n",i,debugInfo[i].m_valInt2); + printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); } - if (debugInfo[i].m_valInt3>0) + if (debugInfo[i].m_valInt3 > 0) { - printf("debugInfo[i].m_valInt3 = %d\n",i,debugInfo[i].m_valInt3); + printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); } } -#endif //DEBUG_ME - - +#endif //DEBUG_ME } } - - clFinish(m_queue); - + clFinish(m_queue); } cdata.x = 1; - bool applyFriction=true; + bool applyFriction = true; if (applyFriction) - { + { B3_PROFILE("m_batchSolveKernel iterations2"); - for(int iter=0; iter<m_nIterations; iter++) + for (int iter = 0; iter < m_nIterations; iter++) { - for(int ib=0; ib<B3_SOLVER_N_BATCHES; ib++) + for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) { cdata.z = ib; - - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL( bodyBuf->getBufferCL() ), - b3BufferInfoCL( shapeBuf->getBufferCL() ), - b3BufferInfoCL( constraint->getBufferCL() ), - b3BufferInfoCL( m_numConstraints->getBufferCL() ), - b3BufferInfoCL( m_offsets->getBufferCL() ) + + b3BufferInfoCL bInfo[] = { + b3BufferInfoCL(bodyBuf->getBufferCL()), + b3BufferInfoCL(shapeBuf->getBufferCL()), + b3BufferInfoCL(constraint->getBufferCL()), + b3BufferInfoCL(m_numConstraints->getBufferCL()), + b3BufferInfoCL(m_offsets->getBufferCL()) #ifdef DEBUG_ME - ,b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME + , + b3BufferInfoCL(&gpuDebugInfo) +#endif //DEBUG_ME }; - b3LauncherCL launcher( m_queue, m_solveFrictionKernel,"m_solveFrictionKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3LauncherCL launcher(m_queue, m_solveFrictionKernel, "m_solveFrictionKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); //launcher.setConst( cdata.x ); - launcher.setConst( cdata.y ); - launcher.setConst( cdata.z ); - b3Int4 nSplit; + launcher.setConst(cdata.y); + launcher.setConst(cdata.z); + b3Int4 nSplit; nSplit.x = B3_SOLVER_N_SPLIT_X; nSplit.y = B3_SOLVER_N_SPLIT_Y; nSplit.z = B3_SOLVER_N_SPLIT_Z; - launcher.setConst( nSplit ); - - launcher.launch1D( 64*nn/B3_SOLVER_N_BATCHES, 64 ); + launcher.setConst(nSplit); + + launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); } } clFinish(m_queue); - } #ifdef DEBUG_ME delete[] debugInfo; -#endif //DEBUG_ME +#endif //DEBUG_ME } - - } -void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const ConstraintCfg& cfg ) +void b3Solver::convertToConstraints(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, + const b3OpenCLArray<b3InertiaData>* shapeBuf, + b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, + int nContacts, const ConstraintCfg& cfg) { -// b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; + // b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; contactCOut->resize(nContacts); struct CB { @@ -959,30 +880,28 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyB }; { - CB cdata; cdata.m_nContacts = nContacts; cdata.m_dt = cfg.m_dt; cdata.m_positionDrift = cfg.m_positionDrift; cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - if (gConvertConstraintOnCpu) { b3AlignedObjectArray<b3RigidBodyData> gBodies; - bodyBuf->copyToHost(gBodies); + bodyBuf->copyToHost(gBodies); - b3AlignedObjectArray<b3Contact4> gContact; - contactsIn->copyToHost(gContact); + b3AlignedObjectArray<b3Contact4> gContact; + contactsIn->copyToHost(gContact); + + b3AlignedObjectArray<b3InertiaData> gShapes; + shapeBuf->copyToHost(gShapes); + + b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut; + gConstraintOut.resize(nContacts); - b3AlignedObjectArray<b3InertiaData> gShapes; - shapeBuf->copyToHost(gShapes); - - b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut; - gConstraintOut.resize(nContacts); - B3_PROFILE("cpu contactToConstraintKernel"); - for (int gIdx=0;gIdx<nContacts;gIdx++) + for (int gIdx = 0; gIdx < nContacts; gIdx++) { int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); @@ -1001,40 +920,36 @@ void b3Solver::convertToConstraints( const b3OpenCLArray<b3RigidBodyData>* bodyB b3ContactConstraint4_t cs; - setConstraint4( posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, - &cs ); - + setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, + &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, + &cs); + cs.m_batchIdx = gContact[gIdx].m_batchIdx; gConstraintOut[gIdx] = (b3GpuConstraint4&)cs; } contactCOut->copyFromHost(gConstraintOut); - - } else + } + else { B3_PROFILE("gpu m_contactToConstraintKernel"); - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( shapeBuf->getBufferCL()), - b3BufferInfoCL( contactCOut->getBufferCL() )}; - b3LauncherCL launcher( m_queue, m_contactToConstraintKernel,"m_contactToConstraintKernel" ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); + b3BufferInfoCL bInfo[] = {b3BufferInfoCL(contactsIn->getBufferCL()), b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(shapeBuf->getBufferCL()), + b3BufferInfoCL(contactCOut->getBufferCL())}; + b3LauncherCL launcher(m_queue, m_contactToConstraintKernel, "m_contactToConstraintKernel"); + launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); //launcher.setConst( cdata ); - + launcher.setConst(cdata.m_nContacts); launcher.setConst(cdata.m_dt); launcher.setConst(cdata.m_positionDrift); launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D( nContacts, 64 ); - clFinish(m_queue); + launcher.launch1D(nContacts, 64); + clFinish(m_queue); } } - - } /* @@ -1115,28 +1030,24 @@ void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, } */ -void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx ) +void b3Solver::batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx) { - - int numWorkItems = 64*B3_SOLVER_N_CELLS; + int numWorkItems = 64 * B3_SOLVER_N_CELLS; { B3_PROFILE("batch generation"); - + b3Int4 cdata; cdata.x = nContacts; cdata.y = 0; cdata.z = staticIdx; - #ifdef BATCH_DEBUG - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device,numWorkItems); - memset(debugInfo,0,sizeof(SolverDebugInfo)*numWorkItems); - gpuDebugInfo.write(debugInfo,numWorkItems); + SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; + adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems); + memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); + gpuDebugInfo.write(debugInfo, numWorkItems); #endif - - #if 0 b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contacts->getBufferCL() ), @@ -1148,8 +1059,6 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact #endif }; #endif - - { m_batchSizes.resize(nNative->size()); @@ -1157,22 +1066,21 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact //b3LauncherCL launcher( m_queue, m_batchingKernel); cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel; - b3LauncherCL launcher( m_queue, k,"*batchingKernel"); - if (!useNewBatchingKernel ) + b3LauncherCL launcher(m_queue, k, "*batchingKernel"); + if (!useNewBatchingKernel) { - launcher.setBuffer( contacts->getBufferCL() ); + launcher.setBuffer(contacts->getBufferCL()); } - launcher.setBuffer( m_contactBuffer2->getBufferCL() ); - launcher.setBuffer( nNative->getBufferCL()); - launcher.setBuffer( offsetsNative->getBufferCL()); - + launcher.setBuffer(m_contactBuffer2->getBufferCL()); + launcher.setBuffer(nNative->getBufferCL()); + launcher.setBuffer(offsetsNative->getBufferCL()); + launcher.setBuffer(m_batchSizes.getBufferCL()); - //launcher.setConst( cdata ); - launcher.setConst(staticIdx); - - launcher.launch1D( numWorkItems, 64 ); + launcher.setConst(staticIdx); + + launcher.launch1D(numWorkItems, 64); //clFinish(m_queue); //b3AlignedObjectArray<int> batchSizesCPU; //m_batchSizes.copyToHost(batchSizesCPU); @@ -1180,46 +1088,41 @@ void b3Solver::batchContacts( b3OpenCLArray<b3Contact4>* contacts, int nContact } #ifdef BATCH_DEBUG - aaaa - b3Contact4* hostContacts = new b3Contact4[nContacts]; - m_contactBuffer->read(hostContacts,nContacts); + aaaa + b3Contact4* hostContacts = new b3Contact4[nContacts]; + m_contactBuffer->read(hostContacts, nContacts); clFinish(m_queue); - gpuDebugInfo.read(debugInfo,numWorkItems); + gpuDebugInfo.read(debugInfo, numWorkItems); clFinish(m_queue); - for (int i=0;i<numWorkItems;i++) + for (int i = 0; i < numWorkItems; i++) { - if (debugInfo[i].m_valInt1>0) + if (debugInfo[i].m_valInt1 > 0) { printf("catch\n"); } - if (debugInfo[i].m_valInt2>0) + if (debugInfo[i].m_valInt2 > 0) { printf("catch22\n"); } - if (debugInfo[i].m_valInt3>0) + if (debugInfo[i].m_valInt3 > 0) { printf("catch666\n"); } - if (debugInfo[i].m_valInt4>0) + if (debugInfo[i].m_valInt4 > 0) { printf("catch777\n"); } } delete[] debugInfo; -#endif //BATCH_DEBUG - +#endif //BATCH_DEBUG } -// copy buffer to buffer + // copy buffer to buffer //b3Assert(m_contactBuffer->size()==nContacts); //contacts->copyFromOpenCLArray( *m_contactBuffer); //clFinish(m_queue);//needed? - - - } - |