diff options
Diffstat (limited to 'thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp')
-rw-r--r-- | thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp | 1128 |
1 files changed, 0 insertions, 1128 deletions
diff --git a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp b/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp deleted file mode 100644 index ccf67da1a8..0000000000 --- a/thirdparty/bullet/Bullet3OpenCL/RigidBody/b3Solver.cpp +++ /dev/null @@ -1,1128 +0,0 @@ -/* -Copyright (c) 2012 Advanced Micro Devices, Inc. - -This software is provided 'as-is', without any express or implied warranty. -In no event will the authors be held liable for any damages arising from the use of this software. -Permission is granted to anyone to use this software for any purpose, -including commercial applications, and to alter it and redistribute it freely, -subject to the following restrictions: - -1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. -2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. -3. This notice may not be removed or altered from any source distribution. 
-*/ -//Originally written by Takahiro Harada - -#include "b3Solver.h" - -///useNewBatchingKernel is a rewritten kernel using just a single thread of the warp, for experiments -bool useNewBatchingKernel = true; -bool gConvertConstraintOnCpu = false; - -#define B3_SOLVER_SETUP_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup.cl" -#define B3_SOLVER_SETUP2_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solverSetup2.cl" -#define B3_SOLVER_CONTACT_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveContact.cl" -#define B3_SOLVER_FRICTION_KERNEL_PATH "src/Bullet3OpenCL/RigidBody/kernels/solveFriction.cl" -#define B3_BATCHING_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernels.cl" -#define B3_BATCHING_NEW_PATH "src/Bullet3OpenCL/RigidBody/kernels/batchingKernelsNew.cl" - -#include "Bullet3Dynamics/shared/b3ConvertConstraint4.h" - -#include "kernels/solverSetup.h" -#include "kernels/solverSetup2.h" - -#include "kernels/solveContact.h" -#include "kernels/solveFriction.h" - -#include "kernels/batchingKernels.h" -#include "kernels/batchingKernelsNew.h" - -#include "Bullet3OpenCL/ParallelPrimitives/b3LauncherCL.h" -#include "Bullet3Common/b3Vector3.h" - -struct SolverDebugInfo -{ - int m_valInt0; - int m_valInt1; - int m_valInt2; - int m_valInt3; - - int m_valInt4; - int m_valInt5; - int m_valInt6; - int m_valInt7; - - int m_valInt8; - int m_valInt9; - int m_valInt10; - int m_valInt11; - - int m_valInt12; - int m_valInt13; - int m_valInt14; - int m_valInt15; - - float m_val0; - float m_val1; - float m_val2; - float m_val3; -}; - -class SolverDeviceInl -{ -public: - struct ParallelSolveData - { - b3OpenCLArray<unsigned int>* m_numConstraints; - b3OpenCLArray<unsigned int>* m_offsets; - }; -}; - -b3Solver::b3Solver(cl_context ctx, cl_device_id device, cl_command_queue queue, int pairCapacity) - : m_context(ctx), - m_device(device), - m_queue(queue), - m_batchSizes(ctx, queue), - m_nIterations(4) -{ - m_sort32 = new b3RadixSort32CL(ctx, device, queue); - 
m_scan = new b3PrefixScanCL(ctx, device, queue, B3_SOLVER_N_CELLS); - m_search = new b3BoundSearchCL(ctx, device, queue, B3_SOLVER_N_CELLS); - - const int sortSize = B3NEXTMULTIPLEOF(pairCapacity, 512); - - m_sortDataBuffer = new b3OpenCLArray<b3SortData>(ctx, queue, sortSize); - m_contactBuffer2 = new b3OpenCLArray<b3Contact4>(ctx, queue); - - m_numConstraints = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); - m_numConstraints->resize(B3_SOLVER_N_CELLS); - - m_offsets = new b3OpenCLArray<unsigned int>(ctx, queue, B3_SOLVER_N_CELLS); - m_offsets->resize(B3_SOLVER_N_CELLS); - const char* additionalMacros = ""; - // const char* srcFileNameForCaching=""; - - cl_int pErrNum; - const char* batchKernelSource = batchingKernelsCL; - const char* batchKernelNewSource = batchingKernelsNewCL; - - const char* solverSetupSource = solverSetupCL; - const char* solverSetup2Source = solverSetup2CL; - const char* solveContactSource = solveContactCL; - const char* solveFrictionSource = solveFrictionCL; - - { - cl_program solveContactProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveContactSource, &pErrNum, additionalMacros, B3_SOLVER_CONTACT_KERNEL_PATH); - b3Assert(solveContactProg); - - cl_program solveFrictionProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solveFrictionSource, &pErrNum, additionalMacros, B3_SOLVER_FRICTION_KERNEL_PATH); - b3Assert(solveFrictionProg); - - cl_program solverSetup2Prog = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetup2Source, &pErrNum, additionalMacros, B3_SOLVER_SETUP2_KERNEL_PATH); - b3Assert(solverSetup2Prog); - - cl_program solverSetupProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, solverSetupSource, &pErrNum, additionalMacros, B3_SOLVER_SETUP_KERNEL_PATH); - b3Assert(solverSetupProg); - - m_solveFrictionKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveFrictionSource, "BatchSolveKernelFriction", &pErrNum, solveFrictionProg, additionalMacros); - 
b3Assert(m_solveFrictionKernel); - - m_solveContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solveContactSource, "BatchSolveKernelContact", &pErrNum, solveContactProg, additionalMacros); - b3Assert(m_solveContactKernel); - - m_contactToConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetupSource, "ContactToConstraintKernel", &pErrNum, solverSetupProg, additionalMacros); - b3Assert(m_contactToConstraintKernel); - - m_setSortDataKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "SetSortDataKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_setSortDataKernel); - - m_reorderContactKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "ReorderContactKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_reorderContactKernel); - - m_copyConstraintKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, solverSetup2Source, "CopyConstraintKernel", &pErrNum, solverSetup2Prog, additionalMacros); - b3Assert(m_copyConstraintKernel); - } - - { - cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelSource, &pErrNum, additionalMacros, B3_BATCHING_PATH); - //cl_program batchingProg = b3OpenCLUtils::compileCLProgramFromString( ctx, device, 0, &pErrNum,additionalMacros, B3_BATCHING_PATH,true); - b3Assert(batchingProg); - - m_batchingKernel = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelSource, "CreateBatches", &pErrNum, batchingProg, additionalMacros); - b3Assert(m_batchingKernel); - } - { - cl_program batchingNewProg = b3OpenCLUtils::compileCLProgramFromString(ctx, device, batchKernelNewSource, &pErrNum, additionalMacros, B3_BATCHING_NEW_PATH); - b3Assert(batchingNewProg); - - m_batchingKernelNew = b3OpenCLUtils::compileCLKernelFromString(ctx, device, batchKernelNewSource, "CreateBatchesNew", &pErrNum, batchingNewProg, additionalMacros); - //m_batchingKernelNew = 
b3OpenCLUtils::compileCLKernelFromString( ctx, device, batchKernelNewSource, "CreateBatchesBruteForce", &pErrNum, batchingNewProg,additionalMacros ); - b3Assert(m_batchingKernelNew); - } -} - -b3Solver::~b3Solver() -{ - delete m_offsets; - delete m_numConstraints; - delete m_sortDataBuffer; - delete m_contactBuffer2; - - delete m_sort32; - delete m_scan; - delete m_search; - - clReleaseKernel(m_batchingKernel); - clReleaseKernel(m_batchingKernelNew); - - clReleaseKernel(m_solveContactKernel); - clReleaseKernel(m_solveFrictionKernel); - - clReleaseKernel(m_contactToConstraintKernel); - clReleaseKernel(m_setSortDataKernel); - clReleaseKernel(m_reorderContactKernel); - clReleaseKernel(m_copyConstraintKernel); -} - -template <bool JACOBI> -static __inline void solveContact(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - b3Vector3 dLinVelA; - dLinVelA.setZero(); - b3Vector3 dAngVelA; - dAngVelA.setZero(); - b3Vector3 dLinVelB; - dLinVelB.setZero(); - b3Vector3 dAngVelB; - dAngVelB.setZero(); - - for (int ic = 0; ic < 4; ic++) - { - // dont necessary because this makes change to 0 - if (cs.m_jacCoeffInv[ic] == 0.f) continue; - - { - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = cs.m_worldPos[ic] - (b3Vector3&)posA; - b3Vector3 r1 = cs.m_worldPos[ic] - (b3Vector3&)posB; - setLinearAndAngular((const b3Vector3&)cs.m_linear, (const b3Vector3&)r0, (const b3Vector3&)r1, &linear, &angular0, &angular1); - - float rambdaDt = calcRelVel((const b3Vector3&)cs.m_linear, (const b3Vector3&)-cs.m_linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB) + - cs.m_b[ic]; - rambdaDt *= cs.m_jacCoeffInv[ic]; - - { - float prevSum = cs.m_appliedRambdaDt[ic]; - float updated = prevSum; - updated += rambdaDt; - updated = 
b3Max(updated, minRambdaDt[ic]); - updated = b3Min(updated, maxRambdaDt[ic]); - rambdaDt = updated - prevSum; - cs.m_appliedRambdaDt[ic] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - if (JACOBI) - { - dLinVelA += linImp0; - dAngVelA += angImp0; - dLinVelB += linImp1; - dAngVelB += angImp1; - } - else - { - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - } - } - - if (JACOBI) - { - linVelA += dLinVelA; - angVelA += dAngVelA; - linVelB += dLinVelB; - angVelB += dAngVelB; - } -} - -static __inline void solveFriction(b3GpuConstraint4& cs, - const b3Vector3& posA, b3Vector3& linVelA, b3Vector3& angVelA, float invMassA, const b3Matrix3x3& invInertiaA, - const b3Vector3& posB, b3Vector3& linVelB, b3Vector3& angVelB, float invMassB, const b3Matrix3x3& invInertiaB, - float maxRambdaDt[4], float minRambdaDt[4]) -{ - if (cs.m_fJacCoeffInv[0] == 0 && cs.m_fJacCoeffInv[0] == 0) return; - const b3Vector3& center = (const b3Vector3&)cs.m_center; - - b3Vector3 n = -(const b3Vector3&)cs.m_linear; - - b3Vector3 tangent[2]; -#if 1 - b3PlaneSpace1(n, tangent[0], tangent[1]); -#else - b3Vector3 r = cs.m_worldPos[0] - center; - tangent[0] = cross3(n, r); - tangent[1] = cross3(tangent[0], n); - tangent[0] = normalize3(tangent[0]); - tangent[1] = normalize3(tangent[1]); -#endif - - b3Vector3 angular0, angular1, linear; - b3Vector3 r0 = center - posA; - b3Vector3 r1 = center - posB; - for (int i = 0; i < 2; i++) - { - setLinearAndAngular(tangent[i], r0, r1, &linear, &angular0, &angular1); - float rambdaDt = calcRelVel(linear, -linear, angular0, angular1, - linVelA, angVelA, linVelB, angVelB); - rambdaDt *= cs.m_fJacCoeffInv[i]; - - { - float prevSum = 
cs.m_fAppliedRambdaDt[i]; - float updated = prevSum; - updated += rambdaDt; - updated = b3Max(updated, minRambdaDt[i]); - updated = b3Min(updated, maxRambdaDt[i]); - rambdaDt = updated - prevSum; - cs.m_fAppliedRambdaDt[i] = updated; - } - - b3Vector3 linImp0 = invMassA * linear * rambdaDt; - b3Vector3 linImp1 = invMassB * (-linear) * rambdaDt; - b3Vector3 angImp0 = (invInertiaA * angular0) * rambdaDt; - b3Vector3 angImp1 = (invInertiaB * angular1) * rambdaDt; -#ifdef _WIN32 - b3Assert(_finite(linImp0.getX())); - b3Assert(_finite(linImp1.getX())); -#endif - linVelA += linImp0; - angVelA += angImp0; - linVelB += linImp1; - angVelB += angImp1; - } - - { // angular damping for point constraint - b3Vector3 ab = (posB - posA).normalized(); - b3Vector3 ac = (center - posA).normalized(); - if (b3Dot(ab, ac) > 0.95f || (invMassA == 0.f || invMassB == 0.f)) - { - float angNA = b3Dot(n, angVelA); - float angNB = b3Dot(n, angVelB); - - angVelA -= (angNA * 0.1f) * n; - angVelB -= (angNB * 0.1f) * n; - } - } -} -/* - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool m_solveFriction; - int m_maxNumBatches; - */ - -struct SolveTask // : public ThreadPool::Task -{ - SolveTask(b3AlignedObjectArray<b3RigidBodyData>& bodies, b3AlignedObjectArray<b3InertiaData>& shapes, b3AlignedObjectArray<b3GpuConstraint4>& constraints, - int start, int nConstraints, int maxNumBatches, b3AlignedObjectArray<int>* wgUsedBodies, int curWgidx, b3AlignedObjectArray<int>* batchSizes, int cellIndex) - : m_bodies(bodies), m_shapes(shapes), m_constraints(constraints), m_batchSizes(batchSizes), m_cellIndex(cellIndex), m_curWgidx(curWgidx), m_start(start), m_nConstraints(nConstraints), m_solveFriction(true), m_maxNumBatches(maxNumBatches) - { - } - - unsigned short int getType() { 
return 0; } - - void run(int tIdx) - { - int offset = 0; - for (int ii = 0; ii < B3_MAX_NUM_BATCHES; ii++) - { - int numInBatch = m_batchSizes->at(m_cellIndex * B3_MAX_NUM_BATCHES + ii); - if (!numInBatch) - break; - - for (int jj = 0; jj < numInBatch; jj++) - { - int i = m_start + offset + jj; - int batchId = m_constraints[i].m_batchIdx; - b3Assert(batchId == ii); - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - // int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if (!m_solveFriction) - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - - solveContact<false>(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - else - { - float maxRambdaDt[4] = {FLT_MAX, FLT_MAX, FLT_MAX, FLT_MAX}; - float minRambdaDt[4] = {0.f, 0.f, 0.f, 0.f}; - float sum = 0; - for (int j = 0; j < 4; j++) - { - sum += m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for (int j = 0; j < 4; j++) - { - maxRambdaDt[j] = frictionCoeff * sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction(m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3&)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3&)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt); - } - } - offset += numInBatch; - } - /* for (int bb=0;bb<m_maxNumBatches;bb++) - { - 
//for(int ic=m_nConstraints-1; ic>=0; ic--) - for(int ic=0; ic<m_nConstraints; ic++) - { - - int i = m_start + ic; - if (m_constraints[i].m_batchIdx != bb) - continue; - - float frictionCoeff = m_constraints[i].getFrictionCoeff(); - int aIdx = (int)m_constraints[i].m_bodyA; - int bIdx = (int)m_constraints[i].m_bodyB; - int localBatch = m_constraints[i].m_batchIdx; - b3RigidBodyData& bodyA = m_bodies[aIdx]; - b3RigidBodyData& bodyB = m_bodies[bIdx]; - - if( !m_solveFriction ) - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - - solveContact<false>( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass, (const b3Matrix3x3 &)m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass, (const b3Matrix3x3 &)m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - } - else - { - float maxRambdaDt[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; - float minRambdaDt[4] = {0.f,0.f,0.f,0.f}; - float sum = 0; - for(int j=0; j<4; j++) - { - sum +=m_constraints[i].m_appliedRambdaDt[j]; - } - frictionCoeff = 0.7f; - for(int j=0; j<4; j++) - { - maxRambdaDt[j] = frictionCoeff*sum; - minRambdaDt[j] = -maxRambdaDt[j]; - } - solveFriction( m_constraints[i], (b3Vector3&)bodyA.m_pos, (b3Vector3&)bodyA.m_linVel, (b3Vector3&)bodyA.m_angVel, bodyA.m_invMass,(const b3Matrix3x3 &) m_shapes[aIdx].m_invInertiaWorld, - (b3Vector3&)bodyB.m_pos, (b3Vector3&)bodyB.m_linVel, (b3Vector3&)bodyB.m_angVel, bodyB.m_invMass,(const b3Matrix3x3 &) m_shapes[bIdx].m_invInertiaWorld, - maxRambdaDt, minRambdaDt ); - - } - } - } - */ - } - - b3AlignedObjectArray<b3RigidBodyData>& m_bodies; - b3AlignedObjectArray<b3InertiaData>& m_shapes; - b3AlignedObjectArray<b3GpuConstraint4>& m_constraints; - b3AlignedObjectArray<int>* m_batchSizes; - int m_cellIndex; - int m_curWgidx; - int m_start; - int m_nConstraints; - bool 
m_solveFriction; - int m_maxNumBatches; -}; - -void b3Solver::solveContactConstraintHost(b3OpenCLArray<b3RigidBodyData>* bodyBuf, b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches, b3AlignedObjectArray<int>* batchSizes) -{ -#if 0 - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS/B3_SOLVER_N_BATCHES; - for (int z=0;z<4;z++) - { - for (int y=0;y<4;y++) - { - for (int x=0;x<4;x++) - { - int newIndex = (x+y*nSplitX+z*nSplitX*nSplitY); - // printf("newIndex=%d\n",newIndex); - - int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - // printf("newIndex=%d\n",newIndex); - } - } - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int cellBatch=0;cellBatch<B3_SOLVER_N_BATCHES;cellBatch++) - { - for (int wgIdx=0;wgIdx<numWorkgroups;wgIdx++) - { - int zIdx = (wgIdx/((nSplitX*nSplitY)/4))*2+((cellBatch&4)>>2); - int remain= (wgIdx%((nSplitX*nSplitY)/4)); - int yIdx = (remain/(nSplitX/2))*2 + ((cellBatch&2)>>1); - int xIdx = (remain%(nSplitX/2))*2 + (cellBatch&1); - - /*int zIdx = newIndex/(nSplitX*nSplitY); - int remain = newIndex%(nSplitX*nSplitY); - int yIdx = remain/nSplitX; - int xIdx = remain%nSplitX; - */ - int cellIdx = xIdx+yIdx*nSplitX+zIdx*(nSplitX*nSplitY); - // printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - } - } - } -#endif - - b3AlignedObjectArray<b3RigidBodyData> bodyNative; - bodyBuf->copyToHost(bodyNative); - b3AlignedObjectArray<b3InertiaData> shapeNative; - shapeBuf->copyToHost(shapeNative); - b3AlignedObjectArray<b3GpuConstraint4> constraintNative; - constraint->copyToHost(constraintNative); - - b3AlignedObjectArray<unsigned int> numConstraintsHost; - m_numConstraints->copyToHost(numConstraintsHost); - - 
//printf("------------------------\n"); - b3AlignedObjectArray<unsigned int> offsetsHost; - m_offsets->copyToHost(offsetsHost); - static int frame = 0; - bool useBatches = true; - if (useBatches) - { - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - //printf("cell Batch %d\n",cellBatch); - b3AlignedObjectArray<int> usedBodies[B3_SOLVER_N_CELLS]; - for (int i = 0; i < B3_SOLVER_N_CELLS; i++) - { - usedBodies[i].resize(0); - } - - //for (int wgIdx=numWorkgroups-1;wgIdx>=0;wgIdx--) - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if (numConstraintsHost[cellIdx] == 0) - continue; - - //printf("wgIdx %d: xIdx=%d, yIdx=%d, zIdx=%d, cellIdx=%d, cell Batch %d\n",wgIdx,xIdx,yIdx,zIdx,cellIdx,cellBatch); - //printf("cell %d has %d constraints\n", cellIdx,numConstraintsHost[cellIdx]); - if (zIdx) - { - //printf("?\n"); - } - - if (iter == 0) - { - //printf("frame=%d, Cell xIdx=%x, yIdx=%d ",frame, xIdx,yIdx); - //printf("cellBatch=%d, wgIdx=%d, #constraints in cell=%d\n",cellBatch,wgIdx,numConstraintsHost[cellIdx]); - } - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, usedBodies, wgIdx, batchSizes, cellIdx); - task.m_solveFriction = false; - task.run(0); - } - } - } - - for (int iter = 0; iter < m_nIterations; iter++) - { 
- for (int cellBatch = 0; cellBatch < B3_SOLVER_N_BATCHES; cellBatch++) - { - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - int numWorkgroups = B3_SOLVER_N_CELLS / B3_SOLVER_N_BATCHES; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - int zIdx = (wgIdx / ((nSplitX * nSplitY) / 4)) * 2 + ((cellBatch & 4) >> 2); - int remain = (wgIdx % ((nSplitX * nSplitY) / 4)); - int yIdx = (remain / (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain % (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - - if (numConstraintsHost[cellIdx] == 0) - continue; - - //printf("yIdx=%d\n",yIdx); - - const int start = offsetsHost[cellIdx]; - int numConstraintsInCell = numConstraintsHost[cellIdx]; - // const int end = start + numConstraintsInCell; - - SolveTask task(bodyNative, shapeNative, constraintNative, start, numConstraintsInCell, maxNumBatches, 0, 0, batchSizes, cellIdx); - task.m_solveFriction = true; - task.run(0); - } - } - } - } - else - { - for (int iter = 0; iter < m_nIterations; iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = false; - task.run(0); - } - - for (int iter = 0; iter < m_nIterations; iter++) - { - SolveTask task(bodyNative, shapeNative, constraintNative, 0, n, maxNumBatches, 0, 0, 0, 0); - task.m_solveFriction = true; - task.run(0); - } - } - - bodyBuf->copyFromHost(bodyNative); - shapeBuf->copyFromHost(shapeNative); - constraint->copyFromHost(constraintNative); - frame++; -} - -void checkConstraintBatch(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, - b3OpenCLArray<unsigned int>* m_numConstraints, - b3OpenCLArray<unsigned int>* m_offsets, - int batchId) -{ - // b3BufferInfoCL( m_numConstraints->getBufferCL() ), - // b3BufferInfoCL( m_offsets->getBufferCL() ) - - int cellBatch = 
batchId; - const int nn = B3_SOLVER_N_CELLS; - // int numWorkItems = 64*nn/B3_SOLVER_N_BATCHES; - - b3AlignedObjectArray<unsigned int> gN; - m_numConstraints->copyToHost(gN); - b3AlignedObjectArray<unsigned int> gOffsets; - m_offsets->copyToHost(gOffsets); - int nSplitX = B3_SOLVER_N_SPLIT_X; - int nSplitY = B3_SOLVER_N_SPLIT_Y; - - // int bIdx = batchId; - - b3AlignedObjectArray<b3GpuConstraint4> cpuConstraints; - constraint->copyToHost(cpuConstraints); - - printf("batch = %d\n", batchId); - - int numWorkgroups = nn / B3_SOLVER_N_BATCHES; - b3AlignedObjectArray<int> usedBodies; - - for (int wgIdx = 0; wgIdx < numWorkgroups; wgIdx++) - { - printf("wgIdx = %d ", wgIdx); - - int zIdx = (wgIdx / ((nSplitX * nSplitY)) / 2) * 2 + ((cellBatch & 4) >> 2); - int remain = wgIdx % ((nSplitX * nSplitY)); - int yIdx = (remain % (nSplitX / 2)) * 2 + ((cellBatch & 2) >> 1); - int xIdx = (remain / (nSplitX / 2)) * 2 + (cellBatch & 1); - - int cellIdx = xIdx + yIdx * nSplitX + zIdx * (nSplitX * nSplitY); - printf("cellIdx=%d\n", cellIdx); - if (gN[cellIdx] == 0) - continue; - - const int start = gOffsets[cellIdx]; - const int end = start + gN[cellIdx]; - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - //printf("constraint (%d,%d)\n", constraint.m_bodyA,constraint.m_bodyB); - if (usedBodies.findLinearSearch(constraint.m_bodyA) < usedBodies.size()) - { - printf("error?\n"); - } - if (usedBodies.findLinearSearch(constraint.m_bodyB) < usedBodies.size()) - { - printf("error?\n"); - } - } - - for (int c = start; c < end; c++) - { - b3GpuConstraint4& constraint = cpuConstraints[c]; - usedBodies.push_back(constraint.m_bodyA); - usedBodies.push_back(constraint.m_bodyB); - } - } -} - -static bool verify = false; - -void b3Solver::solveContactConstraint(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3GpuConstraint4>* constraint, void* additionalData, int n, int maxNumBatches) -{ - 
b3Int4 cdata = b3MakeInt4(n, 0, 0, 0); - { - const int nn = B3_SOLVER_N_CELLS; - - cdata.x = 0; - cdata.y = maxNumBatches; //250; - - int numWorkItems = 64 * nn / B3_SOLVER_N_BATCHES; -#ifdef DEBUG_ME - SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems]; - adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems); -#endif - - { - B3_PROFILE("m_batchSolveKernel iterations"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - if (verify) - { - checkConstraintBatch(bodyBuf, shapeBuf, constraint, m_numConstraints, m_offsets, ib); - } - -#ifdef DEBUG_ME - memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems); - gpuDebugInfo.write(debugInfo, numWorkItems); -#endif - - cdata.z = ib; - - b3LauncherCL launcher(m_queue, m_solveContactKernel, "m_solveContactKernel"); -#if 1 - - b3BufferInfoCL bInfo[] = { - - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif - }; - - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - launcher.launch1D(numWorkItems, 64); - -#else - const char* fileName = "m_batchSolveKernel.bin"; - FILE* f = fopen(fileName, "rb"); - if (f) - { - int sizeInBytes = 0; - if (fseek(f, 0, SEEK_END) || (sizeInBytes = ftell(f)) == EOF || fseek(f, 0, SEEK_SET)) - { - printf("error, cannot get file size\n"); - exit(0); - } - - unsigned char* buf = (unsigned char*)malloc(sizeInBytes); - fread(buf, sizeInBytes, 1, f); - int serializedBytes = launcher.deserializeArgs(buf, sizeInBytes, 
m_context); - int num = *(int*)&buf[serializedBytes]; - - launcher.launch1D(num); - - //this clFinish is for testing on errors - clFinish(m_queue); - } - -#endif - -#ifdef DEBUG_ME - clFinish(m_queue); - gpuDebugInfo.read(debugInfo, numWorkItems); - clFinish(m_queue); - for (int i = 0; i < numWorkItems; i++) - { - if (debugInfo[i].m_valInt2 > 0) - { - printf("debugInfo[i].m_valInt2 = %d\n", i, debugInfo[i].m_valInt2); - } - - if (debugInfo[i].m_valInt3 > 0) - { - printf("debugInfo[i].m_valInt3 = %d\n", i, debugInfo[i].m_valInt3); - } - } -#endif //DEBUG_ME - } - } - - clFinish(m_queue); - } - - cdata.x = 1; - bool applyFriction = true; - if (applyFriction) - { - B3_PROFILE("m_batchSolveKernel iterations2"); - for (int iter = 0; iter < m_nIterations; iter++) - { - for (int ib = 0; ib < B3_SOLVER_N_BATCHES; ib++) - { - cdata.z = ib; - - b3BufferInfoCL bInfo[] = { - b3BufferInfoCL(bodyBuf->getBufferCL()), - b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(constraint->getBufferCL()), - b3BufferInfoCL(m_numConstraints->getBufferCL()), - b3BufferInfoCL(m_offsets->getBufferCL()) -#ifdef DEBUG_ME - , - b3BufferInfoCL(&gpuDebugInfo) -#endif //DEBUG_ME - }; - b3LauncherCL launcher(m_queue, m_solveFrictionKernel, "m_solveFrictionKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata.x ); - launcher.setConst(cdata.y); - launcher.setConst(cdata.z); - b3Int4 nSplit; - nSplit.x = B3_SOLVER_N_SPLIT_X; - nSplit.y = B3_SOLVER_N_SPLIT_Y; - nSplit.z = B3_SOLVER_N_SPLIT_Z; - - launcher.setConst(nSplit); - - launcher.launch1D(64 * nn / B3_SOLVER_N_BATCHES, 64); - } - } - clFinish(m_queue); - } -#ifdef DEBUG_ME - delete[] debugInfo; -#endif //DEBUG_ME - } -} - -void b3Solver::convertToConstraints(const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - const b3OpenCLArray<b3InertiaData>* shapeBuf, - b3OpenCLArray<b3Contact4>* contactsIn, b3OpenCLArray<b3GpuConstraint4>* contactCOut, void* additionalData, - int nContacts, const 
ConstraintCfg& cfg) -{ - // b3OpenCLArray<b3GpuConstraint4>* constraintNative =0; - contactCOut->resize(nContacts); - struct CB - { - int m_nContacts; - float m_dt; - float m_positionDrift; - float m_positionConstraintCoeff; - }; - - { - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_dt = cfg.m_dt; - cdata.m_positionDrift = cfg.m_positionDrift; - cdata.m_positionConstraintCoeff = cfg.m_positionConstraintCoeff; - - if (gConvertConstraintOnCpu) - { - b3AlignedObjectArray<b3RigidBodyData> gBodies; - bodyBuf->copyToHost(gBodies); - - b3AlignedObjectArray<b3Contact4> gContact; - contactsIn->copyToHost(gContact); - - b3AlignedObjectArray<b3InertiaData> gShapes; - shapeBuf->copyToHost(gShapes); - - b3AlignedObjectArray<b3GpuConstraint4> gConstraintOut; - gConstraintOut.resize(nContacts); - - B3_PROFILE("cpu contactToConstraintKernel"); - for (int gIdx = 0; gIdx < nContacts; gIdx++) - { - int aIdx = abs(gContact[gIdx].m_bodyAPtrAndSignBit); - int bIdx = abs(gContact[gIdx].m_bodyBPtrAndSignBit); - - b3Float4 posA = gBodies[aIdx].m_pos; - b3Float4 linVelA = gBodies[aIdx].m_linVel; - b3Float4 angVelA = gBodies[aIdx].m_angVel; - float invMassA = gBodies[aIdx].m_invMass; - b3Mat3x3 invInertiaA = gShapes[aIdx].m_initInvInertia; - - b3Float4 posB = gBodies[bIdx].m_pos; - b3Float4 linVelB = gBodies[bIdx].m_linVel; - b3Float4 angVelB = gBodies[bIdx].m_angVel; - float invMassB = gBodies[bIdx].m_invMass; - b3Mat3x3 invInertiaB = gShapes[bIdx].m_initInvInertia; - - b3ContactConstraint4_t cs; - - setConstraint4(posA, linVelA, angVelA, invMassA, invInertiaA, posB, linVelB, angVelB, invMassB, invInertiaB, - &gContact[gIdx], cdata.m_dt, cdata.m_positionDrift, cdata.m_positionConstraintCoeff, - &cs); - - cs.m_batchIdx = gContact[gIdx].m_batchIdx; - - gConstraintOut[gIdx] = (b3GpuConstraint4&)cs; - } - - contactCOut->copyFromHost(gConstraintOut); - } - else - { - B3_PROFILE("gpu m_contactToConstraintKernel"); - - b3BufferInfoCL bInfo[] = {b3BufferInfoCL(contactsIn->getBufferCL()), 
b3BufferInfoCL(bodyBuf->getBufferCL()), b3BufferInfoCL(shapeBuf->getBufferCL()), - b3BufferInfoCL(contactCOut->getBufferCL())}; - b3LauncherCL launcher(m_queue, m_contactToConstraintKernel, "m_contactToConstraintKernel"); - launcher.setBuffers(bInfo, sizeof(bInfo) / sizeof(b3BufferInfoCL)); - //launcher.setConst( cdata ); - - launcher.setConst(cdata.m_nContacts); - launcher.setConst(cdata.m_dt); - launcher.setConst(cdata.m_positionDrift); - launcher.setConst(cdata.m_positionConstraintCoeff); - - launcher.launch1D(nContacts, 64); - clFinish(m_queue); - } - } -} - -/* -void b3Solver::sortContacts( const b3OpenCLArray<b3RigidBodyData>* bodyBuf, - b3OpenCLArray<b3Contact4>* contactsIn, void* additionalData, - int nContacts, const b3Solver::ConstraintCfg& cfg ) -{ - - - - const int sortAlignment = 512; // todo. get this out of sort - if( cfg.m_enableParallelSolve ) - { - - - int sortSize = NEXTMULTIPLEOF( nContacts, sortAlignment ); - - b3OpenCLArray<unsigned int>* countsNative = m_numConstraints;//BufferUtils::map<TYPE_CL, false>( data->m_device, &countsHost ); - b3OpenCLArray<unsigned int>* offsetsNative = m_offsets;//BufferUtils::map<TYPE_CL, false>( data->m_device, &offsetsHost ); - - { // 2. set cell idx - struct CB - { - int m_nContacts; - int m_staticIdx; - float m_scale; - int m_nSplit; - }; - - b3Assert( sortSize%64 == 0 ); - CB cdata; - cdata.m_nContacts = nContacts; - cdata.m_staticIdx = cfg.m_staticIdx; - cdata.m_scale = 1.f/(N_OBJ_PER_SPLIT*cfg.m_averageExtent); - cdata.m_nSplit = B3_SOLVER_N_SPLIT; - - - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( bodyBuf->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_setSortDataKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( sortSize, 64 ); - } - - { // 3. 
sort by cell idx - int n = B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT; - int sortBit = 32; - //if( n <= 0xffff ) sortBit = 16; - //if( n <= 0xff ) sortBit = 8; - m_sort32->execute(*m_sortDataBuffer,sortSize); - } - { // 4. find entries - m_search->execute( *m_sortDataBuffer, nContacts, *countsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT, b3BoundSearchCL::COUNT); - - m_scan->execute( *countsNative, *offsetsNative, B3_SOLVER_N_SPLIT*B3_SOLVER_N_SPLIT ); - } - - { // 5. sort constraints by cellIdx - // todo. preallocate this -// b3Assert( contactsIn->getType() == TYPE_HOST ); -// b3OpenCLArray<b3Contact4>* out = BufferUtils::map<TYPE_CL, false>( data->m_device, contactsIn ); // copying contacts to this buffer - - { - - - b3Int4 cdata; cdata.x = nContacts; - b3BufferInfoCL bInfo[] = { b3BufferInfoCL( contactsIn->getBufferCL() ), b3BufferInfoCL( m_contactBuffer->getBufferCL() ), b3BufferInfoCL( m_sortDataBuffer->getBufferCL() ) }; - b3LauncherCL launcher( m_queue, m_reorderContactKernel ); - launcher.setBuffers( bInfo, sizeof(bInfo)/sizeof(b3BufferInfoCL) ); - launcher.setConst( cdata ); - launcher.launch1D( nContacts, 64 ); - } -// BufferUtils::unmap<true>( out, contactsIn, nContacts ); - } - } - - -} - -*/
/// Enqueues the contact batching kernel.
///
/// The kernel is m_batchingKernelNew when the global useNewBatchingKernel flag
/// is set, otherwise m_batchingKernel. Only the old kernel receives `contacts`
/// as its first buffer argument; both kernels then receive, in this order,
/// m_contactBuffer2, nNative, offsetsNative, m_batchSizes and the constant
/// staticIdx.
///
/// m_batchSizes is resized to nNative->size() before the launch.
/// This function does not call clFinish in the normal (non BATCH_DEBUG) build.
///
/// @param contacts      contact array, bound only for the old batching kernel
/// @param nContacts     contact count; only read by the BATCH_DEBUG path
/// @param nNative       bound as a kernel buffer; its size() also sizes m_batchSizes
/// @param offsetsNative bound as a kernel buffer
/// @param staticIdx     passed to the kernel as its only constant argument
void b3Solver::batchContacts(b3OpenCLArray<b3Contact4>* contacts, int nContacts, b3OpenCLArray<unsigned int>* nNative, b3OpenCLArray<unsigned int>* offsetsNative, int staticIdx)
{
	// nContacts is no longer forwarded to the kernel (the packed b3Int4 constant
	// was never bound); keep the parameter referenced for non-debug builds.
	(void)nContacts;

	// presumably one 64-wide group per solver cell - TODO confirm against launch1D
	const int numWorkItems = 64 * B3_SOLVER_N_CELLS;
	{
		B3_PROFILE("batch generation");

#ifdef BATCH_DEBUG
		// NOTE(review): this debug path refers to `adl::` and `data->m_device`,
		// neither of which is visible in this file - it looks stale; confirm
		// before enabling BATCH_DEBUG.
		SolverDebugInfo* debugInfo = new SolverDebugInfo[numWorkItems];
		adl::b3OpenCLArray<SolverDebugInfo> gpuDebugInfo(data->m_device, numWorkItems);
		memset(debugInfo, 0, sizeof(SolverDebugInfo) * numWorkItems);
		gpuDebugInfo.write(debugInfo, numWorkItems);
#endif

		{
			m_batchSizes.resize(nNative->size());
			B3_PROFILE("batchingKernel");
			cl_kernel k = useNewBatchingKernel ? m_batchingKernelNew : m_batchingKernel;

			b3LauncherCL launcher(m_queue, k, "*batchingKernel");
			// Only the old kernel takes the input contact buffer as argument 0.
			if (!useNewBatchingKernel)
			{
				launcher.setBuffer(contacts->getBufferCL());
			}
			launcher.setBuffer(m_contactBuffer2->getBufferCL());
			launcher.setBuffer(nNative->getBufferCL());
			launcher.setBuffer(offsetsNative->getBufferCL());

			launcher.setBuffer(m_batchSizes.getBufferCL());

			launcher.setConst(staticIdx);

			launcher.launch1D(numWorkItems, 64);
			//clFinish(m_queue);
			//b3AlignedObjectArray<int> batchSizesCPU;
			//m_batchSizes.copyToHost(batchSizesCPU);
			//printf(".\n");
		}

#ifdef BATCH_DEBUG
		// NOTE(review): the stray token below stops this path from compiling;
		// presumably a deliberate tripwire - confirm before removing.
		aaaa
		b3Contact4* hostContacts = new b3Contact4[nContacts];
		m_contactBuffer->read(hostContacts, nContacts);
		clFinish(m_queue);

		gpuDebugInfo.read(debugInfo, numWorkItems);
		clFinish(m_queue);

		for (int i = 0; i < numWorkItems; i++)
		{
			if (debugInfo[i].m_valInt1 > 0)
			{
				printf("catch\n");
			}
			if (debugInfo[i].m_valInt2 > 0)
			{
				printf("catch22\n");
			}

			if (debugInfo[i].m_valInt3 > 0)
			{
				printf("catch666\n");
			}

			if (debugInfo[i].m_valInt4 > 0)
			{
				printf("catch777\n");
			}
		}
		// Release both host-side scratch arrays (hostContacts used to leak).
		delete[] hostContacts;
		delete[] debugInfo;
#endif  //BATCH_DEBUG
	}

	// copy buffer to buffer
	//b3Assert(m_contactBuffer->size()==nContacts);
	//contacts->copyFromOpenCLArray( *m_contactBuffer);
	//clFinish(m_queue);//needed?
}