diff options
Diffstat (limited to 'thirdparty')
134 files changed, 17464 insertions, 8500 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md index b2707e7f7c..73a62458c3 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -40,11 +40,9 @@ Files extracted from upstream source: ## bullet - Upstream: https://github.com/bulletphysics/bullet3 -- Version: git pre-2.90 (cd8cf7521cbb8b7808126a6adebd47bb83ea166a, 2020) +- Version: 3.07 (e32fc59c88a3908876949c6f2665e8d091d987fa, 2020) - License: zlib -Important: Synced with a pre-release version of bullet 2.90 from the master branch. - Files extracted from upstream source: - src/* apart from CMakeLists.txt and premake4.lua files @@ -341,7 +339,7 @@ changes are marked with `// -- GODOT --` comments. ## mbedtls - Upstream: https://tls.mbed.org/ -- Version: 2.16.8 (2020) +- Version: 2.16.9 (2020) - License: Apache 2.0 File extracted from upstream release tarball: @@ -358,6 +356,21 @@ File extracted from upstream release tarball: for light bundling with core. +## meshoptimizer + +- Upstream: https://github.com/zeux/meshoptimizer +- Version: 0.15 (2020) +- License: MIT + +File extracted from upstream release tarball: + +- All files in `src/`. + +Important: Some files have Godot-made changes. +They can be applied with the patch in the `patches` folder, but are meant to be superseded +by upstream API changes. 
+ + ## miniupnpc - Upstream: https://github.com/miniupnp/miniupnp/tree/master/miniupnpc diff --git a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp index 6f2c5251a0..4938fa17af 100644 --- a/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp +++ b/thirdparty/bullet/Bullet3OpenCL/NarrowphaseCollision/b3OptimizedBvh.cpp @@ -285,7 +285,6 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart); curNodeSubPart = nodeSubPart; - b3Assert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT); } //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, @@ -293,7 +292,13 @@ void b3OptimizedBvh::updateBvhNodes(b3StridingMeshInterface* meshInterface, int for (int j = 2; j >= 0; j--) { - int graphicsindex = indicestype == PHY_SHORT ? 
((unsigned short*)gfxbase)[j] : gfxbase[j]; + int graphicsindex; + switch (indicestype) { + case PHY_INTEGER: graphicsindex = gfxbase[j]; break; + case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; + case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; + default: b3Assert(0); + } if (type == PHY_FLOAT) { float* graphicsbase = (float*)(vertexbase + graphicsindex * stride); diff --git a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp index 145de62db3..f6c779a919 100644 --- a/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp +++ b/thirdparty/bullet/Bullet3Serialize/Bullet2FileLoader/b3File.cpp @@ -851,12 +851,12 @@ void bFile::swapData(char *data, short type, int arraySize, bool ignoreEndianFla void bFile::safeSwapPtr(char *dst, const char *src) { + if (!src || !dst) + return; + int ptrFile = mFileDNA->getPointerSize(); int ptrMem = mMemoryDNA->getPointerSize(); - if (!src && !dst) - return; - if (ptrFile == ptrMem) { memcpy(dst, src, ptrMem); diff --git a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp index 4954e773e2..19f1737b73 100644 --- a/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp +++ b/thirdparty/bullet/BulletCollision/BroadphaseCollision/btQuantizedBvh.cpp @@ -346,8 +346,6 @@ void btQuantizedBvh::reportAabbOverlappingNodex(btNodeOverlapCallback* nodeCallb } } -int maxIterations = 0; - void btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback, const btVector3& aabbMin, const btVector3& aabbMax) const { btAssert(!m_useQuantization); @@ -387,8 +385,6 @@ void btQuantizedBvh::walkStacklessTree(btNodeOverlapCallback* nodeCallback, cons curIndex += escapeIndex; } } - if (maxIterations < walkIterations) - maxIterations = walkIterations; } /* @@ -529,8 +525,6 @@ void 
btQuantizedBvh::walkStacklessTreeAgainstRay(btNodeOverlapCallback* nodeCall curIndex += escapeIndex; } } - if (maxIterations < walkIterations) - maxIterations = walkIterations; } void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* nodeCallback, const btVector3& raySource, const btVector3& rayTarget, const btVector3& aabbMin, const btVector3& aabbMax, int startNodeIndex, int endNodeIndex) const @@ -654,8 +648,6 @@ void btQuantizedBvh::walkStacklessQuantizedTreeAgainstRay(btNodeOverlapCallback* curIndex += escapeIndex; } } - if (maxIterations < walkIterations) - maxIterations = walkIterations; } void btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallback, unsigned short int* quantizedQueryAabbMin, unsigned short int* quantizedQueryAabbMax, int startNodeIndex, int endNodeIndex) const @@ -718,8 +710,6 @@ void btQuantizedBvh::walkStacklessQuantizedTree(btNodeOverlapCallback* nodeCallb curIndex += escapeIndex; } } - if (maxIterations < walkIterations) - maxIterations = walkIterations; } //This traversal can be called from Playstation 3 SPU diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h index 85dc488c8c..e085c40892 100644 --- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h +++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btCollisionObject.h @@ -127,6 +127,7 @@ public: enum CollisionFlags { + CF_DYNAMIC_OBJECT = 0, CF_STATIC_OBJECT = 1, CF_KINEMATIC_OBJECT = 2, CF_NO_CONTACT_RESPONSE = 4, @@ -251,6 +252,16 @@ public: m_checkCollideWith = m_objectsWithoutCollisionCheck.size() > 0; } + int getNumObjectsWithoutCollision() const + { + return m_objectsWithoutCollisionCheck.size(); + } + + const btCollisionObject* getObjectWithoutCollision(int index) + { + return m_objectsWithoutCollisionCheck[index]; + } + virtual bool checkCollideWithOverride(const btCollisionObject* co) const { 
int index = m_objectsWithoutCollisionCheck.findLinearSearch(co); diff --git a/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp b/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp index a4252c296a..a71700f58a 100644 --- a/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp +++ b/thirdparty/bullet/BulletCollision/CollisionDispatch/btInternalEdgeUtility.cpp @@ -361,7 +361,13 @@ void btGenerateInternalEdgeInfo(btBvhTriangleMeshShape* trimeshShape, btTriangle for (int j = 2; j >= 0; j--) { - int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j]; + int graphicsindex; + switch (indicestype) { + case PHY_INTEGER: graphicsindex = gfxbase[j]; break; + case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; + case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; + default: btAssert(0); + } if (type == PHY_FLOAT) { float* graphicsbase = (float*)(vertexbase + graphicsindex * stride); diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp index d663b3d6d6..c66ce58e3e 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btBvhTriangleMeshShape.cpp @@ -124,12 +124,17 @@ void btBvhTriangleMeshShape::performRaycast(btTriangleCallback* callback, const nodeSubPart); unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride); - btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT); const btVector3& meshScaling = m_meshInterface->getScaling(); for (int j = 2; j >= 0; j--) { - int graphicsindex = indicestype == PHY_SHORT ? 
((unsigned short*)gfxbase)[j] : gfxbase[j]; + int graphicsindex; + switch (indicestype) { + case PHY_INTEGER: graphicsindex = gfxbase[j]; break; + case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; + case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; + default: btAssert(0); + } if (type == PHY_FLOAT) { @@ -193,12 +198,17 @@ void btBvhTriangleMeshShape::performConvexcast(btTriangleCallback* callback, con nodeSubPart); unsigned int* gfxbase = (unsigned int*)(indexbase + nodeTriangleIndex * indexstride); - btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT); const btVector3& meshScaling = m_meshInterface->getScaling(); for (int j = 2; j >= 0; j--) { - int graphicsindex = indicestype == PHY_SHORT ? ((unsigned short*)gfxbase)[j] : gfxbase[j]; + int graphicsindex; + switch (indicestype) { + case PHY_INTEGER: graphicsindex = gfxbase[j]; break; + case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; + case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; + default: btAssert(0); + } if (type == PHY_FLOAT) { diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h b/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h index c80e105a4d..16f9e0c77a 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btCollisionShape.h @@ -30,11 +30,12 @@ protected: int m_shapeType; void* m_userPointer; int m_userIndex; + int m_userIndex2; public: BT_DECLARE_ALIGNED_ALLOCATOR(); - btCollisionShape() : m_shapeType(INVALID_SHAPE_PROXYTYPE), m_userPointer(0), m_userIndex(-1) + btCollisionShape() : m_shapeType(INVALID_SHAPE_PROXYTYPE), m_userPointer(0), m_userIndex(-1), m_userIndex2(-1) { } @@ -137,6 +138,16 @@ public: return m_userIndex; } + void setUserIndex2(int index) + { + m_userIndex2 = index; + } + + int getUserIndex2() const + { + return m_userIndex2; + } + virtual int 
calculateSerializeBufferSize() const; ///fills the dataBuffer and returns the struct name (and 0 on failure) diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp index 34e7926f17..cab6980b65 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.cpp @@ -21,8 +21,7 @@ btHeightfieldTerrainShape::btHeightfieldTerrainShape( int heightStickWidth, int heightStickLength, const void* heightfieldData, btScalar heightScale, btScalar minHeight, btScalar maxHeight, int upAxis, PHY_ScalarType hdt, bool flipQuadEdges) - :m_userIndex2(-1), - m_userValue3(0), + :m_userValue3(0), m_triangleInfoMap(0) { initialize(heightStickWidth, heightStickLength, heightfieldData, @@ -31,8 +30,7 @@ btHeightfieldTerrainShape::btHeightfieldTerrainShape( } btHeightfieldTerrainShape::btHeightfieldTerrainShape(int heightStickWidth, int heightStickLength, const void* heightfieldData, btScalar maxHeight, int upAxis, bool useFloatData, bool flipQuadEdges) - :m_userIndex2(-1), - m_userValue3(0), + : m_userValue3(0), m_triangleInfoMap(0) { // legacy constructor: support only float or unsigned char, diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h index 8dea98fc6b..2cf3c00721 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btHeightfieldTerrainShape.h @@ -114,7 +114,7 @@ protected: int m_vboundsGridLength; int m_vboundsChunkSize; - int m_userIndex2; + btScalar m_userValue3; struct btTriangleInfoMap* m_triangleInfoMap; @@ -192,14 +192,6 @@ public: virtual const char* getName() const { return "HEIGHTFIELD"; } - void setUserIndex2(int index) - { - 
m_userIndex2 = index; - } - int getUserIndex2() const - { - return m_userIndex2; - } void setUserValue3(btScalar value) { m_userValue3 = value; diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp index 687399e0a9..863ea6d6ac 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btOptimizedBvh.cpp @@ -286,7 +286,6 @@ void btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface, int meshInterface->getLockedReadOnlyVertexIndexBase(&vertexbase, numverts, type, stride, &indexbase, indexstride, numfaces, indicestype, nodeSubPart); curNodeSubPart = nodeSubPart; - btAssert(indicestype == PHY_INTEGER || indicestype == PHY_SHORT); } //triangles->getLockedReadOnlyVertexIndexBase(vertexBase,numVerts, @@ -294,7 +293,13 @@ void btOptimizedBvh::updateBvhNodes(btStridingMeshInterface* meshInterface, int for (int j = 2; j >= 0; j--) { - int graphicsindex = indicestype == PHY_SHORT ? 
((unsigned short*)gfxbase)[j] : gfxbase[j]; + int graphicsindex; + switch (indicestype) { + case PHY_INTEGER: graphicsindex = gfxbase[j]; break; + case PHY_SHORT: graphicsindex = ((unsigned short*)gfxbase)[j]; break; + case PHY_UCHAR: graphicsindex = ((unsigned char*)gfxbase)[j]; break; + default: btAssert(0); + } if (type == PHY_FLOAT) { float* graphicsbase = (float*)(vertexbase + graphicsindex * stride); diff --git a/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp b/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp index 4a95dbea4f..23c95ad3ff 100644 --- a/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp +++ b/thirdparty/bullet/BulletCollision/CollisionShapes/btSdfCollisionShape.cpp @@ -2,8 +2,11 @@ #include "btMiniSDF.h" #include "LinearMath/btAabbUtil2.h" -struct btSdfCollisionShapeInternalData +ATTRIBUTE_ALIGNED16(struct) +btSdfCollisionShapeInternalData { + BT_DECLARE_ALIGNED_ALLOCATOR(); + btVector3 m_localScaling; btScalar m_margin; btMiniSDF m_sdf; diff --git a/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h b/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h index 5b85e87041..cc91079579 100644 --- a/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h +++ b/thirdparty/bullet/BulletCollision/Gimpact/btGImpactShape.h @@ -623,13 +623,21 @@ public: i1 = s_indices[1]; i2 = s_indices[2]; } - else + else if (indicestype == PHY_INTEGER) { unsigned int* i_indices = (unsigned int*)(indexbase + face_index * indexstride); i0 = i_indices[0]; i1 = i_indices[1]; i2 = i_indices[2]; } + else + { + btAssert(indicestype == PHY_UCHAR); + unsigned char* i_indices = (unsigned char*)(indexbase + face_index * indexstride); + i0 = i_indices[0]; + i1 = i_indices[1]; + i2 = i_indices[2]; + } } SIMD_FORCE_INLINE void get_vertex(unsigned int vertex_index, btVector3& vertex) const diff --git a/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp 
b/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp index 45d1817135..7d53f8624a 100644 --- a/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp +++ b/thirdparty/bullet/BulletCollision/NarrowPhaseCollision/btGjkEpa2.cpp @@ -1049,7 +1049,8 @@ btScalar btGjkEpaSolver2::SignedDistance(const btVector3& position, const btScalar length = delta.length(); results.normal = delta / length; results.witnesses[0] += results.normal * margin; - return (length - margin); + results.distance = length - margin; + return results.distance; } else { diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp index 27f76b8425..0f5ed1c2ce 100644 --- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp +++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btBatchedConstraints.cpp @@ -852,7 +852,7 @@ static void setupSpatialGridBatchesMt( memHelper.addChunk((void**)&constraintRowBatchIds, sizeof(int) * numConstraintRows); size_t scratchSize = memHelper.getSizeToAllocate(); // if we need to reallocate - if (scratchMemory->capacity() < scratchSize) + if (static_cast<size_t>(scratchMemory->capacity()) < scratchSize) { // allocate 6.25% extra to avoid repeated reallocs scratchMemory->reserve(scratchSize + scratchSize / 16); diff --git a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h index 4356c12abf..3316403a87 100644 --- a/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h +++ b/thirdparty/bullet/BulletDynamics/ConstraintSolver/btContactSolverInfo.h @@ -47,6 +47,8 @@ struct btContactSolverInfoData btScalar m_erp; //error reduction for non-contact constraints btScalar m_erp2; //error reduction for contact constraints btScalar m_deformable_erp; //error reduction for deformable constraints + btScalar 
m_deformable_cfm; //constraint force mixing for deformable constraints + btScalar m_deformable_maxErrorReduction; // maxErrorReduction for deformable contact btScalar m_globalCfm; //constraint force mixing for contacts and non-contacts btScalar m_frictionERP; //error reduction for friction constraints btScalar m_frictionCFM; //constraint force mixing for friction constraints @@ -83,7 +85,9 @@ struct btContactSolverInfo : public btContactSolverInfoData m_numIterations = 10; m_erp = btScalar(0.2); m_erp2 = btScalar(0.2); - m_deformable_erp = btScalar(0.1); + m_deformable_erp = btScalar(0.06); + m_deformable_cfm = btScalar(0.01); + m_deformable_maxErrorReduction = btScalar(0.1); m_globalCfm = btScalar(0.); m_frictionERP = btScalar(0.2); //positional friction 'anchors' are disabled by default m_frictionCFM = btScalar(0.); diff --git a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h index 943d724cce..7442dd1e6a 100644 --- a/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h +++ b/thirdparty/bullet/BulletDynamics/Dynamics/btRigidBody.h @@ -356,12 +356,12 @@ public: } } - btVector3 getPushVelocity() + btVector3 getPushVelocity() const { return m_pushVelocity; } - btVector3 getTurnVelocity() + btVector3 getTurnVelocity() const { return m_turnVelocity; } @@ -465,6 +465,12 @@ public: //for kinematic objects, we could also use use: // return (m_worldTransform(rel_pos) - m_interpolationWorldTransform(rel_pos)) / m_kinematicTimeStep; } + + btVector3 getPushVelocityInLocalPoint(const btVector3& rel_pos) const + { + //we also calculate lin/ang velocity for kinematic objects + return m_pushVelocity + m_turnVelocity.cross(rel_pos); + } void translate(const btVector3& v) { diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp index a1d5bb9ca8..bec8c6530d 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp 
+++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.cpp @@ -344,6 +344,8 @@ void btMultiBody::finalizeMultiDof() { m_deltaV.resize(0); m_deltaV.resize(6 + m_dofCount); + m_splitV.resize(0); + m_splitV.resize(6 + m_dofCount); m_realBuf.resize(6 + m_dofCount + m_dofCount * m_dofCount + 6 + m_dofCount); //m_dofCount for joint-space vels + m_dofCount^2 for "D" matrices + delta-pos vector (6 base "vels" + joint "vels") m_vectorBuf.resize(2 * m_dofCount); //two 3-vectors (i.e. one six-vector) for each system dof ("h" matrices) m_matrixBuf.resize(m_links.size() + 1); @@ -671,6 +673,30 @@ btScalar *btMultiBody::getJointTorqueMultiDof(int i) return &m_links[i].m_jointTorque[0]; } +bool btMultiBody::hasFixedBase() const +{ + return m_fixedBase || (getBaseCollider() && getBaseCollider()->isStaticObject()); +} + +bool btMultiBody::isBaseStaticOrKinematic() const +{ + return m_fixedBase || (getBaseCollider() && getBaseCollider()->isStaticOrKinematicObject()); +} + +bool btMultiBody::isBaseKinematic() const +{ + return getBaseCollider() && getBaseCollider()->isKinematicObject(); +} + +void btMultiBody::setBaseDynamicType(int dynamicType) +{ + if(getBaseCollider()) { + int oldFlags = getBaseCollider()->getCollisionFlags(); + oldFlags &= ~(btCollisionObject::CF_STATIC_OBJECT | btCollisionObject::CF_KINEMATIC_OBJECT); + getBaseCollider()->setCollisionFlags(oldFlags | dynamicType); + } +} + inline btMatrix3x3 outerProduct(const btVector3 &v0, const btVector3 &v1) //renamed it from vecMulVecTranspose (http://en.wikipedia.org/wiki/Outer_product); maybe it should be moved to btVector3 like dot and cross? 
{ btVector3 row0 = btVector3( @@ -796,7 +822,7 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar //create the vector of spatial velocity of the base by transforming global-coor linear and angular velocities into base-local coordinates spatVel[0].setVector(rot_from_parent[0] * base_omega, rot_from_parent[0] * base_vel); - if (m_fixedBase) + if (isBaseStaticOrKinematic()) { zeroAccSpatFrc[0].setZero(); } @@ -872,31 +898,53 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar // calculate zhat_i^A // - //external forces - btVector3 linkAppliedForce = isConstraintPass ? m_links[i].m_appliedConstraintForce : m_links[i].m_appliedForce; - btVector3 linkAppliedTorque = isConstraintPass ? m_links[i].m_appliedConstraintTorque : m_links[i].m_appliedTorque; + if (isLinkAndAllAncestorsKinematic(i)) + { + zeroAccSpatFrc[i].setZero(); + } + else{ + //external forces + btVector3 linkAppliedForce = isConstraintPass ? m_links[i].m_appliedConstraintForce : m_links[i].m_appliedForce; + btVector3 linkAppliedTorque = isConstraintPass ? 
m_links[i].m_appliedConstraintTorque : m_links[i].m_appliedTorque; - zeroAccSpatFrc[i + 1].setVector(-(rot_from_world[i + 1] * linkAppliedTorque), -(rot_from_world[i + 1] * linkAppliedForce)); + zeroAccSpatFrc[i + 1].setVector(-(rot_from_world[i + 1] * linkAppliedTorque), -(rot_from_world[i + 1] * linkAppliedForce)); #if 0 - { + { - b3Printf("stepVelocitiesMultiDof zeroAccSpatFrc[%d] linear:%f,%f,%f, angular:%f,%f,%f", - i+1, - zeroAccSpatFrc[i+1].m_topVec[0], - zeroAccSpatFrc[i+1].m_topVec[1], - zeroAccSpatFrc[i+1].m_topVec[2], + b3Printf("stepVelocitiesMultiDof zeroAccSpatFrc[%d] linear:%f,%f,%f, angular:%f,%f,%f", + i+1, + zeroAccSpatFrc[i+1].m_topVec[0], + zeroAccSpatFrc[i+1].m_topVec[1], + zeroAccSpatFrc[i+1].m_topVec[2], - zeroAccSpatFrc[i+1].m_bottomVec[0], - zeroAccSpatFrc[i+1].m_bottomVec[1], - zeroAccSpatFrc[i+1].m_bottomVec[2]); - } + zeroAccSpatFrc[i+1].m_bottomVec[0], + zeroAccSpatFrc[i+1].m_bottomVec[1], + zeroAccSpatFrc[i+1].m_bottomVec[2]); + } #endif - // - //adding damping terms (only) - btScalar linDampMult = 1., angDampMult = 1.; - zeroAccSpatFrc[i + 1].addVector(angDampMult * m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[i + 1].getAngular().safeNorm()), - linDampMult * m_links[i].m_mass * spatVel[i + 1].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[i + 1].getLinear().safeNorm())); + // + //adding damping terms (only) + btScalar linDampMult = 1., angDampMult = 1.; + zeroAccSpatFrc[i + 1].addVector(angDampMult * m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular() * (DAMPING_K1_ANGULAR + DAMPING_K2_ANGULAR * spatVel[i + 1].getAngular().safeNorm()), + linDampMult * m_links[i].m_mass * spatVel[i + 1].getLinear() * (DAMPING_K1_LINEAR + DAMPING_K2_LINEAR * spatVel[i + 1].getLinear().safeNorm())); + //p += vhat x Ihat vhat - done in a simpler way + if (m_useGyroTerm) + zeroAccSpatFrc[i + 1].addAngular(spatVel[i + 1].getAngular().cross(m_links[i].m_inertiaLocal * 
spatVel[i + 1].getAngular())); + // + zeroAccSpatFrc[i + 1].addLinear(m_links[i].m_mass * spatVel[i + 1].getAngular().cross(spatVel[i + 1].getLinear())); + // + //btVector3 temp = m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear()); + ////clamp parent's omega + //btScalar parOmegaMod = temp.length(); + //btScalar parOmegaModMax = 1000; + //if(parOmegaMod > parOmegaModMax) + // temp *= parOmegaModMax / parOmegaMod; + //zeroAccSpatFrc[i+1].addLinear(temp); + //printf("|zeroAccSpatFrc[%d]| = %.4f\n", i+1, temp.length()); + //temp = spatCoriolisAcc[i].getLinear(); + //printf("|spatCoriolisAcc[%d]| = %.4f\n", i+1, temp.length()); + } // calculate Ihat_i^A //init the spatial AB inertia (it has the simple form thanks to choosing local body frames origins at their COMs) @@ -909,22 +957,6 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar btMatrix3x3(m_links[i].m_inertiaLocal[0], 0, 0, 0, m_links[i].m_inertiaLocal[1], 0, 0, 0, m_links[i].m_inertiaLocal[2])); - // - //p += vhat x Ihat vhat - done in a simpler way - if (m_useGyroTerm) - zeroAccSpatFrc[i + 1].addAngular(spatVel[i + 1].getAngular().cross(m_links[i].m_inertiaLocal * spatVel[i + 1].getAngular())); - // - zeroAccSpatFrc[i + 1].addLinear(m_links[i].m_mass * spatVel[i + 1].getAngular().cross(spatVel[i + 1].getLinear())); - //btVector3 temp = m_links[i].m_mass * spatVel[i+1].getAngular().cross(spatVel[i+1].getLinear()); - ////clamp parent's omega - //btScalar parOmegaMod = temp.length(); - //btScalar parOmegaModMax = 1000; - //if(parOmegaMod > parOmegaModMax) - // temp *= parOmegaModMax / parOmegaMod; - //zeroAccSpatFrc[i+1].addLinear(temp); - //printf("|zeroAccSpatFrc[%d]| = %.4f\n", i+1, temp.length()); - //temp = spatCoriolisAcc[i].getLinear(); - //printf("|spatCoriolisAcc[%d]| = %.4f\n", i+1, temp.length()); //printf("w[%d] = [%.4f %.4f %.4f]\n", i, vel_top_angular[i+1].x(), vel_top_angular[i+1].y(), vel_top_angular[i+1].z()); //printf("v[%d] = [%.4f %.4f 
%.4f]\n", i, vel_bottom_linear[i+1].x(), vel_bottom_linear[i+1].y(), vel_bottom_linear[i+1].z()); @@ -935,6 +967,8 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar // (part of TreeForwardDynamics in Mirtich.) for (int i = num_links - 1; i >= 0; --i) { + if(isLinkAndAllAncestorsKinematic(i)) + continue; const int parent = m_links[i].m_parent; fromParent.m_rotMat = rot_from_parent[i + 1]; fromParent.m_trnVec = m_links[i].m_cachedRVector; @@ -1047,7 +1081,7 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar // Second 'upward' loop // (part of TreeForwardDynamics in Mirtich) - if (m_fixedBase) + if (isBaseStaticOrKinematic()) { spatAcc[0].setZero(); } @@ -1081,21 +1115,23 @@ void btMultiBody::computeAccelerationsArticulatedBodyAlgorithmMultiDof(btScalar fromParent.transform(spatAcc[parent + 1], spatAcc[i + 1]); - for (int dof = 0; dof < m_links[i].m_dofCount; ++dof) + if(!isLinkAndAllAncestorsKinematic(i)) { - const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof]; - // - Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i + 1].dot(hDof); - } - - btScalar *invDi = &invD[m_links[i].m_dofOffset * m_links[i].m_dofOffset]; - //D^{-1} * (Y - h^{T}*apar) - mulMatrix(invDi, Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]); + for (int dof = 0; dof < m_links[i].m_dofCount; ++dof) + { + const btSpatialForceVector &hDof = h[m_links[i].m_dofOffset + dof]; + // + Y_minus_hT_a[dof] = Y[m_links[i].m_dofOffset + dof] - spatAcc[i + 1].dot(hDof); + } + btScalar *invDi = &invD[m_links[i].m_dofOffset * m_links[i].m_dofOffset]; + //D^{-1} * (Y - h^{T}*apar) + mulMatrix(invDi, Y_minus_hT_a, m_links[i].m_dofCount, m_links[i].m_dofCount, m_links[i].m_dofCount, 1, &joint_accel[m_links[i].m_dofOffset]); - spatAcc[i + 1] += spatCoriolisAcc[i]; + spatAcc[i + 1] += spatCoriolisAcc[i]; - for (int dof = 0; dof < m_links[i].m_dofCount; 
++dof) - spatAcc[i + 1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof]; + for (int dof = 0; dof < m_links[i].m_dofCount; ++dof) + spatAcc[i + 1] += m_links[i].m_axes[dof] * joint_accel[m_links[i].m_dofOffset + dof]; + } if (m_links[i].m_jointFeedback) { @@ -1432,7 +1468,7 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar // Fill in zero_acc // -- set to force/torque on the base, zero otherwise - if (m_fixedBase) + if (isBaseStaticOrKinematic()) { zeroAccSpatFrc[0].setZero(); } @@ -1451,6 +1487,8 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar // (part of TreeForwardDynamics in Mirtich.) for (int i = num_links - 1; i >= 0; --i) { + if(isLinkAndAllAncestorsKinematic(i)) + continue; const int parent = m_links[i].m_parent; fromParent.m_rotMat = rot_from_parent[i + 1]; fromParent.m_trnVec = m_links[i].m_cachedRVector; @@ -1494,7 +1532,7 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar // Second 'upward' loop // (part of TreeForwardDynamics in Mirtich) - if (m_fixedBase) + if (isBaseStaticOrKinematic()) { spatAcc[0].setZero(); } @@ -1507,6 +1545,8 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar // now do the loop over the m_links for (int i = 0; i < num_links; ++i) { + if(isLinkAndAllAncestorsKinematic(i)) + continue; const int parent = m_links[i].m_parent; fromParent.m_rotMat = rot_from_parent[i + 1]; fromParent.m_trnVec = m_links[i].m_cachedRVector; @@ -1550,23 +1590,26 @@ void btMultiBody::calcAccelerationDeltasMultiDof(const btScalar *force, btScalar void btMultiBody::predictPositionsMultiDof(btScalar dt) { int num_links = getNumLinks(); - // step position by adding dt * velocity - //btVector3 v = getBaseVel(); - //m_basePos += dt * v; - // - btScalar *pBasePos; - btScalar *pBaseVel = &m_realBuf[3]; //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety) - - 
// reset to current position - for (int i = 0; i < 3; ++i) - { - m_basePos_interpolate[i] = m_basePos[i]; - } - pBasePos = m_basePos_interpolate; + if(!isBaseKinematic()) + { + // step position by adding dt * velocity + //btVector3 v = getBaseVel(); + //m_basePos += dt * v; + // + btScalar *pBasePos; + btScalar *pBaseVel = &m_realBuf[3]; //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety) - pBasePos[0] += dt * pBaseVel[0]; - pBasePos[1] += dt * pBaseVel[1]; - pBasePos[2] += dt * pBaseVel[2]; + // reset to current position + for (int i = 0; i < 3; ++i) + { + m_basePos_interpolate[i] = m_basePos[i]; + } + pBasePos = m_basePos_interpolate; + + pBasePos[0] += dt * pBaseVel[0]; + pBasePos[1] += dt * pBaseVel[1]; + pBasePos[2] += dt * pBaseVel[2]; + } /////////////////////////////// //local functor for quaternion integration (to avoid error prone redundancy) @@ -1617,26 +1660,29 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt) //pQuatUpdateFun(getBaseOmega(), m_baseQuat, true, dt); // - btScalar *pBaseQuat; - - // reset to current orientation - for (int i = 0; i < 4; ++i) - { - m_baseQuat_interpolate[i] = m_baseQuat[i]; - } - pBaseQuat = m_baseQuat_interpolate; + if(!isBaseKinematic()) + { + btScalar *pBaseQuat; - btScalar *pBaseOmega = &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety) - // - btQuaternion baseQuat; - baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]); - btVector3 baseOmega; - baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]); - pQuatUpdateFun(baseOmega, baseQuat, true, dt); - pBaseQuat[0] = baseQuat.x(); - pBaseQuat[1] = baseQuat.y(); - pBaseQuat[2] = baseQuat.z(); - pBaseQuat[3] = baseQuat.w(); + // reset to current orientation + for (int i = 0; i < 4; ++i) + { + m_baseQuat_interpolate[i] = m_baseQuat[i]; + } + pBaseQuat = m_baseQuat_interpolate; + + btScalar *pBaseOmega = &m_realBuf[0]; //note: 
the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety) + // + btQuaternion baseQuat; + baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]); + btVector3 baseOmega; + baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]); + pQuatUpdateFun(baseOmega, baseQuat, true, dt); + pBaseQuat[0] = baseQuat.x(); + pBaseQuat[1] = baseQuat.y(); + pBaseQuat[2] = baseQuat.z(); + pBaseQuat[3] = baseQuat.w(); + } // Finally we can update m_jointPos for each of the m_links for (int i = 0; i < num_links; ++i) @@ -1644,55 +1690,88 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt) btScalar *pJointPos; pJointPos = &m_links[i].m_jointPos_interpolate[0]; - btScalar *pJointVel = getJointVelMultiDof(i); - - switch (m_links[i].m_jointType) - { - case btMultibodyLink::ePrismatic: - case btMultibodyLink::eRevolute: - { - //reset to current pos - pJointPos[0] = m_links[i].m_jointPos[0]; - btScalar jointVel = pJointVel[0]; - pJointPos[0] += dt * jointVel; - break; - } - case btMultibodyLink::eSpherical: - { - //reset to current pos - - for (int j = 0; j < 4; ++j) + if (m_links[i].m_collider && m_links[i].m_collider->isStaticOrKinematic()) + { + switch (m_links[i].m_jointType) + { + case btMultibodyLink::ePrismatic: + case btMultibodyLink::eRevolute: { - pJointPos[j] = m_links[i].m_jointPos[j]; + pJointPos[0] = m_links[i].m_jointPos[0]; + break; } - - btVector3 jointVel; - jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]); - btQuaternion jointOri; - jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]); - pQuatUpdateFun(jointVel, jointOri, false, dt); - pJointPos[0] = jointOri.x(); - pJointPos[1] = jointOri.y(); - pJointPos[2] = jointOri.z(); - pJointPos[3] = jointOri.w(); - break; - } - case btMultibodyLink::ePlanar: - { - for (int j = 0; j < 3; ++j) + case btMultibodyLink::eSpherical: { - pJointPos[j] = m_links[i].m_jointPos[j]; + for (int j = 0; j < 4; ++j) + { + pJointPos[j] = 
m_links[i].m_jointPos[j]; + } + break; } - pJointPos[0] += dt * getJointVelMultiDof(i)[0]; - - btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2); - btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2); - pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt; - pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt; - break; + case btMultibodyLink::ePlanar: + { + for (int j = 0; j < 3; ++j) + { + pJointPos[j] = m_links[i].m_jointPos[j]; + } + break; + } + default: + break; } - default: + } + else + { + btScalar *pJointVel = getJointVelMultiDof(i); + + switch (m_links[i].m_jointType) { + case btMultibodyLink::ePrismatic: + case btMultibodyLink::eRevolute: + { + //reset to current pos + pJointPos[0] = m_links[i].m_jointPos[0]; + btScalar jointVel = pJointVel[0]; + pJointPos[0] += dt * jointVel; + break; + } + case btMultibodyLink::eSpherical: + { + //reset to current pos + + for (int j = 0; j < 4; ++j) + { + pJointPos[j] = m_links[i].m_jointPos[j]; + } + + btVector3 jointVel; + jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]); + btQuaternion jointOri; + jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]); + pQuatUpdateFun(jointVel, jointOri, false, dt); + pJointPos[0] = jointOri.x(); + pJointPos[1] = jointOri.y(); + pJointPos[2] = jointOri.z(); + pJointPos[3] = jointOri.w(); + break; + } + case btMultibodyLink::ePlanar: + { + for (int j = 0; j < 3; ++j) + { + pJointPos[j] = m_links[i].m_jointPos[j]; + } + pJointPos[0] += dt * getJointVelMultiDof(i)[0]; + + btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2); + btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2); + pJointPos[1] += 
m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt; + pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt; + break; + } + default: + { + } } } @@ -1703,16 +1782,19 @@ void btMultiBody::predictPositionsMultiDof(btScalar dt) void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd) { int num_links = getNumLinks(); - // step position by adding dt * velocity - //btVector3 v = getBaseVel(); - //m_basePos += dt * v; - // - btScalar *pBasePos = (pq ? &pq[4] : m_basePos); - btScalar *pBaseVel = (pqd ? &pqd[3] : &m_realBuf[3]); //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety) - - pBasePos[0] += dt * pBaseVel[0]; - pBasePos[1] += dt * pBaseVel[1]; - pBasePos[2] += dt * pBaseVel[2]; + if(!isBaseKinematic()) + { + // step position by adding dt * velocity + //btVector3 v = getBaseVel(); + //m_basePos += dt * v; + // + btScalar *pBasePos = (pq ? &pq[4] : m_basePos); + btScalar *pBaseVel = (pqd ? &pqd[3] : &m_realBuf[3]); //note: the !pqd case assumes m_realBuf holds with base velocity at 3,4,5 (should be wrapped for safety) + + pBasePos[0] += dt * pBaseVel[0]; + pBasePos[1] += dt * pBaseVel[1]; + pBasePos[2] += dt * pBaseVel[2]; + } /////////////////////////////// //local functor for quaternion integration (to avoid error prone redundancy) @@ -1763,22 +1845,25 @@ void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd //pQuatUpdateFun(getBaseOmega(), m_baseQuat, true, dt); // - btScalar *pBaseQuat = pq ? pq : m_baseQuat; - btScalar *pBaseOmega = pqd ? 
pqd : &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety) - // - btQuaternion baseQuat; - baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]); - btVector3 baseOmega; - baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]); - pQuatUpdateFun(baseOmega, baseQuat, true, dt); - pBaseQuat[0] = baseQuat.x(); - pBaseQuat[1] = baseQuat.y(); - pBaseQuat[2] = baseQuat.z(); - pBaseQuat[3] = baseQuat.w(); - - //printf("pBaseOmega = %.4f %.4f %.4f\n", pBaseOmega->x(), pBaseOmega->y(), pBaseOmega->z()); - //printf("pBaseVel = %.4f %.4f %.4f\n", pBaseVel->x(), pBaseVel->y(), pBaseVel->z()); - //printf("baseQuat = %.4f %.4f %.4f %.4f\n", pBaseQuat->x(), pBaseQuat->y(), pBaseQuat->z(), pBaseQuat->w()); + if(!isBaseKinematic()) + { + btScalar *pBaseQuat = pq ? pq : m_baseQuat; + btScalar *pBaseOmega = pqd ? pqd : &m_realBuf[0]; //note: the !pqd case assumes m_realBuf starts with base omega (should be wrapped for safety) + // + btQuaternion baseQuat; + baseQuat.setValue(pBaseQuat[0], pBaseQuat[1], pBaseQuat[2], pBaseQuat[3]); + btVector3 baseOmega; + baseOmega.setValue(pBaseOmega[0], pBaseOmega[1], pBaseOmega[2]); + pQuatUpdateFun(baseOmega, baseQuat, true, dt); + pBaseQuat[0] = baseQuat.x(); + pBaseQuat[1] = baseQuat.y(); + pBaseQuat[2] = baseQuat.z(); + pBaseQuat[3] = baseQuat.w(); + + //printf("pBaseOmega = %.4f %.4f %.4f\n", pBaseOmega->x(), pBaseOmega->y(), pBaseOmega->z()); + //printf("pBaseVel = %.4f %.4f %.4f\n", pBaseVel->x(), pBaseVel->y(), pBaseVel->z()); + //printf("baseQuat = %.4f %.4f %.4f %.4f\n", pBaseQuat->x(), pBaseQuat->y(), pBaseQuat->z(), pBaseQuat->w()); + } if (pq) pq += 7; @@ -1788,48 +1873,51 @@ void btMultiBody::stepPositionsMultiDof(btScalar dt, btScalar *pq, btScalar *pqd // Finally we can update m_jointPos for each of the m_links for (int i = 0; i < num_links; ++i) { - btScalar *pJointPos; - pJointPos= (pq ? 
pq : &m_links[i].m_jointPos[0]); - - btScalar *pJointVel = (pqd ? pqd : getJointVelMultiDof(i)); - - switch (m_links[i].m_jointType) + if (!(m_links[i].m_collider && m_links[i].m_collider->isStaticOrKinematic())) { - case btMultibodyLink::ePrismatic: - case btMultibodyLink::eRevolute: - { - //reset to current pos - btScalar jointVel = pJointVel[0]; - pJointPos[0] += dt * jointVel; - break; - } - case btMultibodyLink::eSpherical: - { - //reset to current pos - btVector3 jointVel; - jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]); - btQuaternion jointOri; - jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]); - pQuatUpdateFun(jointVel, jointOri, false, dt); - pJointPos[0] = jointOri.x(); - pJointPos[1] = jointOri.y(); - pJointPos[2] = jointOri.z(); - pJointPos[3] = jointOri.w(); - break; - } - case btMultibodyLink::ePlanar: + btScalar *pJointPos; + pJointPos= (pq ? pq : &m_links[i].m_jointPos[0]); + + btScalar *pJointVel = (pqd ? pqd : getJointVelMultiDof(i)); + + switch (m_links[i].m_jointType) { - pJointPos[0] += dt * getJointVelMultiDof(i)[0]; + case btMultibodyLink::ePrismatic: + case btMultibodyLink::eRevolute: + { + //reset to current pos + btScalar jointVel = pJointVel[0]; + pJointPos[0] += dt * jointVel; + break; + } + case btMultibodyLink::eSpherical: + { + //reset to current pos + btVector3 jointVel; + jointVel.setValue(pJointVel[0], pJointVel[1], pJointVel[2]); + btQuaternion jointOri; + jointOri.setValue(pJointPos[0], pJointPos[1], pJointPos[2], pJointPos[3]); + pQuatUpdateFun(jointVel, jointOri, false, dt); + pJointPos[0] = jointOri.x(); + pJointPos[1] = jointOri.y(); + pJointPos[2] = jointOri.z(); + pJointPos[3] = jointOri.w(); + break; + } + case btMultibodyLink::ePlanar: + { + pJointPos[0] += dt * getJointVelMultiDof(i)[0]; - btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2); - btVector3 no_q0_coors_qd1qd2 = 
quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2); - pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt; - pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt; + btVector3 q0_coors_qd1qd2 = getJointVelMultiDof(i)[1] * m_links[i].getAxisBottom(1) + getJointVelMultiDof(i)[2] * m_links[i].getAxisBottom(2); + btVector3 no_q0_coors_qd1qd2 = quatRotate(btQuaternion(m_links[i].getAxisTop(0), pJointPos[0]), q0_coors_qd1qd2); + pJointPos[1] += m_links[i].getAxisBottom(1).dot(no_q0_coors_qd1qd2) * dt; + pJointPos[2] += m_links[i].getAxisBottom(2).dot(no_q0_coors_qd1qd2) * dt; - break; - } - default: - { + break; + } + default: + { + } } } @@ -2135,8 +2223,15 @@ void btMultiBody::updateCollisionObjectInterpolationWorldTransforms(btAlignedObj world_to_local.resize(getNumLinks() + 1); local_origin.resize(getNumLinks() + 1); - world_to_local[0] = getInterpolateWorldToBaseRot(); - local_origin[0] = getInterpolateBasePos(); + if(isBaseKinematic()){ + world_to_local[0] = getWorldToBaseRot(); + local_origin[0] = getBasePos(); + } + else + { + world_to_local[0] = getInterpolateWorldToBaseRot(); + local_origin[0] = getInterpolateBasePos(); + } if (getBaseCollider()) { @@ -2282,3 +2377,81 @@ const char *btMultiBody::serialize(void *dataBuffer, class btSerializer *seriali return btMultiBodyDataName; } + +void btMultiBody::saveKinematicState(btScalar timeStep) +{ + //todo: clamp to some (user definable) safe minimum timestep, to limit maximum angular/linear velocities + if (timeStep != btScalar(0.)) + { + btVector3 linearVelocity, angularVelocity; + btTransformUtil::calculateVelocity(getInterpolateBaseWorldTransform(), getBaseWorldTransform(), timeStep, linearVelocity, angularVelocity); + setBaseVel(linearVelocity); + setBaseOmega(angularVelocity); + setInterpolateBaseWorldTransform(getBaseWorldTransform()); + } +} + +void btMultiBody::setLinkDynamicType(const int i, int type) +{ + if (i == -1) + { + 
setBaseDynamicType(type); + } + else if (i >= 0 && i < getNumLinks()) + { + if (m_links[i].m_collider) + { + m_links[i].m_collider->setDynamicType(type); + } + } +} + +bool btMultiBody::isLinkStaticOrKinematic(const int i) const +{ + if (i == -1) + { + return isBaseStaticOrKinematic(); + } + else + { + if (m_links[i].m_collider) + return m_links[i].m_collider->isStaticOrKinematic(); + } + return false; +} + +bool btMultiBody::isLinkKinematic(const int i) const +{ + if (i == -1) + { + return isBaseKinematic(); + } + else + { + if (m_links[i].m_collider) + return m_links[i].m_collider->isKinematic(); + } + return false; +} + +bool btMultiBody::isLinkAndAllAncestorsStaticOrKinematic(const int i) const +{ + int link = i; + while (link != -1) { + if (!isLinkStaticOrKinematic(link)) + return false; + link = m_links[link].m_parent; + } + return isBaseStaticOrKinematic(); +} + +bool btMultiBody::isLinkAndAllAncestorsKinematic(const int i) const +{ + int link = i; + while (link != -1) { + if (!isLinkKinematic(link)) + return false; + link = m_links[link].m_parent; + } + return isBaseKinematic(); +} diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h index be795633fd..25112a6805 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBody.h @@ -210,7 +210,13 @@ public: void setBasePos(const btVector3 &pos) { m_basePos = pos; - m_basePos_interpolate = pos; + if(!isBaseKinematic()) + m_basePos_interpolate = pos; + } + + void setInterpolateBasePos(const btVector3 &pos) + { + m_basePos_interpolate = pos; } void setBaseWorldTransform(const btTransform &tr) @@ -227,17 +233,39 @@ public: return tr; } + void setInterpolateBaseWorldTransform(const btTransform &tr) + { + setInterpolateBasePos(tr.getOrigin()); + setInterpolateWorldToBaseRot(tr.getRotation().inverse()); + } + + btTransform getInterpolateBaseWorldTransform() const 
+ { + btTransform tr; + tr.setOrigin(getInterpolateBasePos()); + tr.setRotation(getInterpolateWorldToBaseRot().inverse()); + return tr; + } + void setBaseVel(const btVector3 &vel) { m_realBuf[3] = vel[0]; m_realBuf[4] = vel[1]; m_realBuf[5] = vel[2]; } + void setWorldToBaseRot(const btQuaternion &rot) { m_baseQuat = rot; //m_baseQuat asumed to ba alias!? - m_baseQuat_interpolate = rot; + if(!isBaseKinematic()) + m_baseQuat_interpolate = rot; + } + + void setInterpolateWorldToBaseRot(const btQuaternion &rot) + { + m_baseQuat_interpolate = rot; } + void setBaseOmega(const btVector3 &omega) { m_realBuf[0] = omega[0]; @@ -245,6 +273,8 @@ public: m_realBuf[2] = omega[2]; } + void saveKinematicState(btScalar timeStep); + // // get/set pos/vel for child m_links (i = 0 to num_links-1) // @@ -278,6 +308,11 @@ public: { return &m_deltaV[0]; } + + const btScalar *getSplitVelocityVector() const + { + return &m_splitV[0]; + } /* btScalar * getVelocityVector() { return &real_buf[0]; @@ -397,6 +432,26 @@ public: m_deltaV[dof] += delta_vee[dof] * multiplier; } } + void applyDeltaSplitVeeMultiDof(const btScalar *delta_vee, btScalar multiplier) + { + for (int dof = 0; dof < 6 + getNumDofs(); ++dof) + { + m_splitV[dof] += delta_vee[dof] * multiplier; + } + } + void addSplitV() + { + applyDeltaVeeMultiDof(&m_splitV[0], 1); + } + void substractSplitV() + { + applyDeltaVeeMultiDof(&m_splitV[0], -1); + + for (int dof = 0; dof < 6 + getNumDofs(); ++dof) + { + m_splitV[dof] = 0.f; + } + } void processDeltaVeeMultiDof2() { applyDeltaVeeMultiDof(&m_deltaV[0], 1); @@ -495,14 +550,22 @@ public: void goToSleep(); void checkMotionAndSleepIfRequired(btScalar timestep); - bool hasFixedBase() const - { - return m_fixedBase; - } + bool hasFixedBase() const; + + bool isBaseKinematic() const; + + bool isBaseStaticOrKinematic() const; + + // set the dynamic type in the base's collision flags. 
+ void setBaseDynamicType(int dynamicType); void setFixedBase(bool fixedBase) { m_fixedBase = fixedBase; + if(m_fixedBase) + setBaseDynamicType(btCollisionObject::CF_STATIC_OBJECT); + else + setBaseDynamicType(btCollisionObject::CF_DYNAMIC_OBJECT); } int getCompanionId() const @@ -653,7 +716,15 @@ public: btVector3 &top_out, // top part of output vector btVector3 &bottom_out); // bottom part of output vector + void setLinkDynamicType(const int i, int type); + + bool isLinkStaticOrKinematic(const int i) const; + + bool isLinkKinematic(const int i) const; + + bool isLinkAndAllAncestorsStaticOrKinematic(const int i) const; + bool isLinkAndAllAncestorsKinematic(const int i) const; private: btMultiBody(const btMultiBody &); // not implemented @@ -711,6 +782,7 @@ private: // offset size array // 0 num_links+1 rot_from_parent // + btAlignedObjectArray<btScalar> m_splitV; btAlignedObjectArray<btScalar> m_deltaV; btAlignedObjectArray<btScalar> m_realBuf; btAlignedObjectArray<btVector3> m_vectorBuf; diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp index d7ed05ce57..1ba5861145 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.cpp @@ -2,11 +2,12 @@ #include "BulletDynamics/Dynamics/btRigidBody.h" #include "btMultiBodyPoint2Point.h" //for testing (BTMBP2PCONSTRAINT_BLOCK_ANGULAR_MOTION_TEST macro) -btMultiBodyConstraint::btMultiBodyConstraint(btMultiBody* bodyA, btMultiBody* bodyB, int linkA, int linkB, int numRows, bool isUnilateral) +btMultiBodyConstraint::btMultiBodyConstraint(btMultiBody* bodyA, btMultiBody* bodyB, int linkA, int linkB, int numRows, bool isUnilateral, int type) : m_bodyA(bodyA), m_bodyB(bodyB), m_linkA(linkA), m_linkB(linkB), + m_type(type), m_numRows(numRows), m_jacSizeA(0), m_jacSizeBoth(0), diff --git 
a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h index 5c15f3e851..4a6007ee3e 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyConstraint.h @@ -20,6 +20,21 @@ subject to the following restrictions: #include "LinearMath/btAlignedObjectArray.h" #include "btMultiBody.h" + +//Don't change any of the existing enum values, so add enum types at the end for serialization compatibility +enum btTypedMultiBodyConstraintType +{ + MULTIBODY_CONSTRAINT_LIMIT=3, + MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR, + MULTIBODY_CONSTRAINT_GEAR, + MULTIBODY_CONSTRAINT_POINT_TO_POINT, + MULTIBODY_CONSTRAINT_SLIDER, + MULTIBODY_CONSTRAINT_SPHERICAL_MOTOR, + MULTIBODY_CONSTRAINT_FIXED, + + MAX_MULTIBODY_CONSTRAINT_TYPE, +}; + class btMultiBody; struct btSolverInfo; @@ -46,6 +61,8 @@ protected: int m_linkA; int m_linkB; + int m_type; //btTypedMultiBodyConstraintType + int m_numRows; int m_jacSizeA; int m_jacSizeBoth; @@ -82,12 +99,16 @@ protected: public: BT_DECLARE_ALIGNED_ALLOCATOR(); - btMultiBodyConstraint(btMultiBody * bodyA, btMultiBody * bodyB, int linkA, int linkB, int numRows, bool isUnilateral); + btMultiBodyConstraint(btMultiBody * bodyA, btMultiBody * bodyB, int linkA, int linkB, int numRows, bool isUnilateral, int type); virtual ~btMultiBodyConstraint(); void updateJacobianSizes(); void allocateJacobiansMultiDof(); + int getConstraintType() const + { + return m_type; + } //many constraints have setFrameInB/setPivotInB. Will use 'getConstraintType' later. 
virtual void setFrameInB(const btMatrix3x3& frameInB) {} virtual void setPivotInB(const btVector3& pivotInB) {} diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp index cd1bad089e..fef95f0c4e 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.cpp @@ -592,6 +592,7 @@ void btMultiBodyDynamicsWorld::integrateMultiBodyTransforms(btScalar timeStep) if (!isSleeping) { + bod->addSplitV(); int nLinks = bod->getNumLinks(); ///base + num m_links @@ -610,6 +611,7 @@ void btMultiBodyDynamicsWorld::integrateMultiBodyTransforms(btScalar timeStep) m_scratch_world_to_local.resize(nLinks + 1); m_scratch_local_origin.resize(nLinks + 1); bod->updateCollisionObjectWorldTransforms(m_scratch_world_to_local, m_scratch_local_origin); + bod->substractSplitV(); } else { @@ -867,6 +869,18 @@ void btMultiBodyDynamicsWorld::serializeMultiBodies(btSerializer* serializer) } } } + +void btMultiBodyDynamicsWorld::saveKinematicState(btScalar timeStep) +{ + btDiscreteDynamicsWorld::saveKinematicState(timeStep); + for(int i = 0; i < m_multiBodies.size(); i++) + { + btMultiBody* body = m_multiBodies[i]; + if(body->isBaseKinematic()) + body->saveKinematicState(timeStep); + } +} + // //void btMultiBodyDynamicsWorld::setSplitIslands(bool split) //{ diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h index 9ac46f4b64..d2d76c8b92 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyDynamicsWorld.h @@ -120,5 +120,7 @@ public: virtual void solveExternalForces(btContactSolverInfo& solverInfo); virtual void solveInternalConstraints(btContactSolverInfo& solverInfo); void buildIslands(); 
+ + virtual void saveKinematicState(btScalar timeStep); }; #endif //BT_MULTIBODY_DYNAMICS_WORLD_H diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp index 5ef9444c2f..df2abbe97a 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyFixedConstraint.cpp @@ -24,7 +24,7 @@ subject to the following restrictions: #define BTMBFIXEDCONSTRAINT_DIM 6 btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB) - : btMultiBodyConstraint(body, 0, link, -1, BTMBFIXEDCONSTRAINT_DIM, false), + : btMultiBodyConstraint(body, 0, link, -1, BTMBFIXEDCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_FIXED), m_rigidBodyA(0), m_rigidBodyB(bodyB), m_pivotInA(pivotInA), @@ -36,7 +36,7 @@ btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* body, int li } btMultiBodyFixedConstraint::btMultiBodyFixedConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB) - : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBFIXEDCONSTRAINT_DIM, false), + : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBFIXEDCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_FIXED), m_rigidBodyA(0), m_rigidBodyB(0), m_pivotInA(pivotInA), diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp index bf6b811d26..ee02cf9b07 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyGearConstraint.cpp @@ -21,7 +21,7 @@ 
subject to the following restrictions: #include "BulletCollision/CollisionDispatch/btCollisionObject.h" btMultiBodyGearConstraint::btMultiBodyGearConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB) - : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, 1, false), + : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, 1, false, MULTIBODY_CONSTRAINT_GEAR), m_gearRatio(1), m_gearAuxLink(-1), m_erp(0), diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp index 8791ad2868..94b36ac108 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.cpp @@ -22,7 +22,7 @@ subject to the following restrictions: btMultiBodyJointLimitConstraint::btMultiBodyJointLimitConstraint(btMultiBody* body, int link, btScalar lower, btScalar upper) //:btMultiBodyConstraint(body,0,link,-1,2,true), - : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 2, true), + : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 2, true, MULTIBODY_CONSTRAINT_LIMIT), m_lowerBound(lower), m_upperBound(upper) { diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h index 6716ba490f..b810692b4c 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointLimitConstraint.h @@ -42,6 +42,22 @@ public: { //todo(erwincoumans) } + btScalar getLowerBound() const + { + return m_lowerBound; + } + btScalar getUpperBound() const + { + return m_upperBound; + } + void setLowerBound(btScalar lower) + { 
+ m_lowerBound = lower; + } + void setUpperBound(btScalar upper) + { + m_upperBound = upper; + } }; #endif //BT_MULTIBODY_JOINT_LIMIT_CONSTRAINT_H diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp index 5c816c4987..fec9b03213 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyJointMotor.cpp @@ -21,7 +21,7 @@ subject to the following restrictions: #include "BulletCollision/CollisionDispatch/btCollisionObject.h" btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, btScalar desiredVelocity, btScalar maxMotorImpulse) - : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true), + : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true, MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR), m_desiredVelocity(desiredVelocity), m_desiredPosition(0), m_kd(1.), @@ -51,7 +51,7 @@ void btMultiBodyJointMotor::finalizeMultiDof() btMultiBodyJointMotor::btMultiBodyJointMotor(btMultiBody* body, int link, int linkDoF, btScalar desiredVelocity, btScalar maxMotorImpulse) //:btMultiBodyConstraint(body,0,link,-1,1,true), - : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true), + : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 1, true, MULTIBODY_CONSTRAINT_1DOF_JOINT_MOTOR), m_desiredVelocity(desiredVelocity), m_desiredPosition(0), m_kd(1.), diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h index 01d5583c2f..5a1429340f 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLink.h @@ -295,6 +295,9 @@ struct btMultibodyLink } } } + + + }; #endif //BT_MULTIBODY_LINK_H diff --git 
a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h index bc909990c2..3dc35a5814 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyLinkCollider.h @@ -130,6 +130,23 @@ public: return true; } + bool isStaticOrKinematic() const + { + return isStaticOrKinematicObject(); + } + + bool isKinematic() const + { + return isKinematicObject(); + } + + void setDynamicType(int dynamicType) + { + int oldFlags = getCollisionFlags(); + oldFlags &= ~(btCollisionObject::CF_STATIC_OBJECT | btCollisionObject::CF_KINEMATIC_OBJECT); + setCollisionFlags(oldFlags | dynamicType); + } + virtual int calculateSerializeBufferSize() const; ///fills the dataBuffer and returns the struct name (and 0 on failure) diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp index 37d3aede37..f51e69deb1 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodyPoint2Point.cpp @@ -27,7 +27,7 @@ subject to the following restrictions: #endif btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB) - : btMultiBodyConstraint(body, 0, link, -1, BTMBP2PCONSTRAINT_DIM, false), + : btMultiBodyConstraint(body, 0, link, -1, BTMBP2PCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_POINT_TO_POINT), m_rigidBodyA(0), m_rigidBodyB(bodyB), m_pivotInA(pivotInA), @@ -37,7 +37,7 @@ btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* body, int link, btRi } btMultiBodyPoint2Point::btMultiBodyPoint2Point(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB) - : btMultiBodyConstraint(bodyA, 
bodyB, linkA, linkB, BTMBP2PCONSTRAINT_DIM, false), + : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBP2PCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_POINT_TO_POINT), m_rigidBodyA(0), m_rigidBodyB(0), m_pivotInA(pivotInA), diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp index e025302ce6..48ec1d5af2 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySliderConstraint.cpp @@ -25,7 +25,7 @@ subject to the following restrictions: #define EPSILON 0.000001 btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* body, int link, btRigidBody* bodyB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB, const btVector3& jointAxis) - : btMultiBodyConstraint(body, 0, link, -1, BTMBSLIDERCONSTRAINT_DIM, false), + : btMultiBodyConstraint(body, 0, link, -1, BTMBSLIDERCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_SLIDER), m_rigidBodyA(0), m_rigidBodyB(bodyB), m_pivotInA(pivotInA), @@ -38,7 +38,7 @@ btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* body, int } btMultiBodySliderConstraint::btMultiBodySliderConstraint(btMultiBody* bodyA, int linkA, btMultiBody* bodyB, int linkB, const btVector3& pivotInA, const btVector3& pivotInB, const btMatrix3x3& frameInA, const btMatrix3x3& frameInB, const btVector3& jointAxis) - : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBSLIDERCONSTRAINT_DIM, false), + : btMultiBodyConstraint(bodyA, bodyB, linkA, linkB, BTMBSLIDERCONSTRAINT_DIM, false, MULTIBODY_CONSTRAINT_SLIDER), m_rigidBodyA(0), m_rigidBodyB(0), m_pivotInA(pivotInA), diff --git a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp index 
3e5aa30f28..25ddd539bf 100644 --- a/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp +++ b/thirdparty/bullet/BulletDynamics/Featherstone/btMultiBodySphericalJointMotor.cpp @@ -23,7 +23,7 @@ subject to the following restrictions: #include "BulletDynamics/ConstraintSolver/btGeneric6DofSpring2Constraint.h" btMultiBodySphericalJointMotor::btMultiBodySphericalJointMotor(btMultiBody* body, int link, btScalar maxMotorImpulse) - : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 3, true), + : btMultiBodyConstraint(body, body, link, body->getLink(link).m_parent, 3, true, MULTIBODY_CONSTRAINT_SPHERICAL_MOTOR), m_desiredVelocity(0, 0, 0), m_desiredPosition(0,0,0,1), m_kd(1.), diff --git a/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h b/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h index 7b225701f6..01c7e93a1b 100644 --- a/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h +++ b/thirdparty/bullet/BulletSoftBody/DeformableBodyInplaceSolverIslandCallback.h @@ -13,13 +13,12 @@ struct DeformableBodyInplaceSolverIslandCallback : public MultiBodyInplaceSolver btDeformableMultiBodyConstraintSolver* m_deformableSolver; DeformableBodyInplaceSolverIslandCallback(btDeformableMultiBodyConstraintSolver* solver, - btDispatcher* dispatcher) - : MultiBodyInplaceSolverIslandCallback(solver, dispatcher), m_deformableSolver(solver) + btDispatcher* dispatcher) + : MultiBodyInplaceSolverIslandCallback(solver, dispatcher), m_deformableSolver(solver) { } - - virtual void processConstraints(int islandId=-1) + virtual void processConstraints(int islandId = -1) { btCollisionObject** bodies = m_bodies.size() ? &m_bodies[0] : 0; btCollisionObject** softBodies = m_softBodies.size() ? 
&m_softBodies[0] : 0; @@ -30,7 +29,7 @@ struct DeformableBodyInplaceSolverIslandCallback : public MultiBodyInplaceSolver //printf("mb contacts = %d, mb constraints = %d\n", mbContacts, m_multiBodyConstraints.size()); m_deformableSolver->solveDeformableBodyGroup(bodies, m_bodies.size(), softBodies, m_softBodies.size(), manifold, m_manifolds.size(), constraints, m_constraints.size(), multiBodyConstraints, m_multiBodyConstraints.size(), *m_solverInfo, m_debugDrawer, m_dispatcher); - if (m_bodies.size() && (m_solverInfo->m_reportSolverAnalytics&1)) + if (m_bodies.size() && (m_solverInfo->m_reportSolverAnalytics & 1)) { m_deformableSolver->m_analyticsData.m_islandId = islandId; m_islandAnalyticsData.push_back(m_solver->m_analyticsData); diff --git a/thirdparty/bullet/BulletSoftBody/btCGProjection.h b/thirdparty/bullet/BulletSoftBody/btCGProjection.h index d047e6d3d9..e05970664c 100644 --- a/thirdparty/bullet/BulletSoftBody/btCGProjection.h +++ b/thirdparty/bullet/BulletSoftBody/btCGProjection.h @@ -22,85 +22,83 @@ struct DeformableContactConstraint { - const btSoftBody::Node* m_node; - btAlignedObjectArray<const btSoftBody::RContact*> m_contact; - btAlignedObjectArray<btVector3> m_total_normal_dv; - btAlignedObjectArray<btVector3> m_total_tangent_dv; - btAlignedObjectArray<bool> m_static; - btAlignedObjectArray<bool> m_can_be_dynamic; - - DeformableContactConstraint(const btSoftBody::RContact& rcontact): m_node(rcontact.m_node) - { - append(rcontact); - } - - DeformableContactConstraint(): m_node(NULL) - { - m_contact.push_back(NULL); - } - - void append(const btSoftBody::RContact& rcontact) - { - m_contact.push_back(&rcontact); - m_total_normal_dv.push_back(btVector3(0,0,0)); - m_total_tangent_dv.push_back(btVector3(0,0,0)); - m_static.push_back(false); - m_can_be_dynamic.push_back(true); - } - - void replace(const btSoftBody::RContact& rcontact) - { - m_contact.clear(); - m_total_normal_dv.clear(); - m_total_tangent_dv.clear(); - m_static.clear(); - 
m_can_be_dynamic.clear(); - append(rcontact); - } - - ~DeformableContactConstraint() - { - } + const btSoftBody::Node* m_node; + btAlignedObjectArray<const btSoftBody::RContact*> m_contact; + btAlignedObjectArray<btVector3> m_total_normal_dv; + btAlignedObjectArray<btVector3> m_total_tangent_dv; + btAlignedObjectArray<bool> m_static; + btAlignedObjectArray<bool> m_can_be_dynamic; + + DeformableContactConstraint(const btSoftBody::RContact& rcontact) : m_node(rcontact.m_node) + { + append(rcontact); + } + + DeformableContactConstraint() : m_node(NULL) + { + m_contact.push_back(NULL); + } + + void append(const btSoftBody::RContact& rcontact) + { + m_contact.push_back(&rcontact); + m_total_normal_dv.push_back(btVector3(0, 0, 0)); + m_total_tangent_dv.push_back(btVector3(0, 0, 0)); + m_static.push_back(false); + m_can_be_dynamic.push_back(true); + } + + void replace(const btSoftBody::RContact& rcontact) + { + m_contact.clear(); + m_total_normal_dv.clear(); + m_total_tangent_dv.clear(); + m_static.clear(); + m_can_be_dynamic.clear(); + append(rcontact); + } + + ~DeformableContactConstraint() + { + } }; class btCGProjection { public: - typedef btAlignedObjectArray<btVector3> TVStack; - typedef btAlignedObjectArray<btAlignedObjectArray<btVector3> > TVArrayStack; - typedef btAlignedObjectArray<btAlignedObjectArray<btScalar> > TArrayStack; - btAlignedObjectArray<btSoftBody *>& m_softBodies; - const btScalar& m_dt; - // map from node indices to node pointers - const btAlignedObjectArray<btSoftBody::Node*>* m_nodes; - - btCGProjection(btAlignedObjectArray<btSoftBody *>& softBodies, const btScalar& dt) - : m_softBodies(softBodies) - , m_dt(dt) - { - } - - virtual ~btCGProjection() - { - } - - // apply the constraints - virtual void project(TVStack& x) = 0; - - virtual void setConstraints() = 0; - - // update the constraints - virtual btScalar update() = 0; - - virtual void reinitialize(bool nodeUpdated) - { - } - - virtual void setIndices(const 
btAlignedObjectArray<btSoftBody::Node*>* nodes) - { - m_nodes = nodes; - } -}; + typedef btAlignedObjectArray<btVector3> TVStack; + typedef btAlignedObjectArray<btAlignedObjectArray<btVector3> > TVArrayStack; + typedef btAlignedObjectArray<btAlignedObjectArray<btScalar> > TArrayStack; + btAlignedObjectArray<btSoftBody*>& m_softBodies; + const btScalar& m_dt; + // map from node indices to node pointers + const btAlignedObjectArray<btSoftBody::Node*>* m_nodes; + + btCGProjection(btAlignedObjectArray<btSoftBody*>& softBodies, const btScalar& dt) + : m_softBodies(softBodies), m_dt(dt) + { + } + virtual ~btCGProjection() + { + } + + // apply the constraints + virtual void project(TVStack& x) = 0; + + virtual void setConstraints() = 0; + + // update the constraints + virtual btScalar update() = 0; + + virtual void reinitialize(bool nodeUpdated) + { + } + + virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes) + { + m_nodes = nodes; + } +}; #endif /* btCGProjection_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h b/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h index bd51e584b9..bcd5e6b519 100644 --- a/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h +++ b/thirdparty/bullet/BulletSoftBody/btConjugateGradient.h @@ -15,144 +15,103 @@ #ifndef BT_CONJUGATE_GRADIENT_H #define BT_CONJUGATE_GRADIENT_H -#include <iostream> -#include <cmath> -#include <limits> -#include <LinearMath/btAlignedObjectArray.h> -#include <LinearMath/btVector3.h> -#include "LinearMath/btQuickprof.h" +#include "btKrylovSolver.h" template <class MatrixX> -class btConjugateGradient +class btConjugateGradient : public btKrylovSolver<MatrixX> { - typedef btAlignedObjectArray<btVector3> TVStack; - TVStack r,p,z,temp; - int max_iterations; - btScalar tolerance_squared; + typedef btAlignedObjectArray<btVector3> TVStack; + typedef btKrylovSolver<MatrixX> Base; + TVStack r, p, z, temp; + public: - btConjugateGradient(const int max_it_in) - : 
max_iterations(max_it_in) - { - tolerance_squared = 1e-5; - } - - virtual ~btConjugateGradient(){} - - // return the number of iterations taken - int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) - { - BT_PROFILE("CGSolve"); - btAssert(x.size() == b.size()); - reinitialize(b); - // r = b - A * x --with assigned dof zeroed out - A.multiply(x, temp); - r = sub(b, temp); - A.project(r); - // z = M^(-1) * r - A.precondition(r, z); - A.project(z); - btScalar r_dot_z = dot(z,r); - if (r_dot_z <= tolerance_squared) { - if (verbose) - { - std::cout << "Iteration = 0" << std::endl; - std::cout << "Two norm of the residual = " << r_dot_z << std::endl; - } - return 0; - } - p = z; - btScalar r_dot_z_new = r_dot_z; - for (int k = 1; k <= max_iterations; k++) { - // temp = A*p - A.multiply(p, temp); - A.project(temp); - if (dot(p,temp) < SIMD_EPSILON) - { - if (verbose) - std::cout << "Encountered negative direction in CG!" << std::endl; - if (k == 1) - { - x = b; - } - return k; - } - // alpha = r^T * z / (p^T * A * p) - btScalar alpha = r_dot_z_new / dot(p, temp); - // x += alpha * p; - multAndAddTo(alpha, p, x); - // r -= alpha * temp; - multAndAddTo(-alpha, temp, r); - // z = M^(-1) * r - A.precondition(r, z); - r_dot_z = r_dot_z_new; - r_dot_z_new = dot(r,z); - if (r_dot_z_new < tolerance_squared) { - if (verbose) - { - std::cout << "ConjugateGradient iterations " << k << std::endl; - } - return k; - } + btConjugateGradient(const int max_it_in) + : btKrylovSolver<MatrixX>(max_it_in, SIMD_EPSILON) + { + } + + virtual ~btConjugateGradient() {} + + // return the number of iterations taken + int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) + { + BT_PROFILE("CGSolve"); + btAssert(x.size() == b.size()); + reinitialize(b); + temp = b; + A.project(temp); + p = temp; + A.precondition(p, z); + btScalar d0 = this->dot(z, temp); + d0 = btMin(btScalar(1), d0); + // r = b - A * x --with assigned dof zeroed out + A.multiply(x, temp); + r = 
this->sub(b, temp); + A.project(r); + // z = M^(-1) * r + A.precondition(r, z); + A.project(z); + btScalar r_dot_z = this->dot(z, r); + if (r_dot_z <= Base::m_tolerance * d0) + { + if (verbose) + { + std::cout << "Iteration = 0" << std::endl; + std::cout << "Two norm of the residual = " << r_dot_z << std::endl; + } + return 0; + } + p = z; + btScalar r_dot_z_new = r_dot_z; + for (int k = 1; k <= Base::m_maxIterations; k++) + { + // temp = A*p + A.multiply(p, temp); + A.project(temp); + if (this->dot(p, temp) < 0) + { + if (verbose) + std::cout << "Encountered negative direction in CG!" << std::endl; + if (k == 1) + { + x = b; + } + return k; + } + // alpha = r^T * z / (p^T * A * p) + btScalar alpha = r_dot_z_new / this->dot(p, temp); + // x += alpha * p; + this->multAndAddTo(alpha, p, x); + // r -= alpha * temp; + this->multAndAddTo(-alpha, temp, r); + // z = M^(-1) * r + A.precondition(r, z); + r_dot_z = r_dot_z_new; + r_dot_z_new = this->dot(r, z); + if (r_dot_z_new < Base::m_tolerance * d0) + { + if (verbose) + { + std::cout << "ConjugateGradient iterations " << k << " residual = " << r_dot_z_new << std::endl; + } + return k; + } + + btScalar beta = r_dot_z_new / r_dot_z; + p = this->multAndAdd(beta, p, z); + } + if (verbose) + { + std::cout << "ConjugateGradient max iterations reached " << Base::m_maxIterations << " error = " << r_dot_z_new << std::endl; + } + return Base::m_maxIterations; + } - btScalar beta = r_dot_z_new/r_dot_z; - p = multAndAdd(beta, p, z); - } - if (verbose) - { - std::cout << "ConjugateGradient max iterations reached " << max_iterations << std::endl; - } - return max_iterations; - } - - void reinitialize(const TVStack& b) - { - r.resize(b.size()); - p.resize(b.size()); - z.resize(b.size()); - temp.resize(b.size()); - } - - TVStack sub(const TVStack& a, const TVStack& b) - { - // c = a-b - btAssert(a.size() == b.size()); - TVStack c; - c.resize(a.size()); - for (int i = 0; i < a.size(); ++i) - { - c[i] = a[i] - b[i]; - } - return c; - } - 
- btScalar squaredNorm(const TVStack& a) - { - return dot(a,a); - } - - btScalar dot(const TVStack& a, const TVStack& b) - { - btScalar ans(0); - for (int i = 0; i < a.size(); ++i) - ans += a[i].dot(b[i]); - return ans; - } - - void multAndAddTo(btScalar s, const TVStack& a, TVStack& result) - { -// result += s*a - btAssert(a.size() == result.size()); - for (int i = 0; i < a.size(); ++i) - result[i] += s * a[i]; - } - - TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b) - { - // result = a*s + b - TVStack result; - result.resize(a.size()); - for (int i = 0; i < a.size(); ++i) - result[i] = s * a[i] + b[i]; - return result; - } + void reinitialize(const TVStack& b) + { + r.resize(b.size()); + p.resize(b.size()); + z.resize(b.size()); + temp.resize(b.size()); + } }; #endif /* btConjugateGradient_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h index 7b211c4172..6146120365 100644 --- a/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h +++ b/thirdparty/bullet/BulletSoftBody/btConjugateResidual.h @@ -15,174 +15,98 @@ #ifndef BT_CONJUGATE_RESIDUAL_H #define BT_CONJUGATE_RESIDUAL_H -#include <iostream> -#include <cmath> -#include <limits> -#include <LinearMath/btAlignedObjectArray.h> -#include <LinearMath/btVector3.h> -#include <LinearMath/btScalar.h> -#include "LinearMath/btQuickprof.h" +#include "btKrylovSolver.h" + template <class MatrixX> -class btConjugateResidual +class btConjugateResidual : public btKrylovSolver<MatrixX> { - typedef btAlignedObjectArray<btVector3> TVStack; - TVStack r,p,z,temp_p, temp_r, best_x; - // temp_r = A*r - // temp_p = A*p - // z = M^(-1) * temp_p = M^(-1) * A * p - int max_iterations; - btScalar tolerance_squared, best_r; + typedef btAlignedObjectArray<btVector3> TVStack; + typedef btKrylovSolver<MatrixX> Base; + TVStack r, p, z, temp_p, temp_r, best_x; + // temp_r = A*r + // temp_p = A*p + // z = M^(-1) * temp_p = M^(-1) * A * p + 
btScalar best_r; + public: - btConjugateResidual(const int max_it_in) - : max_iterations(max_it_in) - { - tolerance_squared = 1e-2; - } - - virtual ~btConjugateResidual(){} - - // return the number of iterations taken - int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) - { - BT_PROFILE("CRSolve"); - btAssert(x.size() == b.size()); - reinitialize(b); - // r = b - A * x --with assigned dof zeroed out - A.multiply(x, temp_r); // borrow temp_r here to store A*x - r = sub(b, temp_r); - // z = M^(-1) * r - A.precondition(r, z); // borrow z to store preconditioned r - r = z; - btScalar residual_norm = norm(r); - if (residual_norm <= tolerance_squared) { - if (verbose) - { - std::cout << "Iteration = 0" << std::endl; - std::cout << "Two norm of the residual = " << residual_norm << std::endl; - } - return 0; - } - p = r; - btScalar r_dot_Ar, r_dot_Ar_new; - // temp_p = A*p - A.multiply(p, temp_p); - // temp_r = A*r - temp_r = temp_p; - r_dot_Ar = dot(r, temp_r); - for (int k = 1; k <= max_iterations; k++) { - // z = M^(-1) * Ap - A.precondition(temp_p, z); - // alpha = r^T * A * r / (Ap)^T * M^-1 * Ap) - btScalar alpha = r_dot_Ar / dot(temp_p, z); - // x += alpha * p; - multAndAddTo(alpha, p, x); - // r -= alpha * z; - multAndAddTo(-alpha, z, r); - btScalar norm_r = norm(r); - if (norm_r < best_r) - { - best_x = x; - best_r = norm_r; - if (norm_r < tolerance_squared) { - if (verbose) - { - std::cout << "ConjugateResidual iterations " << k << std::endl; - } - return k; - } - else - { - if (verbose) - { - std::cout << "ConjugateResidual iterations " << k << " has residual "<< norm_r << std::endl; - } - } - } - // temp_r = A * r; - A.multiply(r, temp_r); - r_dot_Ar_new = dot(r, temp_r); - btScalar beta = r_dot_Ar_new/r_dot_Ar; - r_dot_Ar = r_dot_Ar_new; - // p = beta*p + r; - p = multAndAdd(beta, p, r); - // temp_p = beta*temp_p + temp_r; - temp_p = multAndAdd(beta, temp_p, temp_r); - } - if (verbose) - { - std::cout << "ConjugateResidual max iterations 
reached " << max_iterations << std::endl; - } - x = best_x; - return max_iterations; - } - - void reinitialize(const TVStack& b) - { - r.resize(b.size()); - p.resize(b.size()); - z.resize(b.size()); - temp_p.resize(b.size()); - temp_r.resize(b.size()); - best_x.resize(b.size()); - best_r = SIMD_INFINITY; - } - - TVStack sub(const TVStack& a, const TVStack& b) - { - // c = a-b - btAssert(a.size() == b.size()); - TVStack c; - c.resize(a.size()); - for (int i = 0; i < a.size(); ++i) - { - c[i] = a[i] - b[i]; - } - return c; - } - - btScalar squaredNorm(const TVStack& a) - { - return dot(a,a); - } - - btScalar norm(const TVStack& a) - { - btScalar ret = 0; - for (int i = 0; i < a.size(); ++i) - { - for (int d = 0; d < 3; ++d) - { - ret = btMax(ret, btFabs(a[i][d])); - } - } - return ret; - } - - btScalar dot(const TVStack& a, const TVStack& b) - { - btScalar ans(0); - for (int i = 0; i < a.size(); ++i) - ans += a[i].dot(b[i]); - return ans; - } - - void multAndAddTo(btScalar s, const TVStack& a, TVStack& result) - { - // result += s*a - btAssert(a.size() == result.size()); - for (int i = 0; i < a.size(); ++i) - result[i] += s * a[i]; - } - - TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b) - { - // result = a*s + b - TVStack result; - result.resize(a.size()); - for (int i = 0; i < a.size(); ++i) - result[i] = s * a[i] + b[i]; - return result; - } + btConjugateResidual(const int max_it_in) + : Base(max_it_in, 1e-8) + { + } + + virtual ~btConjugateResidual() {} + + // return the number of iterations taken + int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) + { + BT_PROFILE("CRSolve"); + btAssert(x.size() == b.size()); + reinitialize(b); + // r = b - A * x --with assigned dof zeroed out + A.multiply(x, temp_r); // borrow temp_r here to store A*x + r = this->sub(b, temp_r); + // z = M^(-1) * r + A.precondition(r, z); // borrow z to store preconditioned r + r = z; + btScalar residual_norm = this->norm(r); + if (residual_norm <= 
Base::m_tolerance) + { + return 0; + } + p = r; + btScalar r_dot_Ar, r_dot_Ar_new; + // temp_p = A*p + A.multiply(p, temp_p); + // temp_r = A*r + temp_r = temp_p; + r_dot_Ar = this->dot(r, temp_r); + for (int k = 1; k <= Base::m_maxIterations; k++) + { + // z = M^(-1) * Ap + A.precondition(temp_p, z); + // alpha = r^T * A * r / (Ap)^T * M^-1 * Ap) + btScalar alpha = r_dot_Ar / this->dot(temp_p, z); + // x += alpha * p; + this->multAndAddTo(alpha, p, x); + // r -= alpha * z; + this->multAndAddTo(-alpha, z, r); + btScalar norm_r = this->norm(r); + if (norm_r < best_r) + { + best_x = x; + best_r = norm_r; + if (norm_r < Base::m_tolerance) + { + return k; + } + } + // temp_r = A * r; + A.multiply(r, temp_r); + r_dot_Ar_new = this->dot(r, temp_r); + btScalar beta = r_dot_Ar_new / r_dot_Ar; + r_dot_Ar = r_dot_Ar_new; + // p = beta*p + r; + p = this->multAndAdd(beta, p, r); + // temp_p = beta*temp_p + temp_r; + temp_p = this->multAndAdd(beta, temp_p, temp_r); + } + if (verbose) + { + std::cout << "ConjugateResidual max iterations reached, residual = " << best_r << std::endl; + } + x = best_x; + return Base::m_maxIterations; + } + + void reinitialize(const TVStack& b) + { + r.resize(b.size()); + p.resize(b.size()); + z.resize(b.size()); + temp_p.resize(b.size()); + temp_r.resize(b.size()); + best_x.resize(b.size()); + best_r = SIMD_INFINITY; + } }; #endif /* btConjugateResidual_h */ - diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp index 5381ee6265..2455ed2138 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.cpp @@ -17,211 +17,283 @@ #include "btPreconditioner.h" #include "LinearMath/btQuickprof.h" -btDeformableBackwardEulerObjective::btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody *>& softBodies, const TVStack& backup_v) -: 
m_softBodies(softBodies) -, m_projection(softBodies) -, m_backupVelocity(backup_v) -, m_implicit(false) +btDeformableBackwardEulerObjective::btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody*>& softBodies, const TVStack& backup_v) + : m_softBodies(softBodies), m_projection(softBodies), m_backupVelocity(backup_v), m_implicit(false) { - m_massPreconditioner = new MassPreconditioner(m_softBodies); - m_KKTPreconditioner = new KKTPreconditioner(m_softBodies, m_projection, m_lf, m_dt, m_implicit); - m_preconditioner = m_KKTPreconditioner; + m_massPreconditioner = new MassPreconditioner(m_softBodies); + m_KKTPreconditioner = new KKTPreconditioner(m_softBodies, m_projection, m_lf, m_dt, m_implicit); + m_preconditioner = m_KKTPreconditioner; } btDeformableBackwardEulerObjective::~btDeformableBackwardEulerObjective() { - delete m_KKTPreconditioner; - delete m_massPreconditioner; + delete m_KKTPreconditioner; + delete m_massPreconditioner; } void btDeformableBackwardEulerObjective::reinitialize(bool nodeUpdated, btScalar dt) { - BT_PROFILE("reinitialize"); - if (dt > 0) - { - setDt(dt); - } - if(nodeUpdated) - { - updateId(); - } - for (int i = 0; i < m_lf.size(); ++i) - { - m_lf[i]->reinitialize(nodeUpdated); - } - m_projection.reinitialize(nodeUpdated); -// m_preconditioner->reinitialize(nodeUpdated); + BT_PROFILE("reinitialize"); + if (dt > 0) + { + setDt(dt); + } + if (nodeUpdated) + { + updateId(); + } + for (int i = 0; i < m_lf.size(); ++i) + { + m_lf[i]->reinitialize(nodeUpdated); + } + btMatrix3x3 I; + I.setIdentity(); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + if (psb->m_nodes[j].m_im > 0) + psb->m_nodes[j].m_effectiveMass = I * (1.0 / psb->m_nodes[j].m_im); + } + } + m_projection.reinitialize(nodeUpdated); + // m_preconditioner->reinitialize(nodeUpdated); } void btDeformableBackwardEulerObjective::setDt(btScalar dt) { - m_dt = dt; + m_dt = dt; } void 
btDeformableBackwardEulerObjective::multiply(const TVStack& x, TVStack& b) const { - BT_PROFILE("multiply"); - // add in the mass term - size_t counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - b[counter] = (node.m_im == 0) ? btVector3(0,0,0) : x[counter] / node.m_im; - ++counter; - } - } - - for (int i = 0; i < m_lf.size(); ++i) - { - // add damping matrix - m_lf[i]->addScaledDampingForceDifferential(-m_dt, x, b); - if (m_implicit) - { - m_lf[i]->addScaledElasticForceDifferential(-m_dt*m_dt, x, b); - } - } - int offset = m_nodes.size(); - for (int i = offset; i < b.size(); ++i) - { - b[i].setZero(); - } - // add in the lagrange multiplier terms - - for (int c = 0; c < m_projection.m_lagrangeMultipliers.size(); ++c) - { - // C^T * lambda - const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[c]; - for (int i = 0; i < lm.m_num_nodes; ++i) - { - for (int j = 0; j < lm.m_num_constraints; ++j) - { - b[lm.m_indices[i]] += x[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j]; - } - } - // C * x - for (int d = 0; d < lm.m_num_constraints; ++d) - { - for (int i = 0; i < lm.m_num_nodes; ++i) - { - b[offset+c][d] += lm.m_weights[i] * x[lm.m_indices[i]].dot(lm.m_dirs[d]); - } - } - } + BT_PROFILE("multiply"); + // add in the mass term + size_t counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + b[counter] = (node.m_im == 0) ? btVector3(0, 0, 0) : x[counter] / node.m_im; + ++counter; + } + } + + for (int i = 0; i < m_lf.size(); ++i) + { + // add damping matrix + m_lf[i]->addScaledDampingForceDifferential(-m_dt, x, b); + // Always integrate picking force implicitly for stability. 
+ if (m_implicit || m_lf[i]->getForceType() == BT_MOUSE_PICKING_FORCE) + { + m_lf[i]->addScaledElasticForceDifferential(-m_dt * m_dt, x, b); + } + } + int offset = m_nodes.size(); + for (int i = offset; i < b.size(); ++i) + { + b[i].setZero(); + } + // add in the lagrange multiplier terms + + for (int c = 0; c < m_projection.m_lagrangeMultipliers.size(); ++c) + { + // C^T * lambda + const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[c]; + for (int i = 0; i < lm.m_num_nodes; ++i) + { + for (int j = 0; j < lm.m_num_constraints; ++j) + { + b[lm.m_indices[i]] += x[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j]; + } + } + // C * x + for (int d = 0; d < lm.m_num_constraints; ++d) + { + for (int i = 0; i < lm.m_num_nodes; ++i) + { + b[offset + c][d] += lm.m_weights[i] * x[lm.m_indices[i]].dot(lm.m_dirs[d]); + } + } + } } void btDeformableBackwardEulerObjective::updateVelocity(const TVStack& dv) { - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - btSoftBody::Node& node = psb->m_nodes[j]; - node.m_v = m_backupVelocity[node.index] + dv[node.index]; - } - } + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btSoftBody::Node& node = psb->m_nodes[j]; + node.m_v = m_backupVelocity[node.index] + dv[node.index]; + } + } } void btDeformableBackwardEulerObjective::applyForce(TVStack& force, bool setZero) { - size_t counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - counter += psb->m_nodes.size(); - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - btScalar one_over_mass = (psb->m_nodes[j].m_im == 0) ? 
0 : psb->m_nodes[j].m_im; - psb->m_nodes[j].m_v += one_over_mass * force[counter++]; - } - } - if (setZero) - { - for (int i = 0; i < force.size(); ++i) - force[i].setZero(); - } + size_t counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + counter += psb->m_nodes.size(); + continue; + } + if (m_implicit) + { + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + if (psb->m_nodes[j].m_im != 0) + { + psb->m_nodes[j].m_v += psb->m_nodes[j].m_effectiveMass_inv * force[counter++]; + } + } + } + else + { + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btScalar one_over_mass = (psb->m_nodes[j].m_im == 0) ? 0 : psb->m_nodes[j].m_im; + psb->m_nodes[j].m_v += one_over_mass * force[counter++]; + } + } + } + if (setZero) + { + for (int i = 0; i < force.size(); ++i) + force[i].setZero(); + } } -void btDeformableBackwardEulerObjective::computeResidual(btScalar dt, TVStack &residual) +void btDeformableBackwardEulerObjective::computeResidual(btScalar dt, TVStack& residual) { - BT_PROFILE("computeResidual"); - // add implicit force - for (int i = 0; i < m_lf.size(); ++i) - { - if (m_implicit) - { - m_lf[i]->addScaledForces(dt, residual); - } - else - { - m_lf[i]->addScaledDampingForce(dt, residual); - } - } -// m_projection.project(residual); + BT_PROFILE("computeResidual"); + // add implicit force + for (int i = 0; i < m_lf.size(); ++i) + { + // Always integrate picking force implicitly for stability. 
+ if (m_implicit || m_lf[i]->getForceType() == BT_MOUSE_PICKING_FORCE) + { + m_lf[i]->addScaledForces(dt, residual); + } + else + { + m_lf[i]->addScaledDampingForce(dt, residual); + } + } + // m_projection.project(residual); } btScalar btDeformableBackwardEulerObjective::computeNorm(const TVStack& residual) const { - btScalar mag = 0; - for (int i = 0; i < residual.size(); ++i) - { - mag += residual[i].length2(); - } - return std::sqrt(mag); + btScalar mag = 0; + for (int i = 0; i < residual.size(); ++i) + { + mag += residual[i].length2(); + } + return std::sqrt(mag); } btScalar btDeformableBackwardEulerObjective::totalEnergy(btScalar dt) { - btScalar e = 0; - for (int i = 0; i < m_lf.size(); ++i) - { - e += m_lf[i]->totalEnergy(dt); - } - return e; + btScalar e = 0; + for (int i = 0; i < m_lf.size(); ++i) + { + e += m_lf[i]->totalEnergy(dt); + } + return e; } void btDeformableBackwardEulerObjective::applyExplicitForce(TVStack& force) { - for (int i = 0; i < m_softBodies.size(); ++i) - { - m_softBodies[i]->advanceDeformation(); - } - - for (int i = 0; i < m_lf.size(); ++i) - { - m_lf[i]->addScaledExplicitForce(m_dt, force); - } - applyForce(force, true); + for (int i = 0; i < m_softBodies.size(); ++i) + { + m_softBodies[i]->advanceDeformation(); + } + if (m_implicit) + { + // apply forces except gravity force + btVector3 gravity; + for (int i = 0; i < m_lf.size(); ++i) + { + if (m_lf[i]->getForceType() == BT_GRAVITY_FORCE) + { + gravity = static_cast<btDeformableGravityForce*>(m_lf[i])->m_gravity; + } + else + { + m_lf[i]->addScaledForces(m_dt, force); + } + } + for (int i = 0; i < m_lf.size(); ++i) + { + m_lf[i]->addScaledHessian(m_dt); + } + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (psb->isActive()) + { + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + // add gravity explicitly + psb->m_nodes[j].m_v += m_dt * psb->m_gravityFactor * gravity; + } + } + } + } + else + { + for (int i = 0; i < m_lf.size(); ++i) + 
{ + m_lf[i]->addScaledExplicitForce(m_dt, force); + } + } + // calculate inverse mass matrix for all nodes + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (psb->isActive()) + { + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + if (psb->m_nodes[j].m_im > 0) + { + psb->m_nodes[j].m_effectiveMass_inv = psb->m_nodes[j].m_effectiveMass.inverse(); + } + } + } + } + applyForce(force, true); } void btDeformableBackwardEulerObjective::initialGuess(TVStack& dv, const TVStack& residual) { - size_t counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - dv[counter] = psb->m_nodes[j].m_im * residual[counter]; - ++counter; - } - } + size_t counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + dv[counter] = psb->m_nodes[j].m_im * residual[counter]; + ++counter; + } + } } //set constraints as projections void btDeformableBackwardEulerObjective::setConstraints(const btContactSolverInfo& infoGlobal) { - m_projection.setConstraints(infoGlobal); + m_projection.setConstraints(infoGlobal); } void btDeformableBackwardEulerObjective::applyDynamicFriction(TVStack& r) { - m_projection.applyDynamicFriction(r); + m_projection.applyDynamicFriction(r); } diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h index 86579e71ac..eb05b9f010 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableBackwardEulerObjective.h @@ -31,143 +31,168 @@ class btDeformableBackwardEulerObjective { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btScalar m_dt; - btAlignedObjectArray<btDeformableLagrangianForce*> m_lf; - btAlignedObjectArray<btSoftBody *>& m_softBodies; - 
Preconditioner* m_preconditioner; - btDeformableContactProjection m_projection; - const TVStack& m_backupVelocity; - btAlignedObjectArray<btSoftBody::Node* > m_nodes; - bool m_implicit; - MassPreconditioner* m_massPreconditioner; - KKTPreconditioner* m_KKTPreconditioner; - - btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody *>& softBodies, const TVStack& backup_v); - - virtual ~btDeformableBackwardEulerObjective(); - - void initialize(){} - - // compute the rhs for CG solve, i.e, add the dt scaled implicit force to residual - void computeResidual(btScalar dt, TVStack& residual); - - // add explicit force to the velocity - void applyExplicitForce(TVStack& force); - - // apply force to velocity and optionally reset the force to zero - void applyForce(TVStack& force, bool setZero); - - // compute the norm of the residual - btScalar computeNorm(const TVStack& residual) const; - - // compute one step of the solve (there is only one solve if the system is linear) - void computeStep(TVStack& dv, const TVStack& residual, const btScalar& dt); - - // perform A*x = b - void multiply(const TVStack& x, TVStack& b) const; - - // set initial guess for CG solve - void initialGuess(TVStack& dv, const TVStack& residual); - - // reset data structure and reset dt - void reinitialize(bool nodeUpdated, btScalar dt); - - void setDt(btScalar dt); - - // add friction force to residual - void applyDynamicFriction(TVStack& r); - - // add dv to velocity - void updateVelocity(const TVStack& dv); - - //set constraints as projections - void setConstraints(const btContactSolverInfo& infoGlobal); - - // update the projections and project the residual - void project(TVStack& r) - { - BT_PROFILE("project"); - m_projection.project(r); - } - - // perform precondition M^(-1) x = b - void precondition(const TVStack& x, TVStack& b) - { - m_preconditioner->operator()(x,b); - } - - // reindex all the vertices - virtual void updateId() - { - size_t node_id = 0; - size_t face_id = 0; - 
m_nodes.clear(); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].index = node_id; - m_nodes.push_back(&psb->m_nodes[j]); - ++node_id; - } - for (int j = 0; j < psb->m_faces.size(); ++j) - { - psb->m_faces[j].m_index = face_id; - ++face_id; - } - } - } - - const btAlignedObjectArray<btSoftBody::Node*>* getIndices() const - { - return &m_nodes; - } - - void setImplicit(bool implicit) - { - m_implicit = implicit; - } - - // Calculate the total potential energy in the system - btScalar totalEnergy(btScalar dt); - - void addLagrangeMultiplier(const TVStack& vec, TVStack& extended_vec) - { - extended_vec.resize(vec.size() + m_projection.m_lagrangeMultipliers.size()); - for (int i = 0; i < vec.size(); ++i) - { - extended_vec[i] = vec[i]; - } - int offset = vec.size(); - for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i) - { - extended_vec[offset + i].setZero(); - } - } - - void addLagrangeMultiplierRHS(const TVStack& residual, const TVStack& m_dv, TVStack& extended_residual) - { - extended_residual.resize(residual.size() + m_projection.m_lagrangeMultipliers.size()); - for (int i = 0; i < residual.size(); ++i) - { - extended_residual[i] = residual[i]; - } - int offset = residual.size(); - for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i) - { - const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[i]; - extended_residual[offset + i].setZero(); - for (int d = 0; d < lm.m_num_constraints; ++d) - { - for (int n = 0; n < lm.m_num_nodes; ++n) - { - extended_residual[offset + i][d] += lm.m_weights[n] * m_dv[lm.m_indices[n]].dot(lm.m_dirs[d]); - } - } - } - } + typedef btAlignedObjectArray<btVector3> TVStack; + btScalar m_dt; + btAlignedObjectArray<btDeformableLagrangianForce*> m_lf; + btAlignedObjectArray<btSoftBody*>& m_softBodies; + Preconditioner* m_preconditioner; + btDeformableContactProjection m_projection; + const 
TVStack& m_backupVelocity; + btAlignedObjectArray<btSoftBody::Node*> m_nodes; + bool m_implicit; + MassPreconditioner* m_massPreconditioner; + KKTPreconditioner* m_KKTPreconditioner; + + btDeformableBackwardEulerObjective(btAlignedObjectArray<btSoftBody*>& softBodies, const TVStack& backup_v); + + virtual ~btDeformableBackwardEulerObjective(); + + void initialize() {} + + // compute the rhs for CG solve, i.e, add the dt scaled implicit force to residual + void computeResidual(btScalar dt, TVStack& residual); + + // add explicit force to the velocity + void applyExplicitForce(TVStack& force); + + // apply force to velocity and optionally reset the force to zero + void applyForce(TVStack& force, bool setZero); + + // compute the norm of the residual + btScalar computeNorm(const TVStack& residual) const; + + // compute one step of the solve (there is only one solve if the system is linear) + void computeStep(TVStack& dv, const TVStack& residual, const btScalar& dt); + + // perform A*x = b + void multiply(const TVStack& x, TVStack& b) const; + + // set initial guess for CG solve + void initialGuess(TVStack& dv, const TVStack& residual); + + // reset data structure and reset dt + void reinitialize(bool nodeUpdated, btScalar dt); + + void setDt(btScalar dt); + + // add friction force to residual + void applyDynamicFriction(TVStack& r); + + // add dv to velocity + void updateVelocity(const TVStack& dv); + + //set constraints as projections + void setConstraints(const btContactSolverInfo& infoGlobal); + + // update the projections and project the residual + void project(TVStack& r) + { + BT_PROFILE("project"); + m_projection.project(r); + } + + // perform precondition M^(-1) x = b + void precondition(const TVStack& x, TVStack& b) + { + m_preconditioner->operator()(x, b); + } + + // reindex all the vertices + virtual void updateId() + { + size_t node_id = 0; + size_t face_id = 0; + m_nodes.clear(); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = 
m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].index = node_id; + m_nodes.push_back(&psb->m_nodes[j]); + ++node_id; + } + for (int j = 0; j < psb->m_faces.size(); ++j) + { + psb->m_faces[j].m_index = face_id; + ++face_id; + } + } + } + + const btAlignedObjectArray<btSoftBody::Node*>* getIndices() const + { + return &m_nodes; + } + + void setImplicit(bool implicit) + { + m_implicit = implicit; + } + + // Calculate the total potential energy in the system + btScalar totalEnergy(btScalar dt); + + void addLagrangeMultiplier(const TVStack& vec, TVStack& extended_vec) + { + extended_vec.resize(vec.size() + m_projection.m_lagrangeMultipliers.size()); + for (int i = 0; i < vec.size(); ++i) + { + extended_vec[i] = vec[i]; + } + int offset = vec.size(); + for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i) + { + extended_vec[offset + i].setZero(); + } + } + + void addLagrangeMultiplierRHS(const TVStack& residual, const TVStack& m_dv, TVStack& extended_residual) + { + extended_residual.resize(residual.size() + m_projection.m_lagrangeMultipliers.size()); + for (int i = 0; i < residual.size(); ++i) + { + extended_residual[i] = residual[i]; + } + int offset = residual.size(); + for (int i = 0; i < m_projection.m_lagrangeMultipliers.size(); ++i) + { + const LagrangeMultiplier& lm = m_projection.m_lagrangeMultipliers[i]; + extended_residual[offset + i].setZero(); + for (int d = 0; d < lm.m_num_constraints; ++d) + { + for (int n = 0; n < lm.m_num_nodes; ++n) + { + extended_residual[offset + i][d] += lm.m_weights[n] * m_dv[lm.m_indices[n]].dot(lm.m_dirs[d]); + } + } + } + } + + void calculateContactForce(const TVStack& dv, const TVStack& rhs, TVStack& f) + { + size_t counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + f[counter] = (node.m_im == 0) ? 
btVector3(0, 0, 0) : dv[counter] / node.m_im; + ++counter; + } + } + for (int i = 0; i < m_lf.size(); ++i) + { + // add damping matrix + m_lf[i]->addScaledDampingForceDifferential(-m_dt, dv, f); + } + counter = 0; + for (; counter < f.size(); ++counter) + { + f[counter] = rhs[counter] - f[counter]; + } + } }; #endif /* btBackwardEulerObjective_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp index 132699c54f..4b11fccecb 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.cpp @@ -18,468 +18,489 @@ #include "btDeformableBodySolver.h" #include "btSoftBodyInternals.h" #include "LinearMath/btQuickprof.h" -static const int kMaxConjugateGradientIterations = 50; +static const int kMaxConjugateGradientIterations = 300; btDeformableBodySolver::btDeformableBodySolver() -: m_numNodes(0) -, m_cg(kMaxConjugateGradientIterations) -, m_cr(kMaxConjugateGradientIterations) -, m_maxNewtonIterations(5) -, m_newtonTolerance(1e-4) -, m_lineSearch(false) -, m_useProjection(false) + : m_numNodes(0), m_cg(kMaxConjugateGradientIterations), m_cr(kMaxConjugateGradientIterations), m_maxNewtonIterations(1), m_newtonTolerance(1e-4), m_lineSearch(false), m_useProjection(false) { - m_objective = new btDeformableBackwardEulerObjective(m_softBodies, m_backupVelocity); + m_objective = new btDeformableBackwardEulerObjective(m_softBodies, m_backupVelocity); } btDeformableBodySolver::~btDeformableBodySolver() { - delete m_objective; + delete m_objective; } void btDeformableBodySolver::solveDeformableConstraints(btScalar solverdt) { - BT_PROFILE("solveDeformableConstraints"); - if (!m_implicit) - { - m_objective->computeResidual(solverdt, m_residual); - m_objective->applyDynamicFriction(m_residual); - if (m_useProjection) - { - computeStep(m_dv, m_residual); - } - else - { - TVStack rhs, x; - 
m_objective->addLagrangeMultiplierRHS(m_residual, m_dv, rhs); - m_objective->addLagrangeMultiplier(m_dv, x); - m_objective->m_preconditioner->reinitialize(true); - computeStep(x, rhs); - for (int i = 0; i<m_dv.size(); ++i) - { - m_dv[i] = x[i]; - } - } - updateVelocity(); - } - else - { - for (int i = 0; i < m_maxNewtonIterations; ++i) - { - updateState(); - // add the inertia term in the residual - int counter = 0; - for (int k = 0; k < m_softBodies.size(); ++k) - { - btSoftBody* psb = m_softBodies[k]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - if (psb->m_nodes[j].m_im > 0) - { - m_residual[counter] = (-1./psb->m_nodes[j].m_im) * m_dv[counter]; - } - ++counter; - } - } - - m_objective->computeResidual(solverdt, m_residual); - if (m_objective->computeNorm(m_residual) < m_newtonTolerance && i > 0) - { - break; - } - // todo xuchenhan@: this really only needs to be calculated once - m_objective->applyDynamicFriction(m_residual); - if (m_lineSearch) - { - btScalar inner_product = computeDescentStep(m_ddv,m_residual); - btScalar alpha = 0.01, beta = 0.5; // Boyd & Vandenberghe suggested alpha between 0.01 and 0.3, beta between 0.1 to 0.8 - btScalar scale = 2; - btScalar f0 = m_objective->totalEnergy(solverdt)+kineticEnergy(), f1, f2; - backupDv(); - do { - scale *= beta; - if (scale < 1e-8) { - return; - } - updateEnergy(scale); - f1 = m_objective->totalEnergy(solverdt)+kineticEnergy(); - f2 = f0 - alpha * scale * inner_product; - } while (!(f1 < f2+SIMD_EPSILON)); // if anything here is nan then the search continues - revertDv(); - updateDv(scale); - } - else - { - computeStep(m_ddv, m_residual); - updateDv(); - } - for (int j = 0; j < m_numNodes; ++j) - { - m_ddv[j].setZero(); - m_residual[j].setZero(); - } - } - updateVelocity(); - } + BT_PROFILE("solveDeformableConstraints"); + if (!m_implicit) + { + m_objective->computeResidual(solverdt, m_residual); + m_objective->applyDynamicFriction(m_residual); + if (m_useProjection) + { + computeStep(m_dv, 
m_residual); + } + else + { + TVStack rhs, x; + m_objective->addLagrangeMultiplierRHS(m_residual, m_dv, rhs); + m_objective->addLagrangeMultiplier(m_dv, x); + m_objective->m_preconditioner->reinitialize(true); + computeStep(x, rhs); + for (int i = 0; i < m_dv.size(); ++i) + { + m_dv[i] = x[i]; + } + } + updateVelocity(); + } + else + { + for (int i = 0; i < m_maxNewtonIterations; ++i) + { + updateState(); + // add the inertia term in the residual + int counter = 0; + for (int k = 0; k < m_softBodies.size(); ++k) + { + btSoftBody* psb = m_softBodies[k]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + if (psb->m_nodes[j].m_im > 0) + { + m_residual[counter] = (-1. / psb->m_nodes[j].m_im) * m_dv[counter]; + } + ++counter; + } + } + + m_objective->computeResidual(solverdt, m_residual); + if (m_objective->computeNorm(m_residual) < m_newtonTolerance && i > 0) + { + break; + } + // todo xuchenhan@: this really only needs to be calculated once + m_objective->applyDynamicFriction(m_residual); + if (m_lineSearch) + { + btScalar inner_product = computeDescentStep(m_ddv, m_residual); + btScalar alpha = 0.01, beta = 0.5; // Boyd & Vandenberghe suggested alpha between 0.01 and 0.3, beta between 0.1 to 0.8 + btScalar scale = 2; + btScalar f0 = m_objective->totalEnergy(solverdt) + kineticEnergy(), f1, f2; + backupDv(); + do + { + scale *= beta; + if (scale < 1e-8) + { + return; + } + updateEnergy(scale); + f1 = m_objective->totalEnergy(solverdt) + kineticEnergy(); + f2 = f0 - alpha * scale * inner_product; + } while (!(f1 < f2 + SIMD_EPSILON)); // if anything here is nan then the search continues + revertDv(); + updateDv(scale); + } + else + { + computeStep(m_ddv, m_residual); + updateDv(); + } + for (int j = 0; j < m_numNodes; ++j) + { + m_ddv[j].setZero(); + m_residual[j].setZero(); + } + } + updateVelocity(); + } } btScalar btDeformableBodySolver::kineticEnergy() { - btScalar ke = 0; - for (int i = 0; i < m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - 
for (int j = 0; j < psb->m_nodes.size();++j) - { - btSoftBody::Node& node = psb->m_nodes[j]; - if (node.m_im > 0) - { - ke += m_dv[node.index].length2() * 0.5 / node.m_im; - } - } - } - return ke; + btScalar ke = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btSoftBody::Node& node = psb->m_nodes[j]; + if (node.m_im > 0) + { + ke += m_dv[node.index].length2() * 0.5 / node.m_im; + } + } + } + return ke; } void btDeformableBodySolver::backupDv() { - m_backup_dv.resize(m_dv.size()); - for (int i = 0; i<m_backup_dv.size(); ++i) - { - m_backup_dv[i] = m_dv[i]; - } + m_backup_dv.resize(m_dv.size()); + for (int i = 0; i < m_backup_dv.size(); ++i) + { + m_backup_dv[i] = m_dv[i]; + } } void btDeformableBodySolver::revertDv() { - for (int i = 0; i<m_backup_dv.size(); ++i) - { - m_dv[i] = m_backup_dv[i]; - } + for (int i = 0; i < m_backup_dv.size(); ++i) + { + m_dv[i] = m_backup_dv[i]; + } } void btDeformableBodySolver::updateEnergy(btScalar scale) { - for (int i = 0; i<m_dv.size(); ++i) - { - m_dv[i] = m_backup_dv[i] + scale * m_ddv[i]; - } - updateState(); + for (int i = 0; i < m_dv.size(); ++i) + { + m_dv[i] = m_backup_dv[i] + scale * m_ddv[i]; + } + updateState(); } - btScalar btDeformableBodySolver::computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose) { - m_cg.solve(*m_objective, ddv, residual, false); - btScalar inner_product = m_cg.dot(residual, m_ddv); - btScalar res_norm = m_objective->computeNorm(residual); - btScalar tol = 1e-5 * res_norm * m_objective->computeNorm(m_ddv); - if (inner_product < -tol) - { - if (verbose) - { - std::cout << "Looking backwards!" << std::endl; - } - for (int i = 0; i < m_ddv.size();++i) - { - m_ddv[i] = -m_ddv[i]; - } - inner_product = -inner_product; - } - else if (std::abs(inner_product) < tol) - { - if (verbose) - { - std::cout << "Gradient Descent!" 
<< std::endl; - } - btScalar scale = m_objective->computeNorm(m_ddv) / res_norm; - for (int i = 0; i < m_ddv.size();++i) - { - m_ddv[i] = scale * residual[i]; - } - inner_product = scale * res_norm * res_norm; - } - return inner_product; + m_cg.solve(*m_objective, ddv, residual, false); + btScalar inner_product = m_cg.dot(residual, m_ddv); + btScalar res_norm = m_objective->computeNorm(residual); + btScalar tol = 1e-5 * res_norm * m_objective->computeNorm(m_ddv); + if (inner_product < -tol) + { + if (verbose) + { + std::cout << "Looking backwards!" << std::endl; + } + for (int i = 0; i < m_ddv.size(); ++i) + { + m_ddv[i] = -m_ddv[i]; + } + inner_product = -inner_product; + } + else if (std::abs(inner_product) < tol) + { + if (verbose) + { + std::cout << "Gradient Descent!" << std::endl; + } + btScalar scale = m_objective->computeNorm(m_ddv) / res_norm; + for (int i = 0; i < m_ddv.size(); ++i) + { + m_ddv[i] = scale * residual[i]; + } + inner_product = scale * res_norm * res_norm; + } + return inner_product; } void btDeformableBodySolver::updateState() { - updateVelocity(); - updateTempPosition(); + updateVelocity(); + updateTempPosition(); } void btDeformableBodySolver::updateDv(btScalar scale) { - for (int i = 0; i < m_numNodes; ++i) - { - m_dv[i] += scale * m_ddv[i]; - } + for (int i = 0; i < m_numNodes; ++i) + { + m_dv[i] += scale * m_ddv[i]; + } } void btDeformableBodySolver::computeStep(TVStack& ddv, const TVStack& residual) { - if (m_useProjection) - m_cg.solve(*m_objective, ddv, residual, false); - else - m_cr.solve(*m_objective, ddv, residual, false); + if (m_useProjection) + m_cg.solve(*m_objective, ddv, residual, false); + else + m_cr.solve(*m_objective, ddv, residual, false); } -void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt) +void btDeformableBodySolver::reinitialize(const btAlignedObjectArray<btSoftBody*>& softBodies, btScalar dt) { - m_softBodies.copyFromArray(softBodies); - bool nodeUpdated 
= updateNodes(); - - if (nodeUpdated) - { - m_dv.resize(m_numNodes, btVector3(0,0,0)); - m_ddv.resize(m_numNodes, btVector3(0,0,0)); - m_residual.resize(m_numNodes, btVector3(0,0,0)); - m_backupVelocity.resize(m_numNodes, btVector3(0,0,0)); - } - - // need to setZero here as resize only set value for newly allocated items - for (int i = 0; i < m_numNodes; ++i) - { - m_dv[i].setZero(); - m_ddv[i].setZero(); - m_residual[i].setZero(); - } - - m_dt = dt; - m_objective->reinitialize(nodeUpdated, dt); - updateSoftBodies(); -} + m_softBodies.copyFromArray(softBodies); + bool nodeUpdated = updateNodes(); -void btDeformableBodySolver::setConstraints(const btContactSolverInfo& infoGlobal) -{ - BT_PROFILE("setConstraint"); - m_objective->setConstraints(infoGlobal); + if (nodeUpdated) + { + m_dv.resize(m_numNodes, btVector3(0, 0, 0)); + m_ddv.resize(m_numNodes, btVector3(0, 0, 0)); + m_residual.resize(m_numNodes, btVector3(0, 0, 0)); + m_backupVelocity.resize(m_numNodes, btVector3(0, 0, 0)); + } + + // need to setZero here as resize only set value for newly allocated items + for (int i = 0; i < m_numNodes; ++i) + { + m_dv[i].setZero(); + m_ddv[i].setZero(); + m_residual[i].setZero(); + } + + if (dt > 0) + { + m_dt = dt; + } + m_objective->reinitialize(nodeUpdated, dt); + updateSoftBodies(); } -btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal) +void btDeformableBodySolver::setConstraints(const btContactSolverInfo& infoGlobal) { - BT_PROFILE("solveContactConstraints"); - btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies,numDeformableBodies, infoGlobal); - return maxSquaredResidual; + BT_PROFILE("setConstraint"); + m_objective->setConstraints(infoGlobal); } -void btDeformableBodySolver::splitImpulseSetup(const btContactSolverInfo& infoGlobal) +btScalar btDeformableBodySolver::solveContactConstraints(btCollisionObject** deformableBodies, int 
numDeformableBodies, const btContactSolverInfo& infoGlobal) { - m_objective->m_projection.splitImpulseSetup(infoGlobal); + BT_PROFILE("solveContactConstraints"); + btScalar maxSquaredResidual = m_objective->m_projection.update(deformableBodies, numDeformableBodies, infoGlobal); + return maxSquaredResidual; } void btDeformableBodySolver::updateVelocity() { - int counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - psb->m_maxSpeedSquared = 0; - if (!psb->isActive()) - { - counter += psb->m_nodes.size(); - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - // set NaN to zero; - if (m_dv[counter] != m_dv[counter]) - { - m_dv[counter].setZero(); - } - psb->m_nodes[j].m_v = m_backupVelocity[counter]+m_dv[counter]; - psb->m_maxSpeedSquared = btMax(psb->m_maxSpeedSquared, psb->m_nodes[j].m_v.length2()); - ++counter; - } - } + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + psb->m_maxSpeedSquared = 0; + if (!psb->isActive()) + { + counter += psb->m_nodes.size(); + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + // set NaN to zero; + if (m_dv[counter] != m_dv[counter]) + { + m_dv[counter].setZero(); + } + if (m_implicit) + { + psb->m_nodes[j].m_v = m_backupVelocity[counter] + m_dv[counter]; + } + else + { + psb->m_nodes[j].m_v = m_backupVelocity[counter] + m_dv[counter] - psb->m_nodes[j].m_splitv; + } + psb->m_maxSpeedSquared = btMax(psb->m_maxSpeedSquared, psb->m_nodes[j].m_v.length2()); + ++counter; + } + } } void btDeformableBodySolver::updateTempPosition() { - int counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - counter += psb->m_nodes.size(); - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + m_dt * psb->m_nodes[j].m_v; - ++counter; - } - psb->updateDeformation(); - } + int counter 
= 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + counter += psb->m_nodes.size(); + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + m_dt * (psb->m_nodes[j].m_v + psb->m_nodes[j].m_splitv); + ++counter; + } + psb->updateDeformation(); + } } void btDeformableBodySolver::backupVelocity() { - int counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - m_backupVelocity[counter++] = psb->m_nodes[j].m_v; - } - } + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + m_backupVelocity[counter++] = psb->m_nodes[j].m_v; + } + } } void btDeformableBodySolver::setupDeformableSolve(bool implicit) { - int counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - counter += psb->m_nodes.size(); - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - if (implicit) - { - if ((psb->m_nodes[j].m_v - m_backupVelocity[counter]).norm() < SIMD_EPSILON) - m_dv[counter] = psb->m_nodes[j].m_v - m_backupVelocity[counter]; - else - m_dv[counter] = psb->m_nodes[j].m_v - psb->m_nodes[j].m_vn; - m_backupVelocity[counter] = psb->m_nodes[j].m_vn; - } - else - { - m_dv[counter] = psb->m_nodes[j].m_v - m_backupVelocity[counter]; - } - psb->m_nodes[j].m_v = m_backupVelocity[counter]; - ++counter; - } - } + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + counter += psb->m_nodes.size(); + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + if (implicit) + { + // setting the initial guess for newton, need m_dv = v_{n+1} - v_n for dofs that are in constraint. 
+ if (psb->m_nodes[j].m_v == m_backupVelocity[counter]) + m_dv[counter].setZero(); + else + m_dv[counter] = psb->m_nodes[j].m_v - psb->m_nodes[j].m_vn; + m_backupVelocity[counter] = psb->m_nodes[j].m_vn; + } + else + { + m_dv[counter] = psb->m_nodes[j].m_v + psb->m_nodes[j].m_splitv - m_backupVelocity[counter]; + } + psb->m_nodes[j].m_v = m_backupVelocity[counter]; + ++counter; + } + } } void btDeformableBodySolver::revertVelocity() { - int counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_v = m_backupVelocity[counter++]; - } - } + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_v = m_backupVelocity[counter++]; + } + } } bool btDeformableBodySolver::updateNodes() { - int numNodes = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - numNodes += m_softBodies[i]->m_nodes.size(); - if (numNodes != m_numNodes) - { - m_numNodes = numNodes; - return true; - } - return false; + int numNodes = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + numNodes += m_softBodies[i]->m_nodes.size(); + if (numNodes != m_numNodes) + { + m_numNodes = numNodes; + return true; + } + return false; } - void btDeformableBodySolver::predictMotion(btScalar solverdt) { - // apply explicit forces to velocity - m_objective->applyExplicitForce(m_residual); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody *psb = m_softBodies[i]; - - if (psb->isActive()) - { - // predict motion for collision detection - predictDeformableMotion(psb, solverdt); - } - } + // apply explicit forces to velocity + if (m_implicit) + { + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (psb->isActive()) + { + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = psb->m_nodes[j].m_x + 
psb->m_nodes[j].m_v * solverdt; + } + } + } + } + m_objective->applyExplicitForce(m_residual); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + + if (psb->isActive()) + { + // predict motion for collision detection + predictDeformableMotion(psb, solverdt); + } + } } void btDeformableBodySolver::predictDeformableMotion(btSoftBody* psb, btScalar dt) { - BT_PROFILE("btDeformableBodySolver::predictDeformableMotion"); - int i, ni; - - /* Update */ - if (psb->m_bUpdateRtCst) - { - psb->m_bUpdateRtCst = false; - psb->updateConstants(); - psb->m_fdbvt.clear(); - if (psb->m_cfg.collisions & btSoftBody::fCollision::SDF_RD) - { - psb->initializeFaceTree(); - } - } - - /* Prepare */ - psb->m_sst.sdt = dt * psb->m_cfg.timescale; - psb->m_sst.isdt = 1 / psb->m_sst.sdt; - psb->m_sst.velmrg = psb->m_sst.sdt * 3; - psb->m_sst.radmrg = psb->getCollisionShape()->getMargin(); - psb->m_sst.updmrg = psb->m_sst.radmrg * (btScalar)0.25; - /* Bounds */ - psb->updateBounds(); - - /* Integrate */ - // do not allow particles to move more than the bounding box size - btScalar max_v = (psb->m_bounds[1]-psb->m_bounds[0]).norm() / dt; - for (i = 0, ni = psb->m_nodes.size(); i < ni; ++i) - { - btSoftBody::Node& n = psb->m_nodes[i]; - // apply drag - n.m_v *= (1 - psb->m_cfg.drag); - // scale velocity back - if (n.m_v.norm() > max_v) - { - n.m_v.safeNormalize(); - n.m_v *= max_v; - } - n.m_q = n.m_x + n.m_v * dt; - n.m_penetration = 0; - } - - /* Nodes */ - psb->updateNodeTree(true, true); - if (!psb->m_fdbvt.empty()) - { - psb->updateFaceTree(true, true); - } - /* Clear contacts */ - psb->m_nodeRigidContacts.resize(0); - psb->m_faceRigidContacts.resize(0); - psb->m_faceNodeContacts.resize(0); - /* Optimize dbvt's */ -// psb->m_ndbvt.optimizeIncremental(1); -// psb->m_fdbvt.optimizeIncremental(1); -} + BT_PROFILE("btDeformableBodySolver::predictDeformableMotion"); + int i, ni; + + /* Update */ + if (psb->m_bUpdateRtCst) + { + psb->m_bUpdateRtCst = false; + 
psb->updateConstants(); + psb->m_fdbvt.clear(); + if (psb->m_cfg.collisions & btSoftBody::fCollision::SDF_RD) + { + psb->initializeFaceTree(); + } + } + /* Prepare */ + psb->m_sst.sdt = dt * psb->m_cfg.timescale; + psb->m_sst.isdt = 1 / psb->m_sst.sdt; + psb->m_sst.velmrg = psb->m_sst.sdt * 3; + psb->m_sst.radmrg = psb->getCollisionShape()->getMargin(); + psb->m_sst.updmrg = psb->m_sst.radmrg * (btScalar)0.25; + /* Bounds */ + psb->updateBounds(); + + /* Integrate */ + // do not allow particles to move more than the bounding box size + btScalar max_v = (psb->m_bounds[1] - psb->m_bounds[0]).norm() / dt; + for (i = 0, ni = psb->m_nodes.size(); i < ni; ++i) + { + btSoftBody::Node& n = psb->m_nodes[i]; + // apply drag + n.m_v *= (1 - psb->m_cfg.drag); + // scale velocity back + if (m_implicit) + { + n.m_q = n.m_x; + } + else + { + if (n.m_v.norm() > max_v) + { + n.m_v.safeNormalize(); + n.m_v *= max_v; + } + n.m_q = n.m_x + n.m_v * dt; + } + n.m_splitv.setZero(); + n.m_constrained = false; + } + + /* Nodes */ + psb->updateNodeTree(true, true); + if (!psb->m_fdbvt.empty()) + { + psb->updateFaceTree(true, true); + } + /* Clear contacts */ + psb->m_nodeRigidContacts.resize(0); + psb->m_faceRigidContacts.resize(0); + psb->m_faceNodeContacts.resize(0); + /* Optimize dbvt's */ + // psb->m_ndbvt.optimizeIncremental(1); + // psb->m_fdbvt.optimizeIncremental(1); +} void btDeformableBodySolver::updateSoftBodies() { - BT_PROFILE("updateSoftBodies"); - for (int i = 0; i < m_softBodies.size(); i++) - { - btSoftBody *psb = (btSoftBody *)m_softBodies[i]; - if (psb->isActive()) - { - psb->updateNormals(); - } - } + BT_PROFILE("updateSoftBodies"); + for (int i = 0; i < m_softBodies.size(); i++) + { + btSoftBody* psb = (btSoftBody*)m_softBodies[i]; + if (psb->isActive()) + { + psb->updateNormals(); + } + } } void btDeformableBodySolver::setImplicit(bool implicit) { - m_implicit = implicit; - m_objective->setImplicit(implicit); + m_implicit = implicit; + 
m_objective->setImplicit(implicit); } void btDeformableBodySolver::setLineSearch(bool lineSearch) { - m_lineSearch = lineSearch; + m_lineSearch = lineSearch; } diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h index d4e5f4c603..ae674d6e89 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableBodySolver.h @@ -16,7 +16,6 @@ #ifndef BT_DEFORMABLE_BODY_SOLVERS_H #define BT_DEFORMABLE_BODY_SOLVERS_H - #include "btSoftBodySolvers.h" #include "btDeformableBackwardEulerObjective.h" #include "btDeformableMultiBodyDynamicsWorld.h" @@ -30,133 +29,132 @@ class btDeformableMultiBodyDynamicsWorld; class btDeformableBodySolver : public btSoftBodySolver { - typedef btAlignedObjectArray<btVector3> TVStack; + typedef btAlignedObjectArray<btVector3> TVStack; + protected: - int m_numNodes; // total number of deformable body nodes - TVStack m_dv; // v_{n+1} - v_n - TVStack m_backup_dv; // backed up dv - TVStack m_ddv; // incremental dv - TVStack m_residual; // rhs of the linear solve - btAlignedObjectArray<btSoftBody *> m_softBodies; // all deformable bodies - TVStack m_backupVelocity; // backed up v, equals v_n for implicit, equals v_{n+1}^* for explicit - btScalar m_dt; // dt - btConjugateGradient<btDeformableBackwardEulerObjective> m_cg; // CG solver - btConjugateResidual<btDeformableBackwardEulerObjective> m_cr; // CR solver - bool m_implicit; // use implicit scheme if true, explicit scheme if false - int m_maxNewtonIterations; // max number of newton iterations - btScalar m_newtonTolerance; // stop newton iterations if f(x) < m_newtonTolerance - bool m_lineSearch; // If true, use newton's method with line search under implicit scheme + int m_numNodes; // total number of deformable body nodes + TVStack m_dv; // v_{n+1} - v_n + TVStack m_backup_dv; // backed up dv + TVStack m_ddv; // incremental dv + TVStack m_residual; // rhs of the 
linear solve + btAlignedObjectArray<btSoftBody*> m_softBodies; // all deformable bodies + TVStack m_backupVelocity; // backed up v, equals v_n for implicit, equals v_{n+1}^* for explicit + btScalar m_dt; // dt + btConjugateGradient<btDeformableBackwardEulerObjective> m_cg; // CG solver + btConjugateResidual<btDeformableBackwardEulerObjective> m_cr; // CR solver + bool m_implicit; // use implicit scheme if true, explicit scheme if false + int m_maxNewtonIterations; // max number of newton iterations + btScalar m_newtonTolerance; // stop newton iterations if f(x) < m_newtonTolerance + bool m_lineSearch; // If true, use newton's method with line search under implicit scheme public: - // handles data related to objective function - btDeformableBackwardEulerObjective* m_objective; - bool m_useProjection; - - btDeformableBodySolver(); - - virtual ~btDeformableBodySolver(); - - virtual SolverTypes getSolverType() const - { - return DEFORMABLE_SOLVER; - } - - // update soft body normals - virtual void updateSoftBodies(); - - virtual btScalar solveContactConstraints(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal); - - // solve the momentum equation - virtual void solveDeformableConstraints(btScalar solverdt); - - // set up the position error in split impulse - void splitImpulseSetup(const btContactSolverInfo& infoGlobal); - - // resize/clear data structures - void reinitialize(const btAlignedObjectArray<btSoftBody *>& softBodies, btScalar dt); - - // set up contact constraints - void setConstraints(const btContactSolverInfo& infoGlobal); - - // add in elastic forces and gravity to obtain v_{n+1}^* and calls predictDeformableMotion - virtual void predictMotion(btScalar solverdt); - - // move to temporary position x_{n+1}^* = x_n + dt * v_{n+1}^* - // x_{n+1}^* is stored in m_q - void predictDeformableMotion(btSoftBody* psb, btScalar dt); - - // save the current velocity to m_backupVelocity - void backupVelocity(); - - // 
set m_dv and m_backupVelocity to desired value to prepare for momentum solve - void setupDeformableSolve(bool implicit); - - // set the current velocity to that backed up in m_backupVelocity - void revertVelocity(); - - // set velocity to m_dv + m_backupVelocity - void updateVelocity(); - - // update the node count - bool updateNodes(); - - // calculate the change in dv resulting from the momentum solve - void computeStep(TVStack& ddv, const TVStack& residual); - - // calculate the change in dv resulting from the momentum solve when line search is turned on - btScalar computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose=false); - - virtual void copySoftBodyToVertexBuffer(const btSoftBody *const softBody, btVertexBufferDescriptor *vertexBuffer) {} - - // process collision between deformable and rigid - virtual void processCollision(btSoftBody * softBody, const btCollisionObjectWrapper * collisionObjectWrap) - { - softBody->defaultCollisionHandler(collisionObjectWrap); - } - - // process collision between deformable and deformable - virtual void processCollision(btSoftBody * softBody, btSoftBody * otherSoftBody) { - softBody->defaultCollisionHandler(otherSoftBody); - } - - // If true, implicit time stepping scheme is used. 
- // Otherwise, explicit time stepping scheme is used - void setImplicit(bool implicit); - - // If true, newton's method with line search is used when implicit time stepping scheme is turned on - void setLineSearch(bool lineSearch); - - // set temporary position x^* = x_n + dt * v - // update the deformation gradient at position x^* - void updateState(); - - // set dv = dv + scale * ddv - void updateDv(btScalar scale = 1); - - // set temporary position x^* = x_n + dt * v^* - void updateTempPosition(); - - // save the current dv to m_backup_dv; - void backupDv(); - - // set dv to the backed-up value - void revertDv(); - - // set dv = dv + scale * ddv - // set v^* = v_n + dv - // set temporary position x^* = x_n + dt * v^* - // update the deformation gradient at position x^* - void updateEnergy(btScalar scale); - - // calculates the appropriately scaled kinetic energy in the system, which is - // 1/2 * dv^T * M * dv - // used in line search - btScalar kineticEnergy(); - - // unused functions - virtual void optimize(btAlignedObjectArray<btSoftBody *> &softBodies, bool forceUpdate = false){} - virtual void solveConstraints(btScalar dt){} - virtual bool checkInitialized(){return true;} - virtual void copyBackToSoftBodies(bool bMove = true) {} + // handles data related to objective function + btDeformableBackwardEulerObjective* m_objective; + bool m_useProjection; + + btDeformableBodySolver(); + + virtual ~btDeformableBodySolver(); + + virtual SolverTypes getSolverType() const + { + return DEFORMABLE_SOLVER; + } + + // update soft body normals + virtual void updateSoftBodies(); + + virtual btScalar solveContactConstraints(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal); + + // solve the momentum equation + virtual void solveDeformableConstraints(btScalar solverdt); + + // resize/clear data structures + void reinitialize(const btAlignedObjectArray<btSoftBody*>& softBodies, btScalar dt); + + // set up contact 
constraints + void setConstraints(const btContactSolverInfo& infoGlobal); + + // add in elastic forces and gravity to obtain v_{n+1}^* and calls predictDeformableMotion + virtual void predictMotion(btScalar solverdt); + + // move to temporary position x_{n+1}^* = x_n + dt * v_{n+1}^* + // x_{n+1}^* is stored in m_q + void predictDeformableMotion(btSoftBody* psb, btScalar dt); + + // save the current velocity to m_backupVelocity + void backupVelocity(); + + // set m_dv and m_backupVelocity to desired value to prepare for momentum solve + void setupDeformableSolve(bool implicit); + + // set the current velocity to that backed up in m_backupVelocity + void revertVelocity(); + + // set velocity to m_dv + m_backupVelocity + void updateVelocity(); + + // update the node count + bool updateNodes(); + + // calculate the change in dv resulting from the momentum solve + void computeStep(TVStack& ddv, const TVStack& residual); + + // calculate the change in dv resulting from the momentum solve when line search is turned on + btScalar computeDescentStep(TVStack& ddv, const TVStack& residual, bool verbose = false); + + virtual void copySoftBodyToVertexBuffer(const btSoftBody* const softBody, btVertexBufferDescriptor* vertexBuffer) {} + + // process collision between deformable and rigid + virtual void processCollision(btSoftBody* softBody, const btCollisionObjectWrapper* collisionObjectWrap) + { + softBody->defaultCollisionHandler(collisionObjectWrap); + } + + // process collision between deformable and deformable + virtual void processCollision(btSoftBody* softBody, btSoftBody* otherSoftBody) + { + softBody->defaultCollisionHandler(otherSoftBody); + } + + // If true, implicit time stepping scheme is used. 
+ // Otherwise, explicit time stepping scheme is used + void setImplicit(bool implicit); + + // If true, newton's method with line search is used when implicit time stepping scheme is turned on + void setLineSearch(bool lineSearch); + + // set temporary position x^* = x_n + dt * v + // update the deformation gradient at position x^* + void updateState(); + + // set dv = dv + scale * ddv + void updateDv(btScalar scale = 1); + + // set temporary position x^* = x_n + dt * v^* + void updateTempPosition(); + + // save the current dv to m_backup_dv; + void backupDv(); + + // set dv to the backed-up value + void revertDv(); + + // set dv = dv + scale * ddv + // set v^* = v_n + dv + // set temporary position x^* = x_n + dt * v^* + // update the deformation gradient at position x^* + void updateEnergy(btScalar scale); + + // calculates the appropriately scaled kinetic energy in the system, which is + // 1/2 * dv^T * M * dv + // used in line search + btScalar kineticEnergy(); + + // unused functions + virtual void optimize(btAlignedObjectArray<btSoftBody*>& softBodies, bool forceUpdate = false) {} + virtual void solveConstraints(btScalar dt) {} + virtual bool checkInitialized() { return true; } + virtual void copyBackToSoftBodies(bool bMove = true) {} }; #endif /* btDeformableBodySolver_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp index 2864446de6..09398d79a5 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.cpp @@ -16,387 +16,503 @@ #include "btDeformableContactConstraint.h" /* ================ Deformable Node Anchor =================== */ btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& a, const btContactSolverInfo& infoGlobal) -: m_anchor(&a) -, btDeformableContactConstraint(a.m_cti.m_normal, infoGlobal) + 
: m_anchor(&a), btDeformableContactConstraint(a.m_cti.m_normal, infoGlobal) { } btDeformableNodeAnchorConstraint::btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other) -: m_anchor(other.m_anchor) -, btDeformableContactConstraint(other) + : m_anchor(other.m_anchor), btDeformableContactConstraint(other) { } btVector3 btDeformableNodeAnchorConstraint::getVa() const { - const btSoftBody::sCti& cti = m_anchor->m_cti; - btVector3 va(0, 0, 0); - if (cti.m_colObj->hasContactResponse()) - { - btRigidBody* rigidCol = 0; - btMultiBodyLinkCollider* multibodyLinkCol = 0; - - // grab the velocity of the rigid body - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); - va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_anchor->m_c1)) : btVector3(0, 0, 0); - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - const btScalar* J_n = &m_anchor->jacobianData_normal.m_jacobians[0]; - const btScalar* J_t1 = &m_anchor->jacobianData_t1.m_jacobians[0]; - const btScalar* J_t2 = &m_anchor->jacobianData_t2.m_jacobians[0]; - const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector(); - const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector(); - // add in the normal component of the va - btScalar vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_n[k]; - } - va = cti.m_normal * vel; - // add in the tangential components of the va - vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_t1[k]; - } - va += m_anchor->t1 * vel; - vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_t2[k]; - } - va += m_anchor->t2 * 
vel; - } - } - } - return va; + const btSoftBody::sCti& cti = m_anchor->m_cti; + btVector3 va(0, 0, 0); + if (cti.m_colObj->hasContactResponse()) + { + btRigidBody* rigidCol = 0; + btMultiBodyLinkCollider* multibodyLinkCol = 0; + + // grab the velocity of the rigid body + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + va = rigidCol ? (rigidCol->getVelocityInLocalPoint(m_anchor->m_c1)) : btVector3(0, 0, 0); + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + const btScalar* J_n = &m_anchor->jacobianData_normal.m_jacobians[0]; + const btScalar* J_t1 = &m_anchor->jacobianData_t1.m_jacobians[0]; + const btScalar* J_t2 = &m_anchor->jacobianData_t2.m_jacobians[0]; + const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector(); + const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector(); + // add in the normal component of the va + btScalar vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_n[k]; + } + va = cti.m_normal * vel; + // add in the tangential components of the va + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_t1[k]; + } + va += m_anchor->t1 * vel; + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_t2[k]; + } + va += m_anchor->t2 * vel; + } + } + } + return va; } btScalar btDeformableNodeAnchorConstraint::solveConstraint(const btContactSolverInfo& infoGlobal) { - const btSoftBody::sCti& cti = m_anchor->m_cti; - btVector3 va = getVa(); - btVector3 vb = getVb(); - btVector3 vr = (vb - va); - // + (m_anchor->m_node->m_x - cti.m_colObj->getWorldTransform() * m_anchor->m_local) * 10.0 - 
const btScalar dn = btDot(vr, vr); - // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt - btScalar residualSquare = dn*dn; - btVector3 impulse = m_anchor->m_c0 * vr; - // apply impulse to deformable nodes involved and change their velocities - applyImpulse(impulse); - - // apply impulse to the rigid/multibodies involved and change their velocities - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - btRigidBody* rigidCol = 0; - rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); - if (rigidCol) - { - rigidCol->applyImpulse(impulse, m_anchor->m_c1); - } - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - btMultiBodyLinkCollider* multibodyLinkCol = 0; - multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - const btScalar* deltaV_normal = &m_anchor->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - // apply normal component of the impulse - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal)); - // apply tangential component of the impulse - const btScalar* deltaV_t1 = &m_anchor->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_anchor->t1)); - const btScalar* deltaV_t2 = &m_anchor->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_anchor->t2)); - } - } - return residualSquare; + const btSoftBody::sCti& cti = m_anchor->m_cti; + btVector3 va = getVa(); + btVector3 vb = getVb(); + btVector3 vr = (vb - va); + // + (m_anchor->m_node->m_x - cti.m_colObj->getWorldTransform() * m_anchor->m_local) * 10.0 + const btScalar dn = btDot(vr, vr); + // dn is the normal component of velocity diffrerence. Approximates the residual. 
// todo xuchenhan@: this prob needs to be scaled by dt + btScalar residualSquare = dn * dn; + btVector3 impulse = m_anchor->m_c0 * vr; + // apply impulse to deformable nodes involved and change their velocities + applyImpulse(impulse); + + // apply impulse to the rigid/multibodies involved and change their velocities + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + btRigidBody* rigidCol = 0; + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + if (rigidCol) + { + rigidCol->applyImpulse(impulse, m_anchor->m_c1); + } + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = 0; + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const btScalar* deltaV_normal = &m_anchor->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + // apply normal component of the impulse + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal)); + // apply tangential component of the impulse + const btScalar* deltaV_t1 = &m_anchor->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_anchor->t1)); + const btScalar* deltaV_t2 = &m_anchor->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_anchor->t2)); + } + } + return residualSquare; } btVector3 btDeformableNodeAnchorConstraint::getVb() const { - return m_anchor->m_node->m_v; + return m_anchor->m_node->m_v; } void btDeformableNodeAnchorConstraint::applyImpulse(const btVector3& impulse) { - btVector3 dv = impulse * m_anchor->m_c2; - m_anchor->m_node->m_v -= dv; + btVector3 dv = impulse * m_anchor->m_c2; + m_anchor->m_node->m_v -= dv; } /* ================ Deformable vs. 
Rigid =================== */ btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal) -: m_contact(&c) -, btDeformableContactConstraint(c.m_cti.m_normal, infoGlobal) + : m_contact(&c), btDeformableContactConstraint(c.m_cti.m_normal, infoGlobal) { - m_total_normal_dv.setZero(); - m_total_tangent_dv.setZero(); - // The magnitude of penetration is the depth of penetration. - m_penetration = c.m_cti.m_offset; -// m_penetration = btMin(btScalar(0),c.m_cti.m_offset); + m_total_normal_dv.setZero(); + m_total_tangent_dv.setZero(); + // The magnitude of penetration is the depth of penetration. + m_penetration = c.m_cti.m_offset; + m_total_split_impulse = 0; + m_binding = false; } btDeformableRigidContactConstraint::btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other) -: m_contact(other.m_contact) -, btDeformableContactConstraint(other) -, m_penetration(other.m_penetration) + : m_contact(other.m_contact), btDeformableContactConstraint(other), m_penetration(other.m_penetration), m_total_split_impulse(other.m_total_split_impulse), m_binding(other.m_binding) { - m_total_normal_dv = other.m_total_normal_dv; - m_total_tangent_dv = other.m_total_tangent_dv; + m_total_normal_dv = other.m_total_normal_dv; + m_total_tangent_dv = other.m_total_tangent_dv; } - btVector3 btDeformableRigidContactConstraint::getVa() const { - const btSoftBody::sCti& cti = m_contact->m_cti; - btVector3 va(0, 0, 0); - if (cti.m_colObj->hasContactResponse()) - { - btRigidBody* rigidCol = 0; - btMultiBodyLinkCollider* multibodyLinkCol = 0; - - // grab the velocity of the rigid body - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); - va = rigidCol ? 
(rigidCol->getVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0); - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0]; - const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0]; - const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0]; - const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector(); - const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector(); - // add in the normal component of the va - btScalar vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_n[k]; - } - va = cti.m_normal * vel; - // add in the tangential components of the va - vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_t1[k]; - } - va += m_contact->t1 * vel; - vel = 0.0; - for (int k = 0; k < ndof; ++k) - { - vel += (local_v[k]+local_dv[k]) * J_t2[k]; - } - va += m_contact->t2 * vel; - } - } - } - return va; + const btSoftBody::sCti& cti = m_contact->m_cti; + btVector3 va(0, 0, 0); + if (cti.m_colObj->hasContactResponse()) + { + btRigidBody* rigidCol = 0; + btMultiBodyLinkCollider* multibodyLinkCol = 0; + + // grab the velocity of the rigid body + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + va = rigidCol ? 
(rigidCol->getVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0); + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0]; + const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0]; + const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0]; + const btScalar* local_v = multibodyLinkCol->m_multiBody->getVelocityVector(); + const btScalar* local_dv = multibodyLinkCol->m_multiBody->getDeltaVelocityVector(); + // add in the normal component of the va + btScalar vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_n[k]; + } + va = cti.m_normal * vel; + // add in the tangential components of the va + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_t1[k]; + } + va += m_contact->t1 * vel; + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += (local_v[k] + local_dv[k]) * J_t2[k]; + } + va += m_contact->t2 * vel; + } + } + } + return va; +} + +btVector3 btDeformableRigidContactConstraint::getSplitVa() const +{ + const btSoftBody::sCti& cti = m_contact->m_cti; + btVector3 va(0, 0, 0); + if (cti.m_colObj->hasContactResponse()) + { + btRigidBody* rigidCol = 0; + btMultiBodyLinkCollider* multibodyLinkCol = 0; + + // grab the velocity of the rigid body + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + va = rigidCol ? 
(rigidCol->getPushVelocityInLocalPoint(m_contact->m_c1)) : btVector3(0, 0, 0); + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + const btScalar* J_n = &m_contact->jacobianData_normal.m_jacobians[0]; + const btScalar* J_t1 = &m_contact->jacobianData_t1.m_jacobians[0]; + const btScalar* J_t2 = &m_contact->jacobianData_t2.m_jacobians[0]; + const btScalar* local_split_v = multibodyLinkCol->m_multiBody->getSplitVelocityVector(); + // add in the normal component of the va + btScalar vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += local_split_v[k] * J_n[k]; + } + va = cti.m_normal * vel; + // add in the tangential components of the va + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += local_split_v[k] * J_t1[k]; + } + va += m_contact->t1 * vel; + vel = 0.0; + for (int k = 0; k < ndof; ++k) + { + vel += local_split_v[k] * J_t2[k]; + } + va += m_contact->t2 * vel; + } + } + } + return va; } btScalar btDeformableRigidContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal) { - const btSoftBody::sCti& cti = m_contact->m_cti; - btVector3 va = getVa(); - btVector3 vb = getVb(); - btVector3 vr = vb - va; - btScalar dn = btDot(vr, cti.m_normal) + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep; - // dn is the normal component of velocity diffrerence. Approximates the residual. 
// todo xuchenhan@: this prob needs to be scaled by dt - btScalar residualSquare = dn*dn; - btVector3 impulse = m_contact->m_c0 * (vr + m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep * cti.m_normal) ; - const btVector3 impulse_normal = m_contact->m_c0 * (cti.m_normal * dn); - btVector3 impulse_tangent = impulse - impulse_normal; - btVector3 old_total_tangent_dv = m_total_tangent_dv; - // m_c2 is the inverse mass of the deformable node/face - m_total_normal_dv -= impulse_normal * m_contact->m_c2; - m_total_tangent_dv -= impulse_tangent * m_contact->m_c2; - - if (m_total_normal_dv.dot(cti.m_normal) < 0) - { - // separating in the normal direction - m_static = false; - m_total_tangent_dv = btVector3(0,0,0); - impulse_tangent.setZero(); - } - else - { - if (m_total_normal_dv.norm() * m_contact->m_c3 < m_total_tangent_dv.norm()) - { - // dynamic friction - // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations. 
- m_static = false; - if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON) - { - m_total_tangent_dv = btVector3(0,0,0); - } - else - { - m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_c3; - } - impulse_tangent = -btScalar(1)/m_contact->m_c2 * (m_total_tangent_dv - old_total_tangent_dv); - } - else - { - // static friction - m_static = true; - } - } - impulse = impulse_normal + impulse_tangent; - // apply impulse to deformable nodes involved and change their velocities - applyImpulse(impulse); - if (residualSquare < 1e-7) - return residualSquare; - // apply impulse to the rigid/multibodies involved and change their velocities - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - btRigidBody* rigidCol = 0; - rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); - if (rigidCol) - { - rigidCol->applyImpulse(impulse, m_contact->m_c1); - } - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - btMultiBodyLinkCollider* multibodyLinkCol = 0; - multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - // apply normal component of the impulse - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal)); - if (impulse_tangent.norm() > SIMD_EPSILON) - { - // apply tangential component of the impulse - const btScalar* deltaV_t1 = &m_contact->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_contact->t1)); - const btScalar* deltaV_t2 = &m_contact->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_contact->t2)); - } - } - } -// va = getVa(); -// vb = getVb(); -// vr = vb - va; -// btScalar dn1 = btDot(vr, 
cti.m_normal) / 150; -// m_penetration += dn1; - return residualSquare; + const btSoftBody::sCti& cti = m_contact->m_cti; + btVector3 va = getVa(); + btVector3 vb = getVb(); + btVector3 vr = vb - va; + btScalar dn = btDot(vr, cti.m_normal) + m_total_normal_dv.dot(cti.m_normal) * infoGlobal.m_deformable_cfm; + if (m_penetration > 0) + { + dn += m_penetration / infoGlobal.m_timeStep; + } + if (!infoGlobal.m_splitImpulse) + { + dn += m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep; + } + // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt + btVector3 impulse = m_contact->m_c0 * (vr + m_total_normal_dv * infoGlobal.m_deformable_cfm + ((m_penetration > 0) ? m_penetration / infoGlobal.m_timeStep * cti.m_normal : btVector3(0, 0, 0))); + if (!infoGlobal.m_splitImpulse) + { + impulse += m_contact->m_c0 * (m_penetration * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep * cti.m_normal); + } + btVector3 impulse_normal = m_contact->m_c0 * (cti.m_normal * dn); + btVector3 impulse_tangent = impulse - impulse_normal; + if (dn > 0) + { + return 0; + } + m_binding = true; + btScalar residualSquare = dn * dn; + btVector3 old_total_tangent_dv = m_total_tangent_dv; + // m_c5 is the inverse mass of the deformable node/face + m_total_normal_dv -= m_contact->m_c5 * impulse_normal; + m_total_tangent_dv -= m_contact->m_c5 * impulse_tangent; + + if (m_total_normal_dv.dot(cti.m_normal) < 0) + { + // separating in the normal direction + m_binding = false; + m_static = false; + impulse_tangent.setZero(); + } + else + { + if (m_total_normal_dv.norm() * m_contact->m_c3 < m_total_tangent_dv.norm()) + { + // dynamic friction + // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations. 
+ m_static = false; + if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON) + { + m_total_tangent_dv = btVector3(0, 0, 0); + } + else + { + m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_c3; + } + // impulse_tangent = -btScalar(1)/m_contact->m_c2 * (m_total_tangent_dv - old_total_tangent_dv); + impulse_tangent = m_contact->m_c5.inverse() * (old_total_tangent_dv - m_total_tangent_dv); + } + else + { + // static friction + m_static = true; + } + } + impulse = impulse_normal + impulse_tangent; + // apply impulse to deformable nodes involved and change their velocities + applyImpulse(impulse); + // apply impulse to the rigid/multibodies involved and change their velocities + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + btRigidBody* rigidCol = 0; + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + if (rigidCol) + { + rigidCol->applyImpulse(impulse, m_contact->m_c1); + } + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = 0; + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + // apply normal component of the impulse + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_normal, impulse.dot(cti.m_normal)); + if (impulse_tangent.norm() > SIMD_EPSILON) + { + // apply tangential component of the impulse + const btScalar* deltaV_t1 = &m_contact->jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t1, impulse.dot(m_contact->t1)); + const btScalar* deltaV_t2 = &m_contact->jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + multibodyLinkCol->m_multiBody->applyDeltaVeeMultiDof2(deltaV_t2, impulse.dot(m_contact->t2)); + } + } + } + return residualSquare; +} + 
+btScalar btDeformableRigidContactConstraint::solveSplitImpulse(const btContactSolverInfo& infoGlobal) +{ + btScalar MAX_PENETRATION_CORRECTION = infoGlobal.m_deformable_maxErrorReduction; + const btSoftBody::sCti& cti = m_contact->m_cti; + btVector3 vb = getSplitVb(); + btVector3 va = getSplitVa(); + btScalar p = m_penetration; + if (p > 0) + { + return 0; + } + btVector3 vr = vb - va; + btScalar dn = btDot(vr, cti.m_normal) + p * infoGlobal.m_deformable_erp / infoGlobal.m_timeStep; + if (dn > 0) + { + return 0; + } + if (m_total_split_impulse + dn > MAX_PENETRATION_CORRECTION) + { + dn = MAX_PENETRATION_CORRECTION - m_total_split_impulse; + } + if (m_total_split_impulse + dn < -MAX_PENETRATION_CORRECTION) + { + dn = -MAX_PENETRATION_CORRECTION - m_total_split_impulse; + } + m_total_split_impulse += dn; + + btScalar residualSquare = dn * dn; + const btVector3 impulse = m_contact->m_c0 * (cti.m_normal * dn); + applySplitImpulse(impulse); + + // apply split impulse to the rigid/multibodies involved and change their velocities + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + btRigidBody* rigidCol = 0; + rigidCol = (btRigidBody*)btRigidBody::upcast(cti.m_colObj); + if (rigidCol) + { + rigidCol->applyPushImpulse(impulse, m_contact->m_c1); + } + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = 0; + multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + const btScalar* deltaV_normal = &m_contact->jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + // apply normal component of the impulse + multibodyLinkCol->m_multiBody->applyDeltaSplitVeeMultiDof(deltaV_normal, impulse.dot(cti.m_normal)); + } + } + return residualSquare; } /* ================ Node vs. 
Rigid =================== */ btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal) - : m_node(contact.m_node) - , btDeformableRigidContactConstraint(contact, infoGlobal) - { - } + : m_node(contact.m_node), btDeformableRigidContactConstraint(contact, infoGlobal) +{ +} btDeformableNodeRigidContactConstraint::btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other) -: m_node(other.m_node) -, btDeformableRigidContactConstraint(other) + : m_node(other.m_node), btDeformableRigidContactConstraint(other) { } btVector3 btDeformableNodeRigidContactConstraint::getVb() const { - return m_node->m_v; + return m_node->m_v; } +btVector3 btDeformableNodeRigidContactConstraint::getSplitVb() const +{ + return m_node->m_splitv; +} btVector3 btDeformableNodeRigidContactConstraint::getDv(const btSoftBody::Node* node) const { - return m_total_normal_dv + m_total_tangent_dv; + return m_total_normal_dv + m_total_tangent_dv; } void btDeformableNodeRigidContactConstraint::applyImpulse(const btVector3& impulse) { - const btSoftBody::DeformableNodeRigidContact* contact = getContact(); - btVector3 dv = impulse * contact->m_c2; - contact->m_node->m_v -= dv; + const btSoftBody::DeformableNodeRigidContact* contact = getContact(); + btVector3 dv = contact->m_c5 * impulse; + contact->m_node->m_v -= dv; +} + +void btDeformableNodeRigidContactConstraint::applySplitImpulse(const btVector3& impulse) +{ + const btSoftBody::DeformableNodeRigidContact* contact = getContact(); + btVector3 dv = contact->m_c5 * impulse; + contact->m_node->m_splitv -= dv; } /* ================ Face vs. 
Rigid =================== */ btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting) -: m_face(contact.m_face) -, m_useStrainLimiting(useStrainLimiting) -, btDeformableRigidContactConstraint(contact, infoGlobal) + : m_face(contact.m_face), m_useStrainLimiting(useStrainLimiting), btDeformableRigidContactConstraint(contact, infoGlobal) { } btDeformableFaceRigidContactConstraint::btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other) -: m_face(other.m_face) -, m_useStrainLimiting(other.m_useStrainLimiting) -, btDeformableRigidContactConstraint(other) + : m_face(other.m_face), m_useStrainLimiting(other.m_useStrainLimiting), btDeformableRigidContactConstraint(other) { } btVector3 btDeformableFaceRigidContactConstraint::getVb() const { - const btSoftBody::DeformableFaceRigidContact* contact = getContact(); - btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2]; - return vb; + const btSoftBody::DeformableFaceRigidContact* contact = getContact(); + btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2]; + return vb; } - btVector3 btDeformableFaceRigidContactConstraint::getDv(const btSoftBody::Node* node) const { - btVector3 face_dv = m_total_normal_dv + m_total_tangent_dv; - const btSoftBody::DeformableFaceRigidContact* contact = getContact(); - if (m_face->m_n[0] == node) - { - return face_dv * contact->m_weights[0]; - } - if (m_face->m_n[1] == node) - { - return face_dv * contact->m_weights[1]; - } - btAssert(node == m_face->m_n[2]); - return face_dv * contact->m_weights[2]; + btVector3 face_dv = m_total_normal_dv + m_total_tangent_dv; + const btSoftBody::DeformableFaceRigidContact* contact = getContact(); + if 
(m_face->m_n[0] == node) + { + return face_dv * contact->m_weights[0]; + } + if (m_face->m_n[1] == node) + { + return face_dv * contact->m_weights[1]; + } + btAssert(node == m_face->m_n[2]); + return face_dv * contact->m_weights[2]; } void btDeformableFaceRigidContactConstraint::applyImpulse(const btVector3& impulse) { - const btSoftBody::DeformableFaceRigidContact* contact = getContact(); - btVector3 dv = impulse * contact->m_c2; - btSoftBody::Face* face = contact->m_face; - - btVector3& v0 = face->m_n[0]->m_v; - btVector3& v1 = face->m_n[1]->m_v; - btVector3& v2 = face->m_n[2]->m_v; - const btScalar& im0 = face->m_n[0]->m_im; - const btScalar& im1 = face->m_n[1]->m_im; - const btScalar& im2 = face->m_n[2]->m_im; - if (im0 > 0) - v0 -= dv * contact->m_weights[0]; - if (im1 > 0) - v1 -= dv * contact->m_weights[1]; - if (im2 > 0) - v2 -= dv * contact->m_weights[2]; + const btSoftBody::DeformableFaceRigidContact* contact = getContact(); + btVector3 dv = impulse * contact->m_c2; + btSoftBody::Face* face = contact->m_face; + + btVector3& v0 = face->m_n[0]->m_v; + btVector3& v1 = face->m_n[1]->m_v; + btVector3& v2 = face->m_n[2]->m_v; + const btScalar& im0 = face->m_n[0]->m_im; + const btScalar& im1 = face->m_n[1]->m_im; + const btScalar& im2 = face->m_n[2]->m_im; + if (im0 > 0) + v0 -= dv * contact->m_weights[0]; + if (im1 > 0) + v1 -= dv * contact->m_weights[1]; + if (im2 > 0) + v2 -= dv * contact->m_weights[2]; if (m_useStrainLimiting) { - btScalar relaxation = 1./btScalar(m_infoGlobal->m_numIterations); - btScalar m01 = (relaxation/(im0 + im1)); - btScalar m02 = (relaxation/(im0 + im2)); - btScalar m12 = (relaxation/(im1 + im2)); - #ifdef USE_STRAIN_RATE_LIMITING + btScalar relaxation = 1. 
/ btScalar(m_infoGlobal->m_numIterations); + btScalar m01 = (relaxation / (im0 + im1)); + btScalar m02 = (relaxation / (im0 + im2)); + btScalar m12 = (relaxation / (im1 + im2)); +#ifdef USE_STRAIN_RATE_LIMITING // apply strain limiting to prevent the new velocity to change the current length of the edge by more than 1%. btScalar p = 0.01; btVector3& x0 = face->m_n[0]->m_x; btVector3& x1 = face->m_n[1]->m_x; btVector3& x2 = face->m_n[2]->m_x; - const btVector3 x_diff[3] = {x1-x0, x2-x0, x2-x1}; - const btVector3 v_diff[3] = {v1-v0, v2-v0, v2-v1}; + const btVector3 x_diff[3] = {x1 - x0, x2 - x0, x2 - x1}; + const btVector3 v_diff[3] = {v1 - v0, v2 - v0, v2 - v1}; btVector3 u[3]; btScalar x_diff_dot_u, dn[3]; btScalar dt = m_infoGlobal->m_timeStep; @@ -404,172 +520,201 @@ void btDeformableFaceRigidContactConstraint::applyImpulse(const btVector3& impul { btScalar x_diff_norm = x_diff[i].safeNorm(); btScalar x_diff_norm_new = (x_diff[i] + v_diff[i] * dt).safeNorm(); - btScalar strainRate = x_diff_norm_new/x_diff_norm; + btScalar strainRate = x_diff_norm_new / x_diff_norm; u[i] = v_diff[i]; u[i].safeNormalize(); - if (x_diff_norm == 0 || (1-p <= strainRate && strainRate <= 1+p)) + if (x_diff_norm == 0 || (1 - p <= strainRate && strainRate <= 1 + p)) { dn[i] = 0; continue; } x_diff_dot_u = btDot(x_diff[i], u[i]); btScalar s; - if (1-p > strainRate) + if (1 - p > strainRate) { - s = 1/dt * (-x_diff_dot_u - btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p-2*p) * x_diff_norm * x_diff_norm)); + s = 1 / dt * (-x_diff_dot_u - btSqrt(x_diff_dot_u * x_diff_dot_u + (p * p - 2 * p) * x_diff_norm * x_diff_norm)); } else { - s = 1/dt * (-x_diff_dot_u + btSqrt(x_diff_dot_u*x_diff_dot_u + (p*p+2*p) * x_diff_norm * x_diff_norm)); + s = 1 / dt * (-x_diff_dot_u + btSqrt(x_diff_dot_u * x_diff_dot_u + (p * p + 2 * p) * x_diff_norm * x_diff_norm)); } // x_diff_norm_new = (x_diff[i] + s * u[i] * dt).safeNorm(); // strainRate = x_diff_norm_new/x_diff_norm; dn[i] = s - v_diff[i].safeNorm(); } - 
btVector3 dv0 = im0 * (m01 * u[0]*(-dn[0]) + m02 * u[1]*-(dn[1])); - btVector3 dv1 = im1 * (m01 * u[0]*(dn[0]) + m12 * u[2]*(-dn[2])); - btVector3 dv2 = im2 * (m12 * u[2]*(dn[2]) + m02 * u[1]*(dn[1])); - #else + btVector3 dv0 = im0 * (m01 * u[0] * (-dn[0]) + m02 * u[1] * -(dn[1])); + btVector3 dv1 = im1 * (m01 * u[0] * (dn[0]) + m12 * u[2] * (-dn[2])); + btVector3 dv2 = im2 * (m12 * u[2] * (dn[2]) + m02 * u[1] * (dn[1])); +#else // apply strain limiting to prevent undamped modes - btVector3 dv0 = im0 * (m01 * (v1-v0) + m02 * (v2-v0)); - btVector3 dv1 = im1 * (m01 * (v0-v1) + m12 * (v2-v1)); - btVector3 dv2 = im2 * (m12 * (v1-v2) + m02 * (v0-v2)); - #endif + btVector3 dv0 = im0 * (m01 * (v1 - v0) + m02 * (v2 - v0)); + btVector3 dv1 = im1 * (m01 * (v0 - v1) + m12 * (v2 - v1)); + btVector3 dv2 = im2 * (m12 * (v1 - v2) + m02 * (v0 - v2)); +#endif v0 += dv0; v1 += dv1; v2 += dv2; } } +btVector3 btDeformableFaceRigidContactConstraint::getSplitVb() const +{ + const btSoftBody::DeformableFaceRigidContact* contact = getContact(); + btVector3 vb = (m_face->m_n[0]->m_splitv) * contact->m_bary[0] + (m_face->m_n[1]->m_splitv) * contact->m_bary[1] + (m_face->m_n[2]->m_splitv) * contact->m_bary[2]; + return vb; +} + +void btDeformableFaceRigidContactConstraint::applySplitImpulse(const btVector3& impulse) +{ + const btSoftBody::DeformableFaceRigidContact* contact = getContact(); + btVector3 dv = impulse * contact->m_c2; + btSoftBody::Face* face = contact->m_face; + btVector3& v0 = face->m_n[0]->m_splitv; + btVector3& v1 = face->m_n[1]->m_splitv; + btVector3& v2 = face->m_n[2]->m_splitv; + const btScalar& im0 = face->m_n[0]->m_im; + const btScalar& im1 = face->m_n[1]->m_im; + const btScalar& im2 = face->m_n[2]->m_im; + if (im0 > 0) + { + v0 -= dv * contact->m_weights[0]; + } + if (im1 > 0) + { + v1 -= dv * contact->m_weights[1]; + } + if (im2 > 0) + { + v2 -= dv * contact->m_weights[2]; + } +} + /* ================ Face vs. 
Node =================== */ btDeformableFaceNodeContactConstraint::btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal) -: m_node(contact.m_node) -, m_face(contact.m_face) -, m_contact(&contact) -, btDeformableContactConstraint(contact.m_normal, infoGlobal) + : m_node(contact.m_node), m_face(contact.m_face), m_contact(&contact), btDeformableContactConstraint(contact.m_normal, infoGlobal) { - m_total_normal_dv.setZero(); - m_total_tangent_dv.setZero(); + m_total_normal_dv.setZero(); + m_total_tangent_dv.setZero(); } btVector3 btDeformableFaceNodeContactConstraint::getVa() const { - return m_node->m_v; + return m_node->m_v; } btVector3 btDeformableFaceNodeContactConstraint::getVb() const { - const btSoftBody::DeformableFaceNodeContact* contact = getContact(); - btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2]; - return vb; + const btSoftBody::DeformableFaceNodeContact* contact = getContact(); + btVector3 vb = m_face->m_n[0]->m_v * contact->m_bary[0] + m_face->m_n[1]->m_v * contact->m_bary[1] + m_face->m_n[2]->m_v * contact->m_bary[2]; + return vb; } btVector3 btDeformableFaceNodeContactConstraint::getDv(const btSoftBody::Node* n) const { - btVector3 dv = m_total_normal_dv + m_total_tangent_dv; - if (n == m_node) - return dv; - const btSoftBody::DeformableFaceNodeContact* contact = getContact(); - if (m_face->m_n[0] == n) - { - return dv * contact->m_weights[0]; - } - if (m_face->m_n[1] == n) - { - return dv * contact->m_weights[1]; - } - btAssert(n == m_face->m_n[2]); - return dv * contact->m_weights[2]; + btVector3 dv = m_total_normal_dv + m_total_tangent_dv; + if (n == m_node) + return dv; + const btSoftBody::DeformableFaceNodeContact* contact = getContact(); + if (m_face->m_n[0] == n) + { + return dv * contact->m_weights[0]; + } + if (m_face->m_n[1] == n) + { + return dv * contact->m_weights[1]; 
+ } + btAssert(n == m_face->m_n[2]); + return dv * contact->m_weights[2]; } btScalar btDeformableFaceNodeContactConstraint::solveConstraint(const btContactSolverInfo& infoGlobal) { - btVector3 va = getVa(); - btVector3 vb = getVb(); - btVector3 vr = vb - va; - const btScalar dn = btDot(vr, m_contact->m_normal); - // dn is the normal component of velocity diffrerence. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt - btScalar residualSquare = dn*dn; - btVector3 impulse = m_contact->m_c0 * vr; - const btVector3 impulse_normal = m_contact->m_c0 * (m_contact->m_normal * dn); - btVector3 impulse_tangent = impulse - impulse_normal; - - btVector3 old_total_tangent_dv = m_total_tangent_dv; - // m_c2 is the inverse mass of the deformable node/face - if (m_node->m_im > 0) - { - m_total_normal_dv -= impulse_normal * m_node->m_im; - m_total_tangent_dv -= impulse_tangent * m_node->m_im; - } - else - { - m_total_normal_dv -= impulse_normal * m_contact->m_imf; - m_total_tangent_dv -= impulse_tangent * m_contact->m_imf; - } - - if (m_total_normal_dv.dot(m_contact->m_normal) > 0) - { - // separating in the normal direction - m_static = false; - m_total_tangent_dv = btVector3(0,0,0); - impulse_tangent.setZero(); - } - else - { - if (m_total_normal_dv.norm() * m_contact->m_friction < m_total_tangent_dv.norm()) - { - // dynamic friction - // with dynamic friction, the impulse are still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations. 
- m_static = false; - if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON) - { - m_total_tangent_dv = btVector3(0,0,0); - } - else - { - m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_friction; - } - impulse_tangent = -btScalar(1)/m_node->m_im * (m_total_tangent_dv - old_total_tangent_dv); - } - else - { - // static friction - m_static = true; - } - } - impulse = impulse_normal + impulse_tangent; - // apply impulse to deformable nodes involved and change their velocities - applyImpulse(impulse); - return residualSquare; + btVector3 va = getVa(); + btVector3 vb = getVb(); + btVector3 vr = vb - va; + const btScalar dn = btDot(vr, m_contact->m_normal); + // dn is the normal component of velocity difference. Approximates the residual. // todo xuchenhan@: this prob needs to be scaled by dt + btScalar residualSquare = dn * dn; + btVector3 impulse = m_contact->m_c0 * vr; + const btVector3 impulse_normal = m_contact->m_c0 * (m_contact->m_normal * dn); + btVector3 impulse_tangent = impulse - impulse_normal; + + btVector3 old_total_tangent_dv = m_total_tangent_dv; + // m_c2 is the inverse mass of the deformable node/face + if (m_node->m_im > 0) + { + m_total_normal_dv -= impulse_normal * m_node->m_im; + m_total_tangent_dv -= impulse_tangent * m_node->m_im; + } + else + { + m_total_normal_dv -= impulse_normal * m_contact->m_imf; + m_total_tangent_dv -= impulse_tangent * m_contact->m_imf; + } + + if (m_total_normal_dv.dot(m_contact->m_normal) > 0) + { + // separating in the normal direction + m_static = false; + m_total_tangent_dv = btVector3(0, 0, 0); + impulse_tangent.setZero(); + } + else + { + if (m_total_normal_dv.norm() * m_contact->m_friction < m_total_tangent_dv.norm()) + { + // dynamic friction + // with dynamic friction, the impulse is still applied to the two objects colliding, however, it does not pose a constraint in the cg solve, hence the change to dv merely serves to update velocity in the contact iterations. 
+ m_static = false; + if (m_total_tangent_dv.safeNorm() < SIMD_EPSILON) + { + m_total_tangent_dv = btVector3(0, 0, 0); + } + else + { + m_total_tangent_dv = m_total_tangent_dv.normalized() * m_total_normal_dv.safeNorm() * m_contact->m_friction; + } + impulse_tangent = -btScalar(1) / m_node->m_im * (m_total_tangent_dv - old_total_tangent_dv); + } + else + { + // static friction + m_static = true; + } + } + impulse = impulse_normal + impulse_tangent; + // apply impulse to deformable nodes involved and change their velocities + applyImpulse(impulse); + return residualSquare; } void btDeformableFaceNodeContactConstraint::applyImpulse(const btVector3& impulse) { - const btSoftBody::DeformableFaceNodeContact* contact = getContact(); - btVector3 dva = impulse * contact->m_node->m_im; - btVector3 dvb = impulse * contact->m_imf; - if (contact->m_node->m_im > 0) - { - contact->m_node->m_v += dva; - } - - btSoftBody::Face* face = contact->m_face; - btVector3& v0 = face->m_n[0]->m_v; - btVector3& v1 = face->m_n[1]->m_v; - btVector3& v2 = face->m_n[2]->m_v; - const btScalar& im0 = face->m_n[0]->m_im; - const btScalar& im1 = face->m_n[1]->m_im; - const btScalar& im2 = face->m_n[2]->m_im; - if (im0 > 0) - { - v0 -= dvb * contact->m_weights[0]; - } - if (im1 > 0) - { - v1 -= dvb * contact->m_weights[1]; - } - if (im2 > 0) - { - v2 -= dvb * contact->m_weights[2]; - } + const btSoftBody::DeformableFaceNodeContact* contact = getContact(); + btVector3 dva = impulse * contact->m_node->m_im; + btVector3 dvb = impulse * contact->m_imf; + if (contact->m_node->m_im > 0) + { + contact->m_node->m_v += dva; + } + + btSoftBody::Face* face = contact->m_face; + btVector3& v0 = face->m_n[0]->m_v; + btVector3& v1 = face->m_n[1]->m_v; + btVector3& v2 = face->m_n[2]->m_v; + const btScalar& im0 = face->m_n[0]->m_im; + const btScalar& im1 = face->m_n[1]->m_im; + const btScalar& im2 = face->m_n[2]->m_im; + if (im0 > 0) + { + v0 -= dvb * contact->m_weights[0]; + } + if (im1 > 0) + { + v1 -= dvb * 
contact->m_weights[1]; + } + if (im2 > 0) + { + v2 -= dvb * contact->m_weights[2]; + } } diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h index 9f9d5bf0a3..1e2c9f5bce 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactConstraint.h @@ -21,51 +21,49 @@ class btDeformableContactConstraint { public: - // True if the friction is static - // False if the friction is dynamic - bool m_static; + // True if the friction is static + // False if the friction is dynamic + bool m_static; const btContactSolverInfo* m_infoGlobal; // normal of the contact btVector3 m_normal; - btDeformableContactConstraint(const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(false), m_normal(normal), m_infoGlobal(&infoGlobal) + btDeformableContactConstraint(const btVector3& normal, const btContactSolverInfo& infoGlobal) : m_static(false), m_normal(normal), m_infoGlobal(&infoGlobal) { } - btDeformableContactConstraint(bool isStatic, const btVector3& normal, const btContactSolverInfo& infoGlobal): m_static(isStatic), m_normal(normal), m_infoGlobal(&infoGlobal) + btDeformableContactConstraint(bool isStatic, const btVector3& normal, const btContactSolverInfo& infoGlobal) : m_static(isStatic), m_normal(normal), m_infoGlobal(&infoGlobal) { } - - btDeformableContactConstraint(){} + + btDeformableContactConstraint() {} btDeformableContactConstraint(const btDeformableContactConstraint& other) - : m_static(other.m_static) - , m_normal(other.m_normal) - , m_infoGlobal(other.m_infoGlobal) + : m_static(other.m_static), m_normal(other.m_normal), m_infoGlobal(other.m_infoGlobal) { } - virtual ~btDeformableContactConstraint(){} - - // solve the constraint with inelastic impulse and return the error, which is the square of normal component of velocity diffrerence - // the constraint is solved by calculating the 
impulse between object A and B in the contact and apply the impulse to both objects involved in the contact - virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) = 0; - - // get the velocity of the object A in the contact - virtual btVector3 getVa() const = 0; - - // get the velocity of the object B in the contact - virtual btVector3 getVb() const = 0; - - // get the velocity change of the soft body node in the constraint - virtual btVector3 getDv(const btSoftBody::Node*) const = 0; - - // apply impulse to the soft body node and/or face involved - virtual void applyImpulse(const btVector3& impulse) = 0; - - // scale the penetration depth by erp - virtual void setPenetrationScale(btScalar scale) = 0; + virtual ~btDeformableContactConstraint() {} + + // solve the constraint with inelastic impulse and return the error, which is the square of the normal component of the velocity difference + // the constraint is solved by calculating the impulse between object A and B in the contact and apply the impulse to both objects involved in the contact + virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) = 0; + + // get the velocity of the object A in the contact + virtual btVector3 getVa() const = 0; + + // get the velocity of the object B in the contact + virtual btVector3 getVb() const = 0; + + // get the velocity change of the soft body node in the constraint + virtual btVector3 getDv(const btSoftBody::Node*) const = 0; + + // apply impulse to the soft body node and/or face involved + virtual void applyImpulse(const btVector3& impulse) = 0; + + // scale the penetration depth by erp + virtual void setPenetrationScale(btScalar scale) = 0; }; // @@ -73,42 +71,41 @@ public: class btDeformableStaticConstraint : public btDeformableContactConstraint { public: - btSoftBody::Node* m_node; - - btDeformableStaticConstraint(btSoftBody::Node* node, const btContactSolverInfo& infoGlobal): m_node(node), btDeformableContactConstraint(false, 
btVector3(0,0,0), infoGlobal) - { - } - btDeformableStaticConstraint(){} - btDeformableStaticConstraint(const btDeformableStaticConstraint& other) - : m_node(other.m_node) - , btDeformableContactConstraint(other) - { - } - - virtual ~btDeformableStaticConstraint(){} - - virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) - { - return 0; - } - - virtual btVector3 getVa() const - { - return btVector3(0,0,0); - } - - virtual btVector3 getVb() const - { - return btVector3(0,0,0); - } - - virtual btVector3 getDv(const btSoftBody::Node* n) const - { - return btVector3(0,0,0); - } - - virtual void applyImpulse(const btVector3& impulse){} - virtual void setPenetrationScale(btScalar scale){} + btSoftBody::Node* m_node; + + btDeformableStaticConstraint(btSoftBody::Node* node, const btContactSolverInfo& infoGlobal) : m_node(node), btDeformableContactConstraint(false, btVector3(0, 0, 0), infoGlobal) + { + } + btDeformableStaticConstraint() {} + btDeformableStaticConstraint(const btDeformableStaticConstraint& other) + : m_node(other.m_node), btDeformableContactConstraint(other) + { + } + + virtual ~btDeformableStaticConstraint() {} + + virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal) + { + return 0; + } + + virtual btVector3 getVa() const + { + return btVector3(0, 0, 0); + } + + virtual btVector3 getVb() const + { + return btVector3(0, 0, 0); + } + + virtual btVector3 getDv(const btSoftBody::Node* n) const + { + return btVector3(0, 0, 0); + } + + virtual void applyImpulse(const btVector3& impulse) {} + virtual void setPenetrationScale(btScalar scale) {} }; // @@ -116,56 +113,67 @@ public: class btDeformableNodeAnchorConstraint : public btDeformableContactConstraint { public: - const btSoftBody::DeformableNodeRigidAnchor* m_anchor; - - btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c, const btContactSolverInfo& infoGlobal); - btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other); 
- btDeformableNodeAnchorConstraint(){} - virtual ~btDeformableNodeAnchorConstraint() - { - } - virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); - - // object A is the rigid/multi body, and object B is the deformable node/face - virtual btVector3 getVa() const; - // get the velocity of the deformable node in contact - virtual btVector3 getVb() const; - virtual btVector3 getDv(const btSoftBody::Node* n) const - { - return btVector3(0,0,0); - } - virtual void applyImpulse(const btVector3& impulse); - - virtual void setPenetrationScale(btScalar scale){} -}; + const btSoftBody::DeformableNodeRigidAnchor* m_anchor; + btDeformableNodeAnchorConstraint(const btSoftBody::DeformableNodeRigidAnchor& c, const btContactSolverInfo& infoGlobal); + btDeformableNodeAnchorConstraint(const btDeformableNodeAnchorConstraint& other); + btDeformableNodeAnchorConstraint() {} + virtual ~btDeformableNodeAnchorConstraint() + { + } + virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); + + // object A is the rigid/multi body, and object B is the deformable node/face + virtual btVector3 getVa() const; + // get the velocity of the deformable node in contact + virtual btVector3 getVb() const; + virtual btVector3 getDv(const btSoftBody::Node* n) const + { + return btVector3(0, 0, 0); + } + virtual void applyImpulse(const btVector3& impulse); + + virtual void setPenetrationScale(btScalar scale) {} +}; // // Constraint between rigid/multi body and deformable objects class btDeformableRigidContactConstraint : public btDeformableContactConstraint { public: - btVector3 m_total_normal_dv; - btVector3 m_total_tangent_dv; - btScalar m_penetration; - const btSoftBody::DeformableRigidContact* m_contact; - - btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal); - btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other); - btDeformableRigidContactConstraint(){} - virtual 
~btDeformableRigidContactConstraint() - { - } - - // object A is the rigid/multi body, and object B is the deformable node/face - virtual btVector3 getVa() const; - - virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); - - virtual void setPenetrationScale(btScalar scale) - { - m_penetration *= scale; - } + btVector3 m_total_normal_dv; + btVector3 m_total_tangent_dv; + btScalar m_penetration; + btScalar m_total_split_impulse; + bool m_binding; + const btSoftBody::DeformableRigidContact* m_contact; + + btDeformableRigidContactConstraint(const btSoftBody::DeformableRigidContact& c, const btContactSolverInfo& infoGlobal); + btDeformableRigidContactConstraint(const btDeformableRigidContactConstraint& other); + btDeformableRigidContactConstraint() {} + virtual ~btDeformableRigidContactConstraint() + { + } + + // object A is the rigid/multi body, and object B is the deformable node/face + virtual btVector3 getVa() const; + + // get the split impulse velocity of the deformable face at the contact point + virtual btVector3 getSplitVb() const = 0; + + // get the split impulse velocity of the rigid/multibody at the contact + virtual btVector3 getSplitVa() const; + + virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); + + virtual void setPenetrationScale(btScalar scale) + { + m_penetration *= scale; + } + + btScalar solveSplitImpulse(const btContactSolverInfo& infoGlobal); + + virtual void applySplitImpulse(const btVector3& impulse) = 0; }; // @@ -173,29 +181,34 @@ public: class btDeformableNodeRigidContactConstraint : public btDeformableRigidContactConstraint { public: - // the deformable node in contact - btSoftBody::Node* m_node; - - btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal); - btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other); - btDeformableNodeRigidContactConstraint(){} - virtual 
~btDeformableNodeRigidContactConstraint() - { - } - - // get the velocity of the deformable node in contact - virtual btVector3 getVb() const; - - // get the velocity change of the input soft body node in the constraint - virtual btVector3 getDv(const btSoftBody::Node*) const; - - // cast the contact to the desired type - const btSoftBody::DeformableNodeRigidContact* getContact() const - { - return static_cast<const btSoftBody::DeformableNodeRigidContact*>(m_contact); - } - - virtual void applyImpulse(const btVector3& impulse); + // the deformable node in contact + btSoftBody::Node* m_node; + + btDeformableNodeRigidContactConstraint(const btSoftBody::DeformableNodeRigidContact& contact, const btContactSolverInfo& infoGlobal); + btDeformableNodeRigidContactConstraint(const btDeformableNodeRigidContactConstraint& other); + btDeformableNodeRigidContactConstraint() {} + virtual ~btDeformableNodeRigidContactConstraint() + { + } + + // get the velocity of the deformable node in contact + virtual btVector3 getVb() const; + + // get the split impulse velocity of the deformable node in contact + virtual btVector3 getSplitVb() const; + + // get the velocity change of the input soft body node in the constraint + virtual btVector3 getDv(const btSoftBody::Node*) const; + + // cast the contact to the desired type + const btSoftBody::DeformableNodeRigidContact* getContact() const + { + return static_cast<const btSoftBody::DeformableNodeRigidContact*>(m_contact); + } + + virtual void applyImpulse(const btVector3& impulse); + + virtual void applySplitImpulse(const btVector3& impulse); }; // @@ -203,28 +216,33 @@ public: class btDeformableFaceRigidContactConstraint : public btDeformableRigidContactConstraint { public: - const btSoftBody::Face* m_face; - bool m_useStrainLimiting; - btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting); - 
btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other); - btDeformableFaceRigidContactConstraint(): m_useStrainLimiting(false) {} - virtual ~btDeformableFaceRigidContactConstraint() - { - } - - // get the velocity of the deformable face at the contact point - virtual btVector3 getVb() const; - - // get the velocity change of the input soft body node in the constraint - virtual btVector3 getDv(const btSoftBody::Node*) const; - - // cast the contact to the desired type - const btSoftBody::DeformableFaceRigidContact* getContact() const - { - return static_cast<const btSoftBody::DeformableFaceRigidContact*>(m_contact); - } - - virtual void applyImpulse(const btVector3& impulse); + btSoftBody::Face* m_face; + bool m_useStrainLimiting; + btDeformableFaceRigidContactConstraint(const btSoftBody::DeformableFaceRigidContact& contact, const btContactSolverInfo& infoGlobal, bool useStrainLimiting); + btDeformableFaceRigidContactConstraint(const btDeformableFaceRigidContactConstraint& other); + btDeformableFaceRigidContactConstraint() : m_useStrainLimiting(false) {} + virtual ~btDeformableFaceRigidContactConstraint() + { + } + + // get the velocity of the deformable face at the contact point + virtual btVector3 getVb() const; + + // get the split impulse velocity of the deformable face at the contact point + virtual btVector3 getSplitVb() const; + + // get the velocity change of the input soft body node in the constraint + virtual btVector3 getDv(const btSoftBody::Node*) const; + + // cast the contact to the desired type + const btSoftBody::DeformableFaceRigidContact* getContact() const + { + return static_cast<const btSoftBody::DeformableFaceRigidContact*>(m_contact); + } + + virtual void applyImpulse(const btVector3& impulse); + + virtual void applySplitImpulse(const btVector3& impulse); }; // @@ -232,35 +250,35 @@ public: class btDeformableFaceNodeContactConstraint : public btDeformableContactConstraint { public: - btSoftBody::Node* 
m_node; - btSoftBody::Face* m_face; - const btSoftBody::DeformableFaceNodeContact* m_contact; - btVector3 m_total_normal_dv; - btVector3 m_total_tangent_dv; - - btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal); - btDeformableFaceNodeContactConstraint(){} - virtual ~btDeformableFaceNodeContactConstraint(){} - - virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); - - // get the velocity of the object A in the contact - virtual btVector3 getVa() const; - - // get the velocity of the object B in the contact - virtual btVector3 getVb() const; - - // get the velocity change of the input soft body node in the constraint - virtual btVector3 getDv(const btSoftBody::Node*) const; - - // cast the contact to the desired type - const btSoftBody::DeformableFaceNodeContact* getContact() const - { - return static_cast<const btSoftBody::DeformableFaceNodeContact*>(m_contact); - } - - virtual void applyImpulse(const btVector3& impulse); - - virtual void setPenetrationScale(btScalar scale){} + btSoftBody::Node* m_node; + btSoftBody::Face* m_face; + const btSoftBody::DeformableFaceNodeContact* m_contact; + btVector3 m_total_normal_dv; + btVector3 m_total_tangent_dv; + + btDeformableFaceNodeContactConstraint(const btSoftBody::DeformableFaceNodeContact& contact, const btContactSolverInfo& infoGlobal); + btDeformableFaceNodeContactConstraint() {} + virtual ~btDeformableFaceNodeContactConstraint() {} + + virtual btScalar solveConstraint(const btContactSolverInfo& infoGlobal); + + // get the velocity of the object A in the contact + virtual btVector3 getVa() const; + + // get the velocity of the object B in the contact + virtual btVector3 getVb() const; + + // get the velocity change of the input soft body node in the constraint + virtual btVector3 getDv(const btSoftBody::Node*) const; + + // cast the contact to the desired type + const btSoftBody::DeformableFaceNodeContact* getContact() 
const + { + return static_cast<const btSoftBody::DeformableFaceNodeContact*>(m_contact); + } + + virtual void applyImpulse(const btVector3& impulse); + + virtual void setPenetrationScale(btScalar scale) {} }; #endif /* BT_DEFORMABLE_CONTACT_CONSTRAINT_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp index 22ca8bf582..7f67260ce6 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.cpp @@ -17,7 +17,7 @@ #include "btDeformableMultiBodyDynamicsWorld.h" #include <algorithm> #include <cmath> -btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal) +btScalar btDeformableContactProjection::update(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal) { btScalar residualSquare = 0; for (int i = 0; i < numDeformableBodies; ++i) @@ -58,27 +58,37 @@ btScalar btDeformableContactProjection::update(btCollisionObject** deformableBod return residualSquare; } -void btDeformableContactProjection::splitImpulseSetup(const btContactSolverInfo& infoGlobal) +btScalar btDeformableContactProjection::solveSplitImpulse(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal) { - for (int i = 0; i < m_softBodies.size(); ++i) + btScalar residualSquare = 0; + for (int i = 0; i < numDeformableBodies; ++i) { - // node constraints - for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) - { - btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[i][j]; - constraint.setPenetrationScale(infoGlobal.m_deformable_erp); - } - // face constraints - for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) + for (int j = 0; j < m_softBodies.size(); ++j) { - btDeformableFaceRigidContactConstraint& 
constraint = m_faceRigidConstraints[i][j]; - constraint.setPenetrationScale(infoGlobal.m_deformable_erp); + btCollisionObject* psb = m_softBodies[j]; + if (psb != deformableBodies[i]) + { + continue; + } + for (int k = 0; k < m_nodeRigidConstraints[j].size(); ++k) + { + btDeformableNodeRigidContactConstraint& constraint = m_nodeRigidConstraints[j][k]; + btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal); + residualSquare = btMax(residualSquare, localResidualSquare); + } + for (int k = 0; k < m_faceRigidConstraints[j].size(); ++k) + { + btDeformableFaceRigidContactConstraint& constraint = m_faceRigidConstraints[j][k]; + btScalar localResidualSquare = constraint.solveSplitImpulse(infoGlobal); + residualSquare = btMax(residualSquare, localResidualSquare); + } } } + return residualSquare; } void btDeformableContactProjection::setConstraints(const btContactSolverInfo& infoGlobal) -{ +{ BT_PROFILE("setConstraints"); for (int i = 0; i < m_softBodies.size(); ++i) { @@ -97,7 +107,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in m_staticConstraints[i].push_back(static_constraint); } } - + // set up deformable anchors for (int j = 0; j < psb->m_deformableAnchors.size(); ++j) { @@ -111,7 +121,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in btDeformableNodeAnchorConstraint constraint(anchor, infoGlobal); m_nodeAnchorConstraints[i].push_back(constraint); } - + // set Deformable Node vs. 
Rigid constraint for (int j = 0; j < psb->m_nodeRigidContacts.size(); ++j) { @@ -122,17 +132,9 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in continue; } btDeformableNodeRigidContactConstraint constraint(contact, infoGlobal); - btVector3 va = constraint.getVa(); - btVector3 vb = constraint.getVb(); - const btVector3 vr = vb - va; - const btSoftBody::sCti& cti = contact.m_cti; - const btScalar dn = btDot(vr, cti.m_normal); - if (dn < SIMD_EPSILON) - { - m_nodeRigidConstraints[i].push_back(constraint); - } + m_nodeRigidConstraints[i].push_back(constraint); } - + // set Deformable Face vs. Rigid constraint for (int j = 0; j < psb->m_faceRigidContacts.size(); ++j) { @@ -143,15 +145,7 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in continue; } btDeformableFaceRigidContactConstraint constraint(contact, infoGlobal, m_useStrainLimiting); - btVector3 va = constraint.getVa(); - btVector3 vb = constraint.getVb(); - const btVector3 vr = vb - va; - const btSoftBody::sCti& cti = contact.m_cti; - const btScalar dn = btDot(vr, cti.m_normal); - if (dn < SIMD_EPSILON) - { - m_faceRigidConstraints[i].push_back(constraint); - } + m_faceRigidConstraints[i].push_back(constraint); } } } @@ -159,267 +153,269 @@ void btDeformableContactProjection::setConstraints(const btContactSolverInfo& in void btDeformableContactProjection::project(TVStack& x) { #ifndef USE_MGS - const int dim = 3; - for (int index = 0; index < m_projectionsDict.size(); ++index) - { - btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index); - size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1(); - if (projectionDirs.size() >= dim) - { - // static node - x[i].setZero(); - continue; - } - else if (projectionDirs.size() == 2) - { - btVector3 dir0 = projectionDirs[0]; - btVector3 dir1 = projectionDirs[1]; - btVector3 free_dir = btCross(dir0, dir1); - if (free_dir.safeNorm() < SIMD_EPSILON) - { - x[i] -= 
x[i].dot(dir0) * dir0; - x[i] -= x[i].dot(dir1) * dir1; - } - else - { - free_dir.normalize(); - x[i] = x[i].dot(free_dir) * free_dir; - } - } - else - { - btAssert(projectionDirs.size() == 1); - btVector3 dir0 = projectionDirs[0]; - x[i] -= x[i].dot(dir0) * dir0; - } - } + const int dim = 3; + for (int index = 0; index < m_projectionsDict.size(); ++index) + { + btAlignedObjectArray<btVector3>& projectionDirs = *m_projectionsDict.getAtIndex(index); + size_t i = m_projectionsDict.getKeyAtIndex(index).getUid1(); + if (projectionDirs.size() >= dim) + { + // static node + x[i].setZero(); + continue; + } + else if (projectionDirs.size() == 2) + { + btVector3 dir0 = projectionDirs[0]; + btVector3 dir1 = projectionDirs[1]; + btVector3 free_dir = btCross(dir0, dir1); + if (free_dir.safeNorm() < SIMD_EPSILON) + { + x[i] -= x[i].dot(dir0) * dir0; + } + else + { + free_dir.normalize(); + x[i] = x[i].dot(free_dir) * free_dir; + } + } + else + { + btAssert(projectionDirs.size() == 1); + btVector3 dir0 = projectionDirs[0]; + x[i] -= x[i].dot(dir0) * dir0; + } + } #else - btReducedVector p(x.size()); - for (int i = 0; i < m_projections.size(); ++i) - { - p += (m_projections[i].dot(x) * m_projections[i]); - } - for (int i = 0; i < p.m_indices.size(); ++i) - { - x[p.m_indices[i]] -= p.m_vecs[i]; - } + btReducedVector p(x.size()); + for (int i = 0; i < m_projections.size(); ++i) + { + p += (m_projections[i].dot(x) * m_projections[i]); + } + for (int i = 0; i < p.m_indices.size(); ++i) + { + x[p.m_indices[i]] -= p.m_vecs[i]; + } #endif } void btDeformableContactProjection::setProjection() { #ifndef USE_MGS - BT_PROFILE("btDeformableContactProjection::setProjection"); - btAlignedObjectArray<btVector3> units; - units.push_back(btVector3(1,0,0)); - units.push_back(btVector3(0,1,0)); - units.push_back(btVector3(0,0,1)); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < 
m_staticConstraints[i].size(); ++j) - { - int index = m_staticConstraints[i][j].m_node->index; - m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY; - if (m_projectionsDict.find(index) == NULL) - { - m_projectionsDict.insert(index, units); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - for (int k = 0; k < 3; ++k) - { - projections.push_back(units[k]); - } - } - } - for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) - { - int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; - m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY; - if (m_projectionsDict.find(index) == NULL) - { - m_projectionsDict.insert(index, units); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - for (int k = 0; k < 3; ++k) - { - projections.push_back(units[k]); - } - } - } - for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) - { - int index = m_nodeRigidConstraints[i][j].m_node->index; - m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset; - if (m_nodeRigidConstraints[i][j].m_static) - { - if (m_projectionsDict.find(index) == NULL) - { - m_projectionsDict.insert(index, units); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - for (int k = 0; k < 3; ++k) - { - projections.push_back(units[k]); - } - } - } - else - { - if (m_projectionsDict.find(index) == NULL) - { - btAlignedObjectArray<btVector3> projections; - projections.push_back(m_nodeRigidConstraints[i][j].m_normal); - m_projectionsDict.insert(index, projections); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - projections.push_back(m_nodeRigidConstraints[i][j].m_normal); - } - } - } - for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) - { - const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; - btScalar penetration = 
-m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset; - for (int k = 0; k < 3; ++k) - { - face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration); - } - for (int k = 0; k < 3; ++k) - { - btSoftBody::Node* node = face->m_n[k]; - node->m_penetration = true; - int index = node->index; - if (m_faceRigidConstraints[i][j].m_static) - { - if (m_projectionsDict.find(index) == NULL) - { - m_projectionsDict.insert(index, units); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - for (int k = 0; k < 3; ++k) - { - projections.push_back(units[k]); - } - } - } - else - { - if (m_projectionsDict.find(index) == NULL) - { - btAlignedObjectArray<btVector3> projections; - projections.push_back(m_faceRigidConstraints[i][j].m_normal); - m_projectionsDict.insert(index, projections); - } - else - { - btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; - projections.push_back(m_faceRigidConstraints[i][j].m_normal); - } - } - } - } - } + BT_PROFILE("btDeformableContactProjection::setProjection"); + btAlignedObjectArray<btVector3> units; + units.push_back(btVector3(1, 0, 0)); + units.push_back(btVector3(0, 1, 0)); + units.push_back(btVector3(0, 0, 1)); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < m_staticConstraints[i].size(); ++j) + { + int index = m_staticConstraints[i][j].m_node->index; + m_staticConstraints[i][j].m_node->m_constrained = true; + if (m_projectionsDict.find(index) == NULL) + { + m_projectionsDict.insert(index, units); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + for (int k = 0; k < 3; ++k) + { + projections.push_back(units[k]); + } + } + } + for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) + { + int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; + 
m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_constrained = true; + if (m_projectionsDict.find(index) == NULL) + { + m_projectionsDict.insert(index, units); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + for (int k = 0; k < 3; ++k) + { + projections.push_back(units[k]); + } + } + } + for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) + { + int index = m_nodeRigidConstraints[i][j].m_node->index; + m_nodeRigidConstraints[i][j].m_node->m_constrained = true; + if (m_nodeRigidConstraints[i][j].m_binding) + { + if (m_nodeRigidConstraints[i][j].m_static) + { + if (m_projectionsDict.find(index) == NULL) + { + m_projectionsDict.insert(index, units); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + for (int k = 0; k < 3; ++k) + { + projections.push_back(units[k]); + } + } + } + else + { + if (m_projectionsDict.find(index) == NULL) + { + btAlignedObjectArray<btVector3> projections; + projections.push_back(m_nodeRigidConstraints[i][j].m_normal); + m_projectionsDict.insert(index, projections); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + projections.push_back(m_nodeRigidConstraints[i][j].m_normal); + } + } + } + } + for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) + { + const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; + if (m_faceRigidConstraints[i][j].m_binding) + { + for (int k = 0; k < 3; ++k) + { + face->m_n[k]->m_constrained = true; + } + } + for (int k = 0; k < 3; ++k) + { + btSoftBody::Node* node = face->m_n[k]; + int index = node->index; + if (m_faceRigidConstraints[i][j].m_static) + { + if (m_projectionsDict.find(index) == NULL) + { + m_projectionsDict.insert(index, units); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + for (int l = 0; l < 3; ++l) + { + projections.push_back(units[l]); + } + } + } + else + { + if (m_projectionsDict.find(index) == 
NULL) + { + btAlignedObjectArray<btVector3> projections; + projections.push_back(m_faceRigidConstraints[i][j].m_normal); + m_projectionsDict.insert(index, projections); + } + else + { + btAlignedObjectArray<btVector3>& projections = *m_projectionsDict[index]; + projections.push_back(m_faceRigidConstraints[i][j].m_normal); + } + } + } + } + } #else - int dof = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - dof += m_softBodies[i]->m_nodes.size(); - } - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < m_staticConstraints[i].size(); ++j) - { - int index = m_staticConstraints[i][j].m_node->index; - m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY; - btAlignedObjectArray<int> indices; - btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3; - indices.push_back(index); - vecs1.push_back(btVector3(1,0,0)); - vecs2.push_back(btVector3(0,1,0)); - vecs3.push_back(btVector3(0,0,1)); - m_projections.push_back(btReducedVector(dof, indices, vecs1)); - m_projections.push_back(btReducedVector(dof, indices, vecs2)); - m_projections.push_back(btReducedVector(dof, indices, vecs3)); - } - - for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) - { - int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; - m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY; - btAlignedObjectArray<int> indices; - btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3; - indices.push_back(index); - vecs1.push_back(btVector3(1,0,0)); - vecs2.push_back(btVector3(0,1,0)); - vecs3.push_back(btVector3(0,0,1)); - m_projections.push_back(btReducedVector(dof, indices, vecs1)); - m_projections.push_back(btReducedVector(dof, indices, vecs2)); - m_projections.push_back(btReducedVector(dof, indices, vecs3)); - } - for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) - { - int index = m_nodeRigidConstraints[i][j].m_node->index; - 
m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset; - btAlignedObjectArray<int> indices; - indices.push_back(index); - btAlignedObjectArray<btVector3> vecs1,vecs2,vecs3; - if (m_nodeRigidConstraints[i][j].m_static) - { - vecs1.push_back(btVector3(1,0,0)); - vecs2.push_back(btVector3(0,1,0)); - vecs3.push_back(btVector3(0,0,1)); - m_projections.push_back(btReducedVector(dof, indices, vecs1)); - m_projections.push_back(btReducedVector(dof, indices, vecs2)); - m_projections.push_back(btReducedVector(dof, indices, vecs3)); - } - else - { - vecs1.push_back(m_nodeRigidConstraints[i][j].m_normal); - m_projections.push_back(btReducedVector(dof, indices, vecs1)); - } - } - for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) - { - const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; + int dof = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + dof += m_softBodies[i]->m_nodes.size(); + } + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < m_staticConstraints[i].size(); ++j) + { + int index = m_staticConstraints[i][j].m_node->index; + m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY; + btAlignedObjectArray<int> indices; + btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3; + indices.push_back(index); + vecs1.push_back(btVector3(1, 0, 0)); + vecs2.push_back(btVector3(0, 1, 0)); + vecs3.push_back(btVector3(0, 0, 1)); + m_projections.push_back(btReducedVector(dof, indices, vecs1)); + m_projections.push_back(btReducedVector(dof, indices, vecs2)); + m_projections.push_back(btReducedVector(dof, indices, vecs3)); + } + + for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) + { + int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; + m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY; + btAlignedObjectArray<int> indices; + 
btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3; + indices.push_back(index); + vecs1.push_back(btVector3(1, 0, 0)); + vecs2.push_back(btVector3(0, 1, 0)); + vecs3.push_back(btVector3(0, 0, 1)); + m_projections.push_back(btReducedVector(dof, indices, vecs1)); + m_projections.push_back(btReducedVector(dof, indices, vecs2)); + m_projections.push_back(btReducedVector(dof, indices, vecs3)); + } + for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) + { + int index = m_nodeRigidConstraints[i][j].m_node->index; + m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset; + btAlignedObjectArray<int> indices; + indices.push_back(index); + btAlignedObjectArray<btVector3> vecs1, vecs2, vecs3; + if (m_nodeRigidConstraints[i][j].m_static) + { + vecs1.push_back(btVector3(1, 0, 0)); + vecs2.push_back(btVector3(0, 1, 0)); + vecs3.push_back(btVector3(0, 0, 1)); + m_projections.push_back(btReducedVector(dof, indices, vecs1)); + m_projections.push_back(btReducedVector(dof, indices, vecs2)); + m_projections.push_back(btReducedVector(dof, indices, vecs3)); + } + else + { + vecs1.push_back(m_nodeRigidConstraints[i][j].m_normal); + m_projections.push_back(btReducedVector(dof, indices, vecs1)); + } + } + for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) + { + const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary; - btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset; - for (int k = 0; k < 3; ++k) - { - face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration); - } + btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset; + for (int k = 0; k < 3; ++k) + { + face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration); + } if (m_faceRigidConstraints[i][j].m_static) { for (int l = 0; l < 3; ++l) { - btReducedVector rv(dof); for (int k = 0; k < 3; 
++k) { rv.m_indices.push_back(face->m_n[k]->index); - btVector3 v(0,0,0); + btVector3 v(0, 0, 0); v[l] = bary[k]; rv.m_vecs.push_back(v); - rv.sort(); + rv.sort(); } m_projections.push_back(rv); } @@ -431,121 +427,134 @@ void btDeformableContactProjection::setProjection() { rv.m_indices.push_back(face->m_n[k]->index); rv.m_vecs.push_back(bary[k] * m_faceRigidConstraints[i][j].m_normal); - rv.sort(); + rv.sort(); } m_projections.push_back(rv); } } - } - btModifiedGramSchmidt<btReducedVector> mgs(m_projections); - mgs.solve(); - m_projections = mgs.m_out; + } + btModifiedGramSchmidt<btReducedVector> mgs(m_projections); + mgs.solve(); + m_projections = mgs.m_out; #endif } void btDeformableContactProjection::checkConstraints(const TVStack& x) { - for (int i = 0; i < m_lagrangeMultipliers.size(); ++i) - { - btVector3 d(0,0,0); - const LagrangeMultiplier& lm = m_lagrangeMultipliers[i]; - for (int j = 0; j < lm.m_num_constraints; ++j) - { - for (int k = 0; k < lm.m_num_nodes; ++k) - { - d[j] += lm.m_weights[k] * x[lm.m_indices[k]].dot(lm.m_dirs[j]); - } - } - printf("d = %f, %f, %f\n",d[0],d[1],d[2]); - } + for (int i = 0; i < m_lagrangeMultipliers.size(); ++i) + { + btVector3 d(0, 0, 0); + const LagrangeMultiplier& lm = m_lagrangeMultipliers[i]; + for (int j = 0; j < lm.m_num_constraints; ++j) + { + for (int k = 0; k < lm.m_num_nodes; ++k) + { + d[j] += lm.m_weights[k] * x[lm.m_indices[k]].dot(lm.m_dirs[j]); + } + } + // printf("d = %f, %f, %f\n", d[0], d[1], d[2]); + // printf("val = %f, %f, %f\n", lm.m_vals[0], lm.m_vals[1], lm.m_vals[2]); + } } void btDeformableContactProjection::setLagrangeMultiplier() { - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < m_staticConstraints[i].size(); ++j) - { - int index = m_staticConstraints[i][j].m_node->index; - m_staticConstraints[i][j].m_node->m_penetration = SIMD_INFINITY; - LagrangeMultiplier lm; - lm.m_num_nodes = 1; - 
lm.m_indices[0] = index; - lm.m_weights[0] = 1.0; - lm.m_num_constraints = 3; - lm.m_dirs[0] = btVector3(1,0,0); - lm.m_dirs[1] = btVector3(0,1,0); - lm.m_dirs[2] = btVector3(0,0,1); - m_lagrangeMultipliers.push_back(lm); - } - for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) - { - int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; - m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_penetration = SIMD_INFINITY; - LagrangeMultiplier lm; - lm.m_num_nodes = 1; - lm.m_indices[0] = index; - lm.m_weights[0] = 1.0; - lm.m_num_constraints = 3; - lm.m_dirs[0] = btVector3(1,0,0); - lm.m_dirs[1] = btVector3(0,1,0); - lm.m_dirs[2] = btVector3(0,0,1); - m_lagrangeMultipliers.push_back(lm); - } - for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) - { - int index = m_nodeRigidConstraints[i][j].m_node->index; - m_nodeRigidConstraints[i][j].m_node->m_penetration = -m_nodeRigidConstraints[i][j].getContact()->m_cti.m_offset; - LagrangeMultiplier lm; - lm.m_num_nodes = 1; - lm.m_indices[0] = index; - lm.m_weights[0] = 1.0; - if (m_nodeRigidConstraints[i][j].m_static) - { - lm.m_num_constraints = 3; - lm.m_dirs[0] = btVector3(1,0,0); - lm.m_dirs[1] = btVector3(0,1,0); - lm.m_dirs[2] = btVector3(0,0,1); - } - else - { - lm.m_num_constraints = 1; - lm.m_dirs[0] = m_nodeRigidConstraints[i][j].m_normal; - } - m_lagrangeMultipliers.push_back(lm); - } - for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) - { - const btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; - - btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary; - btScalar penetration = -m_faceRigidConstraints[i][j].getContact()->m_cti.m_offset; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < m_staticConstraints[i].size(); ++j) + { + int index = m_staticConstraints[i][j].m_node->index; + m_staticConstraints[i][j].m_node->m_constrained = true; + 
LagrangeMultiplier lm; + lm.m_num_nodes = 1; + lm.m_indices[0] = index; + lm.m_weights[0] = 1.0; + lm.m_num_constraints = 3; + lm.m_dirs[0] = btVector3(1, 0, 0); + lm.m_dirs[1] = btVector3(0, 1, 0); + lm.m_dirs[2] = btVector3(0, 0, 1); + m_lagrangeMultipliers.push_back(lm); + } + for (int j = 0; j < m_nodeAnchorConstraints[i].size(); ++j) + { + int index = m_nodeAnchorConstraints[i][j].m_anchor->m_node->index; + m_nodeAnchorConstraints[i][j].m_anchor->m_node->m_constrained = true; + LagrangeMultiplier lm; + lm.m_num_nodes = 1; + lm.m_indices[0] = index; + lm.m_weights[0] = 1.0; + lm.m_num_constraints = 3; + lm.m_dirs[0] = btVector3(1, 0, 0); + lm.m_dirs[1] = btVector3(0, 1, 0); + lm.m_dirs[2] = btVector3(0, 0, 1); + m_lagrangeMultipliers.push_back(lm); + } + + for (int j = 0; j < m_nodeRigidConstraints[i].size(); ++j) + { + if (!m_nodeRigidConstraints[i][j].m_binding) + { + continue; + } + int index = m_nodeRigidConstraints[i][j].m_node->index; + m_nodeRigidConstraints[i][j].m_node->m_constrained = true; + LagrangeMultiplier lm; + lm.m_num_nodes = 1; + lm.m_indices[0] = index; + lm.m_weights[0] = 1.0; + if (m_nodeRigidConstraints[i][j].m_static) + { + lm.m_num_constraints = 3; + lm.m_dirs[0] = btVector3(1, 0, 0); + lm.m_dirs[1] = btVector3(0, 1, 0); + lm.m_dirs[2] = btVector3(0, 0, 1); + } + else + { + lm.m_num_constraints = 1; + lm.m_dirs[0] = m_nodeRigidConstraints[i][j].m_normal; + } + m_lagrangeMultipliers.push_back(lm); + } + + for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) + { + if (!m_faceRigidConstraints[i][j].m_binding) + { + continue; + } + btSoftBody::Face* face = m_faceRigidConstraints[i][j].m_face; + + btVector3 bary = m_faceRigidConstraints[i][j].getContact()->m_bary; LagrangeMultiplier lm; lm.m_num_nodes = 3; - for (int k = 0; k<3; ++k) + + for (int k = 0; k < 3; ++k) { - face->m_n[k]->m_penetration = btMax(face->m_n[k]->m_penetration, penetration); + face->m_n[k]->m_constrained = true; lm.m_indices[k] = face->m_n[k]->index; 
lm.m_weights[k] = bary[k]; } - if (m_faceRigidConstraints[i][j].m_static) - { + if (m_faceRigidConstraints[i][j].m_static) + { + face->m_pcontact[3] = 1; lm.m_num_constraints = 3; - lm.m_dirs[0] = btVector3(1,0,0); - lm.m_dirs[1] = btVector3(0,1,0); - lm.m_dirs[2] = btVector3(0,0,1); + lm.m_dirs[0] = btVector3(1, 0, 0); + lm.m_dirs[1] = btVector3(0, 1, 0); + lm.m_dirs[2] = btVector3(0, 0, 1); } else { + face->m_pcontact[3] = 0; lm.m_num_constraints = 1; lm.m_dirs[0] = m_faceRigidConstraints[i][j].m_normal; } - m_lagrangeMultipliers.push_back(lm); + m_lagrangeMultipliers.push_back(lm); } } } @@ -562,7 +571,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f) if (node->m_im != 0) { int index = node->index; - f[index] += constraint.getDv(node)* (1./node->m_im); + f[index] += constraint.getDv(node) * (1. / node->m_im); } } for (int j = 0; j < m_faceRigidConstraints[i].size(); ++j) @@ -575,7 +584,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f) if (node->m_im != 0) { int index = node->index; - f[index] += constraint.getDv(node)* (1./node->m_im); + f[index] += constraint.getDv(node) * (1. / node->m_im); } } } @@ -587,7 +596,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f) if (node->m_im != 0) { int index = node->index; - f[index] += constraint.getDv(node)* (1./node->m_im); + f[index] += constraint.getDv(node) * (1. / node->m_im); } for (int k = 0; k < 3; ++k) { @@ -595,7 +604,7 @@ void btDeformableContactProjection::applyDynamicFriction(TVStack& f) if (node->m_im != 0) { int index = node->index; - f[index] += constraint.getDv(node)* (1./node->m_im); + f[index] += constraint.getDv(node) * (1. 
/ node->m_im); } } } @@ -612,9 +621,8 @@ void btDeformableContactProjection::reinitialize(bool nodeUpdated) m_nodeRigidConstraints.resize(N); m_faceRigidConstraints.resize(N); m_deformableConstraints.resize(N); - } - for (int i = 0 ; i < N; ++i) + for (int i = 0; i < N; ++i) { m_staticConstraints[i].clear(); m_nodeAnchorConstraints[i].clear(); @@ -623,12 +631,9 @@ void btDeformableContactProjection::reinitialize(bool nodeUpdated) m_deformableConstraints[i].clear(); } #ifndef USE_MGS - m_projectionsDict.clear(); + m_projectionsDict.clear(); #else - m_projections.clear(); + m_projections.clear(); #endif - m_lagrangeMultipliers.clear(); + m_lagrangeMultipliers.clear(); } - - - diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h index 8d7e94d4fb..4964eaf990 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableContactProjection.h @@ -27,31 +27,30 @@ struct LagrangeMultiplier { - int m_num_constraints; // Number of constraints - int m_num_nodes; // Number of nodes in these constraints - btScalar m_weights[3]; // weights of the nodes involved, same size as m_num_nodes - btVector3 m_dirs[3]; // Constraint directions, same size of m_num_constraints; - int m_indices[3]; // indices of the nodes involved, same size as m_num_nodes; + int m_num_constraints; // Number of constraints + int m_num_nodes; // Number of nodes in these constraints + btScalar m_weights[3]; // weights of the nodes involved, same size as m_num_nodes + btVector3 m_dirs[3]; // Constraint directions, same size of m_num_constraints; + int m_indices[3]; // indices of the nodes involved, same size as m_num_nodes; }; - class btDeformableContactProjection { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btAlignedObjectArray<btSoftBody *>& m_softBodies; - - // all constraints involving face - 
btAlignedObjectArray<btDeformableContactConstraint*> m_allFaceConstraints; + typedef btAlignedObjectArray<btVector3> TVStack; + btAlignedObjectArray<btSoftBody*>& m_softBodies; + + // all constraints involving face + btAlignedObjectArray<btDeformableContactConstraint*> m_allFaceConstraints; #ifndef USE_MGS - // map from node index to projection directions - btHashMap<btHashInt, btAlignedObjectArray<btVector3> > m_projectionsDict; + // map from node index to projection directions + btHashMap<btHashInt, btAlignedObjectArray<btVector3> > m_projectionsDict; #else - btAlignedObjectArray<btReducedVector> m_projections; + btAlignedObjectArray<btReducedVector> m_projections; #endif - - btAlignedObjectArray<LagrangeMultiplier> m_lagrangeMultipliers; - + + btAlignedObjectArray<LagrangeMultiplier> m_lagrangeMultipliers; + // map from node index to static constraint btAlignedObjectArray<btAlignedObjectArray<btDeformableStaticConstraint> > m_staticConstraints; // map from node index to node rigid constraint @@ -62,39 +61,39 @@ public: btAlignedObjectArray<btAlignedObjectArray<btDeformableFaceNodeContactConstraint> > m_deformableConstraints; // map from node index to node anchor constraint btAlignedObjectArray<btAlignedObjectArray<btDeformableNodeAnchorConstraint> > m_nodeAnchorConstraints; - - bool m_useStrainLimiting; - - btDeformableContactProjection(btAlignedObjectArray<btSoftBody *>& softBodies) - : m_softBodies(softBodies) - { - } - - virtual ~btDeformableContactProjection() - { - } - - // apply the constraints to the rhs of the linear solve - virtual void project(TVStack& x); - - // add friction force to the rhs of the linear solve - virtual void applyDynamicFriction(TVStack& f); - - // update and solve the constraints - virtual btScalar update(btCollisionObject** deformableBodies,int numDeformableBodies, const btContactSolverInfo& infoGlobal); - - // Add constraints to m_constraints. In addition, the constraints that each vertex own are recorded in m_constraintsDict. 
- virtual void setConstraints(const btContactSolverInfo& infoGlobal); - - // Set up projections for each vertex by adding the projection direction to - virtual void setProjection(); - - virtual void reinitialize(bool nodeUpdated); - - virtual void splitImpulseSetup(const btContactSolverInfo& infoGlobal); - - virtual void setLagrangeMultiplier(); - - void checkConstraints(const TVStack& x); + + bool m_useStrainLimiting; + + btDeformableContactProjection(btAlignedObjectArray<btSoftBody*>& softBodies) + : m_softBodies(softBodies) + { + } + + virtual ~btDeformableContactProjection() + { + } + + // apply the constraints to the rhs of the linear solve + virtual void project(TVStack& x); + + // add friction force to the rhs of the linear solve + virtual void applyDynamicFriction(TVStack& f); + + // update and solve the constraints + virtual btScalar update(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal); + + // Add constraints to m_constraints. In addition, the constraints that each vertex own are recorded in m_constraintsDict. 
+ virtual void setConstraints(const btContactSolverInfo& infoGlobal); + + // Set up projections for each vertex by adding the projection direction to + virtual void setProjection(); + + virtual void reinitialize(bool nodeUpdated); + + btScalar solveSplitImpulse(btCollisionObject** deformableBodies, int numDeformableBodies, const btContactSolverInfo& infoGlobal); + + virtual void setLagrangeMultiplier(); + + void checkConstraints(const TVStack& x); }; #endif /* btDeformableContactProjection_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h index 2d042df729..dfd85523bc 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableCorotatedForce.h @@ -21,107 +21,104 @@ static inline int PolarDecomposition(const btMatrix3x3& m, btMatrix3x3& q, btMatrix3x3& s) { - static const btPolarDecomposition polar; - return polar.decompose(m, q, s); + static const btPolarDecomposition polar; + return polar.decompose(m, q, s); } class btDeformableCorotatedForce : public btDeformableLagrangianForce { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btScalar m_mu, m_lambda; - btDeformableCorotatedForce(): m_mu(1), m_lambda(1) - { - - } - - btDeformableCorotatedForce(btScalar mu, btScalar lambda): m_mu(mu), m_lambda(lambda) - { - } - - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - } - - virtual void addScaledElasticForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; 
- for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btMatrix3x3 P; - firstPiola(tetra.m_F,P); - btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose(); - - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - - // elastic force - // explicit elastic force - btScalar scale1 = scale * tetra.m_element_measure; - force[id0] -= scale1 * force_on_node0; - force[id1] -= scale1 * force_on_node123.getColumn(0); - force[id2] -= scale1 * force_on_node123.getColumn(1); - force[id3] -= scale1 * force_on_node123.getColumn(2); - } - } - } - - void firstPiola(const btMatrix3x3& F, btMatrix3x3& P) - { - // btMatrix3x3 JFinvT = F.adjoint(); - btScalar J = F.determinant(); - P = F.adjoint().transpose() * (m_lambda * (J-1)); - if (m_mu > SIMD_EPSILON) - { - btMatrix3x3 R,S; - if (J < 1024 * SIMD_EPSILON) - R.setIdentity(); - else - PolarDecomposition(F, R, S); // this QR is not robust, consider using implicit shift svd - /*https://fuchuyuan.github.io/research/svd/paper.pdf*/ - P += (F-R) * 2 * m_mu; - } - } - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - } - - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - } - - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){} - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_COROTATED_FORCE; - } - -}; + typedef btAlignedObjectArray<btVector3> TVStack; + btScalar m_mu, m_lambda; + btDeformableCorotatedForce() : m_mu(1), m_lambda(1) + { + } + + btDeformableCorotatedForce(btScalar mu, btScalar lambda) : m_mu(mu), 
m_lambda(lambda) + { + } + + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + } + + virtual void addScaledElasticForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btMatrix3x3 P; + firstPiola(tetra.m_F, P); + btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose() * grad_N_hat_1st_col); + btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose(); + + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + // elastic force + // explicit elastic force + btScalar scale1 = scale * tetra.m_element_measure; + force[id0] -= scale1 * force_on_node0; + force[id1] -= scale1 * force_on_node123.getColumn(0); + force[id2] -= scale1 * force_on_node123.getColumn(1); + force[id3] -= scale1 * force_on_node123.getColumn(2); + } + } + } + + void firstPiola(const btMatrix3x3& F, btMatrix3x3& P) + { + // btMatrix3x3 JFinvT = F.adjoint(); + btScalar J = F.determinant(); + P = F.adjoint().transpose() * (m_lambda * (J - 1)); + if (m_mu > SIMD_EPSILON) + { + btMatrix3x3 R, S; + if (J < 1024 * SIMD_EPSILON) + R.setIdentity(); + else + PolarDecomposition(F, R, S); // this QR is not robust, consider using implicit shift svd + /*https://fuchuyuan.github.io/research/svd/paper.pdf*/ + P += (F - R) * 2 * 
m_mu; + } + } + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + } + + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) + { + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {} + + virtual btDeformableLagrangianForceType getForceType() + { + return BT_COROTATED_FORCE; + } +}; #endif /* btCorotated_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h index 13ee3eacb6..d91867f457 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableGravityForce.h @@ -21,87 +21,85 @@ class btDeformableGravityForce : public btDeformableLagrangianForce { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btVector3 m_gravity; - - btDeformableGravityForce(const btVector3& g) : m_gravity(g) - { - } - - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledGravityForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledGravityForce(scale, force); - } - - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - } - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - } - - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - } - - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){} - - virtual void addScaledGravityForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - btSoftBody::Node& n = psb->m_nodes[j]; - size_t id = n.index; 
- btScalar mass = (n.m_im == 0) ? 0 : 1. / n.m_im; - btVector3 scaled_force = scale * m_gravity * mass; - force[id] += scaled_force; - } - } - } - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_GRAVITY_FORCE; - } + typedef btAlignedObjectArray<btVector3> TVStack; + btVector3 m_gravity; - // the gravitational potential energy - virtual double totalEnergy(btScalar dt) - { - double e = 0; - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - if (node.m_im > 0) - { - e -= m_gravity.dot(node.m_q)/node.m_im; - } - } - } - return e; - } - - + btDeformableGravityForce(const btVector3& g) : m_gravity(g) + { + } + + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledGravityForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledGravityForce(scale, force); + } + + virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + } + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + } + + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) + { + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {} + + virtual void addScaledGravityForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btSoftBody::Node& n = psb->m_nodes[j]; + size_t id = n.index; + btScalar mass = (n.m_im == 0) ? 0 : 1. 
/ n.m_im; + btVector3 scaled_force = scale * m_gravity * mass * m_softBodies[i]->m_gravityFactor; + force[id] += scaled_force; + } + } + } + + virtual btDeformableLagrangianForceType getForceType() + { + return BT_GRAVITY_FORCE; + } + + // the gravitational potential energy + virtual double totalEnergy(btScalar dt) + { + double e = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + if (node.m_im > 0) + { + e -= m_gravity.dot(node.m_q) / node.m_im; + } + } + } + return e; + } }; #endif /* BT_DEFORMABLE_GRAVITY_FORCE_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h index 0b6447442d..d58d825d1c 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableLagrangianForce.h @@ -22,352 +22,351 @@ enum btDeformableLagrangianForceType { - BT_GRAVITY_FORCE = 1, - BT_MASSSPRING_FORCE = 2, - BT_COROTATED_FORCE = 3, - BT_NEOHOOKEAN_FORCE = 4, - BT_LINEAR_ELASTICITY_FORCE = 5, - BT_MOUSE_PICKING_FORCE = 6 + BT_GRAVITY_FORCE = 1, + BT_MASSSPRING_FORCE = 2, + BT_COROTATED_FORCE = 3, + BT_NEOHOOKEAN_FORCE = 4, + BT_LINEAR_ELASTICITY_FORCE = 5, + BT_MOUSE_PICKING_FORCE = 6 }; static inline double randomDouble(double low, double high) { - return low + static_cast<double>(rand()) / RAND_MAX * (high - low); + return low + static_cast<double>(rand()) / RAND_MAX * (high - low); } class btDeformableLagrangianForce { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btAlignedObjectArray<btSoftBody *> m_softBodies; - const btAlignedObjectArray<btSoftBody::Node*>* m_nodes; - - btDeformableLagrangianForce() - { - } - - virtual ~btDeformableLagrangianForce(){} - - // add all forces - virtual void addScaledForces(btScalar scale, TVStack& 
force) = 0; - - // add damping df - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) = 0; - - // build diagonal of A matrix - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) = 0; - - // add elastic df - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) = 0; - - // add all forces that are explicit in explicit solve - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) = 0; - - // add all damping forces - virtual void addScaledDampingForce(btScalar scale, TVStack& force) = 0; - - virtual btDeformableLagrangianForceType getForceType() = 0; - - virtual void reinitialize(bool nodeUpdated) - { - } - - // get number of nodes that have the force - virtual int getNumNodes() - { - int numNodes = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - numNodes += m_softBodies[i]->m_nodes.size(); - } - return numNodes; - } - - // add a soft body to be affected by the particular lagrangian force - virtual void addSoftBody(btSoftBody* psb) - { - m_softBodies.push_back(psb); - } - - virtual void removeSoftBody(btSoftBody* psb) - { - m_softBodies.remove(psb); - } - - virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes) - { - m_nodes = nodes; - } - - // Calculate the incremental deformable generated from the input dx - virtual btMatrix3x3 Ds(int id0, int id1, int id2, int id3, const TVStack& dx) - { - btVector3 c1 = dx[id1] - dx[id0]; - btVector3 c2 = dx[id2] - dx[id0]; - btVector3 c3 = dx[id3] - dx[id0]; - return btMatrix3x3(c1,c2,c3).transpose(); - } - - // Calculate the incremental deformable generated from the current velocity - virtual btMatrix3x3 DsFromVelocity(const btSoftBody::Node* n0, const btSoftBody::Node* n1, const btSoftBody::Node* n2, const btSoftBody::Node* n3) - { - btVector3 c1 = n1->m_v - n0->m_v; - btVector3 c2 = n2->m_v - n0->m_v; - btVector3 c3 = n3->m_v - n0->m_v; - return 
btMatrix3x3(c1,c2,c3).transpose(); - } - - // test for addScaledElasticForce function - virtual void testDerivative() - { - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1)); - } - psb->updateDeformation(); - } - - TVStack dx; - dx.resize(getNumNodes()); - TVStack dphi_dx; - dphi_dx.resize(dx.size()); - for (int i =0; i < dphi_dx.size();++i) - { - dphi_dx[i].setZero(); - } - addScaledForces(-1, dphi_dx); - - // write down the current position - TVStack x; - x.resize(dx.size()); - int counter = 0; - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - x[counter] = psb->m_nodes[j].m_q; - counter++; - } - } - counter = 0; - - // populate dx with random vectors - for (int i = 0; i < dx.size(); ++i) - { - dx[i].setX(randomDouble(-1, 1)); - dx[i].setY(randomDouble(-1, 1)); - dx[i].setZ(randomDouble(-1, 1)); - } - - btAlignedObjectArray<double> errors; - for (int it = 0; it < 10; ++it) - { - for (int i = 0; i < dx.size(); ++i) - { - dx[i] *= 0.5; - } - - // get dphi/dx * dx - double dphi = 0; - for (int i = 0; i < dx.size(); ++i) - { - dphi += dphi_dx[i].dot(dx[i]); - } - - - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter] + dx[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - double f1 = totalElasticEnergy(0); - - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter] - dx[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - - double f2 = totalElasticEnergy(0); - - //restore m_q - for (int i = 0; 
i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - double error = f1-f2-2*dphi; - errors.push_back(error); - std::cout << "Iteration = " << it <<", f1 = " << f1 << ", f2 = " << f2 << ", error = " << error << std::endl; - } - for (int i = 1; i < errors.size(); ++i) - { - std::cout << "Iteration = " << i << ", ratio = " << errors[i-1]/errors[i] << std::endl; - } - } - - // test for addScaledElasticForce function - virtual void testHessian() - { - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1)); - } - psb->updateDeformation(); - } - - - TVStack dx; - dx.resize(getNumNodes()); - TVStack df; - df.resize(dx.size()); - TVStack f1; - f1.resize(dx.size()); - TVStack f2; - f2.resize(dx.size()); - - - // write down the current position - TVStack x; - x.resize(dx.size()); - int counter = 0; - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - x[counter] = psb->m_nodes[j].m_q; - counter++; - } - } - counter = 0; - - // populate dx with random vectors - for (int i = 0; i < dx.size(); ++i) - { - dx[i].setX(randomDouble(-1, 1)); - dx[i].setY(randomDouble(-1, 1)); - dx[i].setZ(randomDouble(-1, 1)); - } - - btAlignedObjectArray<double> errors; - for (int it = 0; it < 10; ++it) - { - for (int i = 0; i < dx.size(); ++i) - { - dx[i] *= 0.5; - } - - // get df - for (int i =0; i < df.size();++i) - { - df[i].setZero(); - f1[i].setZero(); - f2[i].setZero(); - } - - //set df - addScaledElasticForceDifferential(-1, dx, df); - - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < 
psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter] + dx[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - - //set f1 - addScaledForces(-1, f1); - - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter] - dx[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - - //set f2 - addScaledForces(-1, f2); - - //restore m_q - for (int i = 0; i<m_softBodies.size();++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - psb->m_nodes[j].m_q = x[counter]; - counter++; - } - psb->updateDeformation(); - } - counter = 0; - double error = 0; - for (int i = 0; i < df.size();++i) - { - btVector3 error_vector = f1[i]-f2[i]-2*df[i]; - error += error_vector.length2(); - } - error = btSqrt(error); - errors.push_back(error); - std::cout << "Iteration = " << it << ", error = " << error << std::endl; - } - for (int i = 1; i < errors.size(); ++i) - { - std::cout << "Iteration = " << i << ", ratio = " << errors[i-1]/errors[i] << std::endl; - } - } - - // - virtual double totalElasticEnergy(btScalar dt) - { - return 0; - } - - // - virtual double totalDampingEnergy(btScalar dt) - { - return 0; - } - - // total Energy takes dt as input because certain energies depend on dt - virtual double totalEnergy(btScalar dt) - { - return totalElasticEnergy(dt) + totalDampingEnergy(dt); - } + typedef btAlignedObjectArray<btVector3> TVStack; + btAlignedObjectArray<btSoftBody*> m_softBodies; + const btAlignedObjectArray<btSoftBody::Node*>* m_nodes; + + btDeformableLagrangianForce() + { + } + + virtual ~btDeformableLagrangianForce() {} + + // add all forces + virtual void addScaledForces(btScalar scale, TVStack& force) = 0; + + // add damping df + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) = 0; + + // build diagonal of A matrix + virtual 
void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) = 0; + + // add elastic df + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) = 0; + + // add all forces that are explicit in explicit solve + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) = 0; + + // add all damping forces + virtual void addScaledDampingForce(btScalar scale, TVStack& force) = 0; + + virtual void addScaledHessian(btScalar scale) {} + + virtual btDeformableLagrangianForceType getForceType() = 0; + + virtual void reinitialize(bool nodeUpdated) + { + } + + // get number of nodes that have the force + virtual int getNumNodes() + { + int numNodes = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + numNodes += m_softBodies[i]->m_nodes.size(); + } + return numNodes; + } + + // add a soft body to be affected by the particular lagrangian force + virtual void addSoftBody(btSoftBody* psb) + { + m_softBodies.push_back(psb); + } + + virtual void removeSoftBody(btSoftBody* psb) + { + m_softBodies.remove(psb); + } + + virtual void setIndices(const btAlignedObjectArray<btSoftBody::Node*>* nodes) + { + m_nodes = nodes; + } + + // Calculate the incremental deformable generated from the input dx + virtual btMatrix3x3 Ds(int id0, int id1, int id2, int id3, const TVStack& dx) + { + btVector3 c1 = dx[id1] - dx[id0]; + btVector3 c2 = dx[id2] - dx[id0]; + btVector3 c3 = dx[id3] - dx[id0]; + return btMatrix3x3(c1, c2, c3).transpose(); + } + + // Calculate the incremental deformable generated from the current velocity + virtual btMatrix3x3 DsFromVelocity(const btSoftBody::Node* n0, const btSoftBody::Node* n1, const btSoftBody::Node* n2, const btSoftBody::Node* n3) + { + btVector3 c1 = n1->m_v - n0->m_v; + btVector3 c2 = n2->m_v - n0->m_v; + btVector3 c3 = n3->m_v - n0->m_v; + return btMatrix3x3(c1, c2, c3).transpose(); + } + + // test for addScaledElasticForce function + virtual void testDerivative() + { + for (int i = 0; i < 
m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1)); + } + psb->updateDeformation(); + } + + TVStack dx; + dx.resize(getNumNodes()); + TVStack dphi_dx; + dphi_dx.resize(dx.size()); + for (int i = 0; i < dphi_dx.size(); ++i) + { + dphi_dx[i].setZero(); + } + addScaledForces(-1, dphi_dx); + + // write down the current position + TVStack x; + x.resize(dx.size()); + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + x[counter] = psb->m_nodes[j].m_q; + counter++; + } + } + counter = 0; + + // populate dx with random vectors + for (int i = 0; i < dx.size(); ++i) + { + dx[i].setX(randomDouble(-1, 1)); + dx[i].setY(randomDouble(-1, 1)); + dx[i].setZ(randomDouble(-1, 1)); + } + + btAlignedObjectArray<double> errors; + for (int it = 0; it < 10; ++it) + { + for (int i = 0; i < dx.size(); ++i) + { + dx[i] *= 0.5; + } + + // get dphi/dx * dx + double dphi = 0; + for (int i = 0; i < dx.size(); ++i) + { + dphi += dphi_dx[i].dot(dx[i]); + } + + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = x[counter] + dx[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; + double f1 = totalElasticEnergy(0); + + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = x[counter] - dx[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; + + double f2 = totalElasticEnergy(0); + + //restore m_q + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = 
x[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; + double error = f1 - f2 - 2 * dphi; + errors.push_back(error); + std::cout << "Iteration = " << it << ", f1 = " << f1 << ", f2 = " << f2 << ", error = " << error << std::endl; + } + for (int i = 1; i < errors.size(); ++i) + { + std::cout << "Iteration = " << i << ", ratio = " << errors[i - 1] / errors[i] << std::endl; + } + } + + // test for addScaledElasticForce function + virtual void testHessian() + { + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q += btVector3(randomDouble(-.1, .1), randomDouble(-.1, .1), randomDouble(-.1, .1)); + } + psb->updateDeformation(); + } + + TVStack dx; + dx.resize(getNumNodes()); + TVStack df; + df.resize(dx.size()); + TVStack f1; + f1.resize(dx.size()); + TVStack f2; + f2.resize(dx.size()); + + // write down the current position + TVStack x; + x.resize(dx.size()); + int counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + x[counter] = psb->m_nodes[j].m_q; + counter++; + } + } + counter = 0; + + // populate dx with random vectors + for (int i = 0; i < dx.size(); ++i) + { + dx[i].setX(randomDouble(-1, 1)); + dx[i].setY(randomDouble(-1, 1)); + dx[i].setZ(randomDouble(-1, 1)); + } + + btAlignedObjectArray<double> errors; + for (int it = 0; it < 10; ++it) + { + for (int i = 0; i < dx.size(); ++i) + { + dx[i] *= 0.5; + } + + // get df + for (int i = 0; i < df.size(); ++i) + { + df[i].setZero(); + f1[i].setZero(); + f2[i].setZero(); + } + + //set df + addScaledElasticForceDifferential(-1, dx, df); + + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = x[counter] + dx[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; 
+ + //set f1 + addScaledForces(-1, f1); + + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = x[counter] - dx[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; + + //set f2 + addScaledForces(-1, f2); + + //restore m_q + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + psb->m_nodes[j].m_q = x[counter]; + counter++; + } + psb->updateDeformation(); + } + counter = 0; + double error = 0; + for (int i = 0; i < df.size(); ++i) + { + btVector3 error_vector = f1[i] - f2[i] - 2 * df[i]; + error += error_vector.length2(); + } + error = btSqrt(error); + errors.push_back(error); + std::cout << "Iteration = " << it << ", error = " << error << std::endl; + } + for (int i = 1; i < errors.size(); ++i) + { + std::cout << "Iteration = " << i << ", ratio = " << errors[i - 1] / errors[i] << std::endl; + } + } + + // + virtual double totalElasticEnergy(btScalar dt) + { + return 0; + } + + // + virtual double totalDampingEnergy(btScalar dt) + { + return 0; + } + + // total Energy takes dt as input because certain energies depend on dt + virtual double totalEnergy(btScalar dt) + { + return totalElasticEnergy(dt) + totalDampingEnergy(dt); + } }; #endif /* BT_DEFORMABLE_LAGRANGIAN_FORCE */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h index 106dc10ad6..971192050b 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableLinearElasticityForce.h @@ -18,323 +18,445 @@ #include "btDeformableLagrangianForce.h" #include "LinearMath/btQuickprof.h" +#include "btSoftBodyInternals.h" +#define TETRA_FLAT_THRESHOLD 0.01 class btDeformableLinearElasticityForce : public btDeformableLagrangianForce { 
public: - typedef btAlignedObjectArray<btVector3> TVStack; - btScalar m_mu, m_lambda; - btScalar m_mu_damp, m_lambda_damp; - btDeformableLinearElasticityForce(): m_mu(1), m_lambda(1) - { - btScalar damping = 0.05; - m_mu_damp = damping * m_mu; - m_lambda_damp = damping * m_lambda; - } - - btDeformableLinearElasticityForce(btScalar mu, btScalar lambda, btScalar damping = 0.05): m_mu(mu), m_lambda(lambda) - { - m_mu_damp = damping * m_mu; - m_lambda_damp = damping * m_lambda; - } - - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledDampingForce(scale, force); - addScaledElasticForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - if (m_mu_damp == 0 && m_lambda_damp == 0) - return; - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse; - btMatrix3x3 I; - I.setIdentity(); - btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp; - // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); - btVector3 df_on_node0 = dP 
* (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); - - // damping force differential - btScalar scale1 = scale * tetra.m_element_measure; - force[id0] -= scale1 * df_on_node0; - force[id1] -= scale1 * df_on_node123.getColumn(0); - force[id2] -= scale1 * df_on_node123.getColumn(1); - force[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - virtual double totalElasticEnergy(btScalar dt) - { - double energy = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetraScratches.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::TetraScratch& s = psb->m_tetraScratches[j]; - energy += tetra.m_element_measure * elasticEnergyDensity(s); - } - } - return energy; - } - - // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual double totalDampingEnergy(btScalar dt) - { - double energy = 0; - int sz = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - sz = btMax(sz, psb->m_nodes[j].index); - } - } - TVStack dampingForce; - dampingForce.resize(sz+1); - for (int i = 0; i < dampingForce.size(); ++i) - dampingForce[i].setZero(); - addScaledDampingForce(0.5, dampingForce); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - energy -= dampingForce[node.index].dot(node.m_v) / dt; - } - } - return energy; - } - - double elasticEnergyDensity(const btSoftBody::TetraScratch& s) - { - double density = 0; - btMatrix3x3 epsilon = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity(); - btScalar trace = epsilon[0][0] + 
epsilon[1][1] + epsilon[2][2]; - density += m_mu * (epsilon[0].length2() + epsilon[1].length2() + epsilon[2].length2()); - density += m_lambda * trace * trace * 0.5; - return density; - } - - virtual void addScaledElasticForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - btScalar max_p = psb->m_cfg.m_maxStress; - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btMatrix3x3 P; - firstPiola(psb->m_tetraScratches[j],P); + typedef btAlignedObjectArray<btVector3> TVStack; + btScalar m_mu, m_lambda; + btScalar m_E, m_nu; // Young's modulus and Poisson ratio + btScalar m_damping_alpha, m_damping_beta; + btDeformableLinearElasticityForce() : m_mu(1), m_lambda(1), m_damping_alpha(0.01), m_damping_beta(0.01) + { + updateYoungsModulusAndPoissonRatio(); + } + + btDeformableLinearElasticityForce(btScalar mu, btScalar lambda, btScalar damping_alpha = 0.01, btScalar damping_beta = 0.01) : m_mu(mu), m_lambda(lambda), m_damping_alpha(damping_alpha), m_damping_beta(damping_beta) + { + updateYoungsModulusAndPoissonRatio(); + } + + void updateYoungsModulusAndPoissonRatio() + { + // conversion from Lame Parameters to Young's modulus and Poisson ratio + // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters + m_E = m_mu * (3 * m_lambda + 2 * m_mu) / (m_lambda + m_mu); + m_nu = m_lambda * 0.5 / (m_mu + m_lambda); + } + + void updateLameParameters() + { + // conversion from Young's modulus and Poisson ratio to Lame Parameters + // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters + m_mu = m_E * 0.5 / (1 + m_nu); + m_lambda = m_E * m_nu / ((1 + m_nu) * (1 - 2 * m_nu)); + } + + void setYoungsModulus(btScalar E) + { + m_E = E; + updateLameParameters(); + } + + void setPoissonRatio(btScalar nu) + 
{ + m_nu = nu; + updateLameParameters(); + } + + void setDamping(btScalar damping_alpha, btScalar damping_beta) + { + m_damping_alpha = damping_alpha; + m_damping_beta = damping_beta; + } + + void setLameParameters(btScalar mu, btScalar lambda) + { + m_mu = mu; + m_lambda = lambda; + updateYoungsModulusAndPoissonRatio(); + } + + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledDampingForce(scale, force); + addScaledElasticForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + if (m_damping_alpha == 0 && m_damping_beta == 0) + return; + btScalar mu_damp = m_damping_beta * m_mu; + btScalar lambda_damp = m_damping_beta * m_lambda; + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + bool close_to_flat = (psb->m_tetraScratches[j].m_J < TETRA_FLAT_THRESHOLD); + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse; + if (!close_to_flat) + { + dF = psb->m_tetraScratches[j].m_corotation.transpose() * dF; + } + btMatrix3x3 I; + I.setIdentity(); + btMatrix3x3 dP = (dF + dF.transpose()) * mu_damp + I * ((dF[0][0] + dF[1][1] + 
dF[2][2]) * lambda_damp); + btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + if (!close_to_flat) + { + df_on_node123 = psb->m_tetraScratches[j].m_corotation * df_on_node123; + } + btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + // damping force differential + btScalar scale1 = scale * tetra.m_element_measure; + force[id0] -= scale1 * df_on_node0; + force[id1] -= scale1 * df_on_node123.getColumn(0); + force[id2] -= scale1 * df_on_node123.getColumn(1); + force[id3] -= scale1 * df_on_node123.getColumn(2); + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + size_t id = node.index; + if (node.m_im > 0) + { + force[id] -= scale * node.m_v / node.m_im * m_damping_alpha; + } + } + } + } + + virtual double totalElasticEnergy(btScalar dt) + { + double energy = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetraScratches.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::TetraScratch& s = psb->m_tetraScratches[j]; + energy += tetra.m_element_measure * elasticEnergyDensity(s); + } + } + return energy; + } + + // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual double totalDampingEnergy(btScalar dt) + { + double energy = 0; + int sz = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + sz = btMax(sz, psb->m_nodes[j].index); + } + } + TVStack dampingForce; + dampingForce.resize(sz + 1); + for (int i = 0; i < dampingForce.size(); ++i) + dampingForce[i].setZero(); + addScaledDampingForce(0.5, dampingForce); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + 
{ + const btSoftBody::Node& node = psb->m_nodes[j]; + energy -= dampingForce[node.index].dot(node.m_v) / dt; + } + } + return energy; + } + + double elasticEnergyDensity(const btSoftBody::TetraScratch& s) + { + double density = 0; + btMatrix3x3 epsilon = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity(); + btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2]; + density += m_mu * (epsilon[0].length2() + epsilon[1].length2() + epsilon[2].length2()); + density += m_lambda * trace * trace * 0.5; + return density; + } + + virtual void addScaledElasticForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + btScalar max_p = psb->m_cfg.m_maxStress; + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btMatrix3x3 P; + firstPiola(psb->m_tetraScratches[j], P); #if USE_SVD - if (max_p > 0) - { - // since we want to clamp the principal stress to max_p, we only need to - // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p - btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2()); - if (trPTP > max_p * max_p) - { - btMatrix3x3 U, V; - btVector3 sigma; - singularValueDecomposition(P, U, sigma, V); - sigma[0] = btMin(sigma[0], max_p); - sigma[1] = btMin(sigma[1], max_p); - sigma[2] = btMin(sigma[2], max_p); - sigma[0] = btMax(sigma[0], -max_p); - sigma[1] = btMax(sigma[1], -max_p); - sigma[2] = btMax(sigma[2], -max_p); - btMatrix3x3 Sigma; - Sigma.setIdentity(); - Sigma[0][0] = sigma[0]; - Sigma[1][1] = sigma[1]; - Sigma[2][2] = sigma[2]; - P = U * Sigma * V.transpose(); - } - } + if (max_p > 0) + { + // since we want to clamp the principal stress to max_p, we only need to + // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p 
* max_p + btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2()); + if (trPTP > max_p * max_p) + { + btMatrix3x3 U, V; + btVector3 sigma; + singularValueDecomposition(P, U, sigma, V); + sigma[0] = btMin(sigma[0], max_p); + sigma[1] = btMin(sigma[1], max_p); + sigma[2] = btMin(sigma[2], max_p); + sigma[0] = btMax(sigma[0], -max_p); + sigma[1] = btMax(sigma[1], -max_p); + sigma[2] = btMax(sigma[2], -max_p); + btMatrix3x3 Sigma; + Sigma.setIdentity(); + Sigma[0][0] = sigma[0]; + Sigma[1][1] = sigma[1]; + Sigma[2][2] = sigma[2]; + P = U * Sigma * V.transpose(); + } + } #endif - // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose(); - btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col; - - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - - // elastic force - btScalar scale1 = scale * tetra.m_element_measure; - force[id0] -= scale1 * force_on_node0; - force[id1] -= scale1 * force_on_node123.getColumn(0); - force[id2] -= scale1 * force_on_node123.getColumn(1); - force[id3] -= scale1 * force_on_node123.getColumn(2); - } - } - } - - // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - if (m_mu_damp == 0 && m_lambda_damp == 0) - return; - int numNodes = getNumNodes(); - btAssert(numNodes <= df.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < 
psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse; - btMatrix3x3 I; - I.setIdentity(); - btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp; - // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); - // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); - btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; - - // damping force differential - btScalar scale1 = scale * tetra.m_element_measure; - df[id0] -= scale1 * df_on_node0; - df[id1] -= scale1 * df_on_node123.getColumn(0); - df[id2] -= scale1 * df_on_node123.getColumn(1); - df[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= df.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse; - btMatrix3x3 dP; - 
firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP); - // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); - btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; - - // elastic force differential - btScalar scale1 = scale * tetra.m_element_measure; - df[id0] -= scale1 * df_on_node0; - df[id1] -= scale1 * df_on_node123.getColumn(0); - df[id2] -= scale1 * df_on_node123.getColumn(1); - df[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P) - { - btMatrix3x3 epsilon = (s.m_F + s.m_F.transpose()) * 0.5 - btMatrix3x3::getIdentity(); - btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2]; - P = epsilon * btScalar(2) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace; - } - - // Let P be the first piola stress. - // This function calculates the dP = dP/dF * dF - void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) - { - btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]); - dP = (dF + dF.transpose()) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace; - } - - // Let Q be the damping stress. 
- // This function calculates the dP = dQ/dF * dF - void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) - { - btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]); - dP = (dF + dF.transpose()) * m_mu_damp + btMatrix3x3::getIdentity() * m_lambda_damp * trace; - } - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_LINEAR_ELASTICITY_FORCE; - } - + // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); + btMatrix3x3 force_on_node123 = psb->m_tetraScratches[j].m_corotation * P * tetra.m_Dm_inverse.transpose(); + btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col; + + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + + // elastic force + btScalar scale1 = scale * tetra.m_element_measure; + force[id0] -= scale1 * force_on_node0; + force[id1] -= scale1 * force_on_node123.getColumn(0); + force[id2] -= scale1 * force_on_node123.getColumn(1); + force[id3] -= scale1 * force_on_node123.getColumn(2); + } + } + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {} + + // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) + { + if (m_damping_alpha == 0 && m_damping_beta == 0) + return; + btScalar mu_damp = m_damping_beta * m_mu; + btScalar lambda_damp = m_damping_beta * m_lambda; + int numNodes = getNumNodes(); + btAssert(numNodes <= df.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if 
(!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + bool close_to_flat = (psb->m_tetraScratches[j].m_J < TETRA_FLAT_THRESHOLD); + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse; + if (!close_to_flat) + { + dF = psb->m_tetraScratches[j].m_corotation.transpose() * dF; + } + btMatrix3x3 I; + I.setIdentity(); + btMatrix3x3 dP = (dF + dF.transpose()) * mu_damp + I * ((dF[0][0] + dF[1][1] + dF[2][2]) * lambda_damp); + btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + if (!close_to_flat) + { + df_on_node123 = psb->m_tetraScratches[j].m_corotation * df_on_node123; + } + btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + + // damping force differential + btScalar scale1 = scale * tetra.m_element_measure; + df[id0] -= scale1 * df_on_node0; + df[id1] -= scale1 * df_on_node123.getColumn(0); + df[id2] -= scale1 * df_on_node123.getColumn(1); + df[id3] -= scale1 * df_on_node123.getColumn(2); + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + size_t id = node.index; + if (node.m_im > 0) + { + df[id] -= scale * dv[id] / node.m_im * m_damping_alpha; + } + } + } + } + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= df.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; 
+ btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = psb->m_tetraScratches[j].m_corotation.transpose() * Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse; + btMatrix3x3 dP; + firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP); + // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); + btMatrix3x3 df_on_node123 = psb->m_tetraScratches[j].m_corotation * dP * tetra.m_Dm_inverse.transpose(); + btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + + // elastic force differential + btScalar scale1 = scale * tetra.m_element_measure; + df[id0] -= scale1 * df_on_node0; + df[id1] -= scale1 * df_on_node123.getColumn(0); + df[id2] -= scale1 * df_on_node123.getColumn(1); + df[id3] -= scale1 * df_on_node123.getColumn(2); + } + } + } + + void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P) + { + btMatrix3x3 corotated_F = s.m_corotation.transpose() * s.m_F; + + btMatrix3x3 epsilon = (corotated_F + corotated_F.transpose()) * 0.5 - btMatrix3x3::getIdentity(); + btScalar trace = epsilon[0][0] + epsilon[1][1] + epsilon[2][2]; + P = epsilon * btScalar(2) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace; + } + + // Let P be the first piola stress. + // This function calculates the dP = dP/dF * dF + void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) + { + btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]); + dP = (dF + dF.transpose()) * m_mu + btMatrix3x3::getIdentity() * m_lambda * trace; + } + + // Let Q be the damping stress. 
+ // This function calculates the dP = dQ/dF * dF + void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) + { + btScalar mu_damp = m_damping_beta * m_mu; + btScalar lambda_damp = m_damping_beta * m_lambda; + btScalar trace = (dF[0][0] + dF[1][1] + dF[2][2]); + dP = (dF + dF.transpose()) * mu_damp + btMatrix3x3::getIdentity() * lambda_damp * trace; + } + + virtual void addScaledHessian(btScalar scale) + { + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btMatrix3x3 P; + firstPiola(psb->m_tetraScratches[j], P); // make sure scratch is evaluated at x_n + dt * vn + btMatrix3x3 force_on_node123 = psb->m_tetraScratches[j].m_corotation * P * tetra.m_Dm_inverse.transpose(); + btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + btScalar scale1 = scale * (scale + m_damping_beta) * tetra.m_element_measure; // stiff and stiffness-damping terms; + node0->m_effectiveMass += OuterProduct(force_on_node0, force_on_node0) * scale1; + node1->m_effectiveMass += OuterProduct(force_on_node123.getColumn(0), force_on_node123.getColumn(0)) * scale1; + node2->m_effectiveMass += OuterProduct(force_on_node123.getColumn(1), force_on_node123.getColumn(1)) * scale1; + node3->m_effectiveMass += OuterProduct(force_on_node123.getColumn(2), force_on_node123.getColumn(2)) * scale1; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btSoftBody::Node& node = psb->m_nodes[j]; + if (node.m_im > 0) + { + btMatrix3x3 I; + I.setIdentity(); + node.m_effectiveMass += I * (scale * (1.0 / node.m_im) * m_damping_alpha); + } + } + } 
+ } + + virtual btDeformableLagrangianForceType getForceType() + { + return BT_LINEAR_ELASTICITY_FORCE; + } }; #endif /* BT_LINEAR_ELASTICITY_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h index b128df92cc..8c97bd1ba8 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMassSpringForce.h @@ -20,282 +20,282 @@ class btDeformableMassSpringForce : public btDeformableLagrangianForce { - // If true, the damping force will be in the direction of the spring - // If false, the damping force will be in the direction of the velocity - bool m_momentum_conserving; - btScalar m_elasticStiffness, m_dampingStiffness, m_bendingStiffness; + // If true, the damping force will be in the direction of the spring + // If false, the damping force will be in the direction of the velocity + bool m_momentum_conserving; + btScalar m_elasticStiffness, m_dampingStiffness, m_bendingStiffness; + public: - typedef btAlignedObjectArray<btVector3> TVStack; - btDeformableMassSpringForce() : m_momentum_conserving(false), m_elasticStiffness(1), m_dampingStiffness(0.05) - { - } - btDeformableMassSpringForce(btScalar k, btScalar d, bool conserve_angular = true, double bending_k = -1) : m_momentum_conserving(conserve_angular), m_elasticStiffness(k), m_dampingStiffness(d), m_bendingStiffness(bending_k) - { - if (m_bendingStiffness < btScalar(0)) - { - m_bendingStiffness = m_elasticStiffness; - } - } - - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledDampingForce(scale, force); - addScaledElasticForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - for (int i = 0; i < 
m_softBodies.size(); ++i) - { - const btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* node2 = link.m_n[1]; - size_t id1 = node1->index; - size_t id2 = node2->index; - - // damping force - btVector3 v_diff = (node2->m_v - node1->m_v); - btVector3 scaled_force = scale * m_dampingStiffness * v_diff; - if (m_momentum_conserving) - { - if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) - { - btVector3 dir = (node2->m_x - node1->m_x).normalized(); - scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir; - } - } - force[id1] += scaled_force; - force[id2] -= scaled_force; - } - } - } - - virtual void addScaledElasticForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - for (int i = 0; i < m_softBodies.size(); ++i) - { - const btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* node2 = link.m_n[1]; - btScalar r = link.m_rl; - size_t id1 = node1->index; - size_t id2 = node2->index; - - // elastic force - btVector3 dir = (node2->m_q - node1->m_q); - btVector3 dir_normalized = (dir.norm() > SIMD_EPSILON) ? dir.normalized() : btVector3(0,0,0); - btScalar scaled_stiffness = scale * (link.m_bbending ? 
m_bendingStiffness : m_elasticStiffness); - btVector3 scaled_force = scaled_stiffness * (dir - dir_normalized * r); - force[id1] += scaled_force; - force[id2] -= scaled_force; - } - } - } - - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - // implicit damping force differential - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - btScalar scaled_k_damp = m_dampingStiffness * scale; - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* node2 = link.m_n[1]; - size_t id1 = node1->index; - size_t id2 = node2->index; + typedef btAlignedObjectArray<btVector3> TVStack; + btDeformableMassSpringForce() : m_momentum_conserving(false), m_elasticStiffness(1), m_dampingStiffness(0.05) + { + } + btDeformableMassSpringForce(btScalar k, btScalar d, bool conserve_angular = true, double bending_k = -1) : m_momentum_conserving(conserve_angular), m_elasticStiffness(k), m_dampingStiffness(d), m_bendingStiffness(bending_k) + { + if (m_bendingStiffness < btScalar(0)) + { + m_bendingStiffness = m_elasticStiffness; + } + } + + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledDampingForce(scale, force); + addScaledElasticForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + for (int i = 0; i < m_softBodies.size(); ++i) + { + const btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + 
size_t id1 = node1->index; + size_t id2 = node2->index; + + // damping force + btVector3 v_diff = (node2->m_v - node1->m_v); + btVector3 scaled_force = scale * m_dampingStiffness * v_diff; + if (m_momentum_conserving) + { + if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) + { + btVector3 dir = (node2->m_x - node1->m_x).normalized(); + scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir; + } + } + force[id1] += scaled_force; + force[id2] -= scaled_force; + } + } + } + + virtual void addScaledElasticForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + for (int i = 0; i < m_softBodies.size(); ++i) + { + const btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + btScalar r = link.m_rl; + size_t id1 = node1->index; + size_t id2 = node2->index; + + // elastic force + btVector3 dir = (node2->m_q - node1->m_q); + btVector3 dir_normalized = (dir.norm() > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0); + btScalar scaled_stiffness = scale * (link.m_bbending ? 
m_bendingStiffness : m_elasticStiffness); + btVector3 scaled_force = scaled_stiffness * (dir - dir_normalized * r); + force[id1] += scaled_force; + force[id2] -= scaled_force; + } + } + } + + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) + { + // implicit damping force differential + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + btScalar scaled_k_damp = m_dampingStiffness * scale; + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + size_t id1 = node1->index; + size_t id2 = node2->index; + + btVector3 local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]); + if (m_momentum_conserving) + { + if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) + { + btVector3 dir = (node2->m_x - node1->m_x).normalized(); + local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]).dot(dir) * dir; + } + } + df[id1] += local_scaled_df; + df[id2] -= local_scaled_df; + } + } + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) + { + // implicit damping force differential + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + btScalar scaled_k_damp = m_dampingStiffness * scale; + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + size_t id1 = node1->index; + size_t id2 = node2->index; + if (m_momentum_conserving) + { + if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) + { + btVector3 dir = (node2->m_x - node1->m_x).normalized(); + for (int d = 0; d < 3; ++d) + { + if (node1->m_im > 0) + diagA[id1][d] -= scaled_k_damp * dir[d] * dir[d]; + if (node2->m_im > 0) + diagA[id2][d] -= 
scaled_k_damp * dir[d] * dir[d]; + } + } + } + else + { + for (int d = 0; d < 3; ++d) + { + if (node1->m_im > 0) + diagA[id1][d] -= scaled_k_damp; + if (node2->m_im > 0) + diagA[id2][d] -= scaled_k_damp; + } + } + } + } + } + + virtual double totalElasticEnergy(btScalar dt) + { + double energy = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + const btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + btScalar r = link.m_rl; + + // elastic force + btVector3 dir = (node2->m_q - node1->m_q); + energy += 0.5 * m_elasticStiffness * (dir.norm() - r) * (dir.norm() - r); + } + } + return energy; + } + + virtual double totalDampingEnergy(btScalar dt) + { + double energy = 0; + int sz = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + sz = btMax(sz, psb->m_nodes[j].index); + } + } + TVStack dampingForce; + dampingForce.resize(sz + 1); + for (int i = 0; i < dampingForce.size(); ++i) + dampingForce[i].setZero(); + addScaledDampingForce(0.5, dampingForce); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + energy -= dampingForce[node.index].dot(node.m_v) / dt; + } + } + return energy; + } + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + // implicit damping force differential + for (int i = 0; i < m_softBodies.size(); ++i) + { + const btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_links.size(); ++j) + { + const btSoftBody::Link& link = psb->m_links[j]; + 
btSoftBody::Node* node1 = link.m_n[0]; + btSoftBody::Node* node2 = link.m_n[1]; + size_t id1 = node1->index; + size_t id2 = node2->index; + btScalar r = link.m_rl; - btVector3 local_scaled_df = scaled_k_damp * (dv[id2] - dv[id1]); - if (m_momentum_conserving) - { - if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) - { - btVector3 dir = (node2->m_x - node1->m_x).normalized(); - local_scaled_df= scaled_k_damp * (dv[id2] - dv[id1]).dot(dir) * dir; - } - } - df[id1] += local_scaled_df; - df[id2] -= local_scaled_df; - } - } - } - - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) - { - // implicit damping force differential - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - btScalar scaled_k_damp = m_dampingStiffness * scale; - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* node2 = link.m_n[1]; - size_t id1 = node1->index; - size_t id2 = node2->index; - if (m_momentum_conserving) - { - if ((node2->m_x - node1->m_x).norm() > SIMD_EPSILON) - { - btVector3 dir = (node2->m_x - node1->m_x).normalized(); - for (int d = 0; d < 3; ++d) - { - if (node1->m_im > 0) - diagA[id1][d] -= scaled_k_damp * dir[d] * dir[d]; - if (node2->m_im > 0) - diagA[id2][d] -= scaled_k_damp * dir[d] * dir[d]; - } - } - } - else - { - for (int d = 0; d < 3; ++d) - { - if (node1->m_im > 0) - diagA[id1][d] -= scaled_k_damp; - if (node2->m_im > 0) - diagA[id2][d] -= scaled_k_damp; - } - } - } - } - } - - virtual double totalElasticEnergy(btScalar dt) - { - double energy = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - const btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* 
node2 = link.m_n[1]; - btScalar r = link.m_rl; + btVector3 dir = (node1->m_q - node2->m_q); + btScalar dir_norm = dir.norm(); + btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0); + btVector3 dx_diff = dx[id1] - dx[id2]; + btVector3 scaled_df = btVector3(0, 0, 0); + btScalar scaled_k = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness); + if (dir_norm > SIMD_EPSILON) + { + scaled_df -= scaled_k * dir_normalized.dot(dx_diff) * dir_normalized; + scaled_df += scaled_k * dir_normalized.dot(dx_diff) * ((dir_norm - r) / dir_norm) * dir_normalized; + scaled_df -= scaled_k * ((dir_norm - r) / dir_norm) * dx_diff; + } - // elastic force - btVector3 dir = (node2->m_q - node1->m_q); - energy += 0.5 * m_elasticStiffness * (dir.norm() - r) * (dir.norm() -r); - } - } - return energy; - } - - virtual double totalDampingEnergy(btScalar dt) - { - double energy = 0; - int sz = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - sz = btMax(sz, psb->m_nodes[j].index); - } - } - TVStack dampingForce; - dampingForce.resize(sz+1); - for (int i = 0; i < dampingForce.size(); ++i) - dampingForce[i].setZero(); - addScaledDampingForce(0.5, dampingForce); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - energy -= dampingForce[node.index].dot(node.m_v) / dt; - } - } - return energy; - } - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - // implicit damping force differential - for (int i = 0; i < m_softBodies.size(); ++i) - { - const btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_links.size(); ++j) - { - const btSoftBody::Link& link = 
psb->m_links[j]; - btSoftBody::Node* node1 = link.m_n[0]; - btSoftBody::Node* node2 = link.m_n[1]; - size_t id1 = node1->index; - size_t id2 = node2->index; - btScalar r = link.m_rl; + df[id1] += scaled_df; + df[id2] -= scaled_df; + } + } + } - btVector3 dir = (node1->m_q - node2->m_q); - btScalar dir_norm = dir.norm(); - btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0,0,0); - btVector3 dx_diff = dx[id1] - dx[id2]; - btVector3 scaled_df = btVector3(0,0,0); - btScalar scaled_k = scale * (link.m_bbending ? m_bendingStiffness : m_elasticStiffness); - if (dir_norm > SIMD_EPSILON) - { - scaled_df -= scaled_k * dir_normalized.dot(dx_diff) * dir_normalized; - scaled_df += scaled_k * dir_normalized.dot(dx_diff) * ((dir_norm-r)/dir_norm) * dir_normalized; - scaled_df -= scaled_k * ((dir_norm-r)/dir_norm) * dx_diff; - } - - df[id1] += scaled_df; - df[id2] -= scaled_df; - } - } - } - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_MASSSPRING_FORCE; - } - + virtual btDeformableLagrangianForceType getForceType() + { + return BT_MASSSPRING_FORCE; + } }; #endif /* btMassSpring_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h index 07c10935f4..d218d96214 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMousePickingForce.h @@ -20,126 +20,143 @@ class btDeformableMousePickingForce : public btDeformableLagrangianForce { - // If true, the damping force will be in the direction of the spring - // If false, the damping force will be in the direction of the velocity - btScalar m_elasticStiffness, m_dampingStiffness; - const btSoftBody::Face& m_face; - btVector3 m_mouse_pos; - btScalar m_maxForce; + // If true, the damping force will be in the direction of the spring + // If false, the damping force will be in the direction of the velocity + 
btScalar m_elasticStiffness, m_dampingStiffness; + const btSoftBody::Face& m_face; + btVector3 m_mouse_pos; + btScalar m_maxForce; + public: - typedef btAlignedObjectArray<btVector3> TVStack; - btDeformableMousePickingForce(btScalar k, btScalar d, const btSoftBody::Face& face, btVector3 mouse_pos, btScalar maxForce = 0.3) : m_elasticStiffness(k), m_dampingStiffness(d), m_face(face), m_mouse_pos(mouse_pos), m_maxForce(maxForce) - { - } - - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledDampingForce(scale, force); - addScaledElasticForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - for (int i = 0; i < 3; ++i) - { - btVector3 v_diff = m_face.m_n[i]->m_v; - btVector3 scaled_force = scale * m_dampingStiffness * v_diff; - if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) - { - btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized(); - scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir; - } - force[m_face.m_n[i]->index] -= scaled_force; - } - } - - virtual void addScaledElasticForce(btScalar scale, TVStack& force) - { - btScalar scaled_stiffness = scale * m_elasticStiffness; - for (int i = 0; i < 3; ++i) - { - btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos); - btVector3 scaled_force = scaled_stiffness * dir; - if (scaled_force.safeNorm() > m_maxForce) - { - scaled_force.safeNormalize(); - scaled_force *= m_maxForce; - } - force[m_face.m_n[i]->index] -= scaled_force; - } - } - - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - btScalar scaled_k_damp = m_dampingStiffness * scale; - for (int i = 0; i < 3; ++i) - { - btVector3 local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index]; - if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) - { - btVector3 dir = (m_face.m_n[i]->m_x - 
m_mouse_pos).normalized(); - local_scaled_df= scaled_k_damp * dv[m_face.m_n[i]->index].dot(dir) * dir; - } - df[m_face.m_n[i]->index] -= local_scaled_df; - } - } - - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){} - - virtual double totalElasticEnergy(btScalar dt) - { - double energy = 0; - for (int i = 0; i < 3; ++i) - { - btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos); - btVector3 scaled_force = m_elasticStiffness * dir; - if (scaled_force.safeNorm() > m_maxForce) - { - scaled_force.safeNormalize(); - scaled_force *= m_maxForce; - } - energy += 0.5 * scaled_force.dot(dir); - } - return energy; - } - - virtual double totalDampingEnergy(btScalar dt) - { - double energy = 0; - for (int i = 0; i < 3; ++i) - { - btVector3 v_diff = m_face.m_n[i]->m_v; - btVector3 scaled_force = m_dampingStiffness * v_diff; - if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) - { - btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized(); - scaled_force = m_dampingStiffness * v_diff.dot(dir) * dir; - } - energy -= scaled_force.dot(m_face.m_n[i]->m_v) / dt; - } - return energy; - } - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - //TODO - } - - void setMousePos(const btVector3& p) - { - m_mouse_pos = p; - } - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_MOUSE_PICKING_FORCE; - } - + typedef btAlignedObjectArray<btVector3> TVStack; + btDeformableMousePickingForce(btScalar k, btScalar d, const btSoftBody::Face& face, btVector3 mouse_pos, btScalar maxForce = 0.3) : m_elasticStiffness(k), m_dampingStiffness(d), m_face(face), m_mouse_pos(mouse_pos), m_maxForce(maxForce) + { + } + + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledDampingForce(scale, force); + addScaledElasticForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + 
virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + for (int i = 0; i < 3; ++i) + { + btVector3 v_diff = m_face.m_n[i]->m_v; + btVector3 scaled_force = scale * m_dampingStiffness * v_diff; + if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) + { + btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized(); + scaled_force = scale * m_dampingStiffness * v_diff.dot(dir) * dir; + } + force[m_face.m_n[i]->index] -= scaled_force; + } + } + + virtual void addScaledElasticForce(btScalar scale, TVStack& force) + { + btScalar scaled_stiffness = scale * m_elasticStiffness; + for (int i = 0; i < 3; ++i) + { + btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos); + btVector3 scaled_force = scaled_stiffness * dir; + if (scaled_force.safeNorm() > m_maxForce) + { + scaled_force.safeNormalize(); + scaled_force *= m_maxForce; + } + force[m_face.m_n[i]->index] -= scaled_force; + } + } + + virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) + { + btScalar scaled_k_damp = m_dampingStiffness * scale; + for (int i = 0; i < 3; ++i) + { + btVector3 local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index]; + if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) + { + btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized(); + local_scaled_df = scaled_k_damp * dv[m_face.m_n[i]->index].dot(dir) * dir; + } + df[m_face.m_n[i]->index] -= local_scaled_df; + } + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {} + + virtual double totalElasticEnergy(btScalar dt) + { + double energy = 0; + for (int i = 0; i < 3; ++i) + { + btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos); + btVector3 scaled_force = m_elasticStiffness * dir; + if (scaled_force.safeNorm() > m_maxForce) + { + scaled_force.safeNormalize(); + scaled_force *= m_maxForce; + } + energy += 0.5 * scaled_force.dot(dir); + } + return energy; + } + + virtual double totalDampingEnergy(btScalar dt) + { + double 
energy = 0; + for (int i = 0; i < 3; ++i) + { + btVector3 v_diff = m_face.m_n[i]->m_v; + btVector3 scaled_force = m_dampingStiffness * v_diff; + if ((m_face.m_n[i]->m_x - m_mouse_pos).norm() > SIMD_EPSILON) + { + btVector3 dir = (m_face.m_n[i]->m_x - m_mouse_pos).normalized(); + scaled_force = m_dampingStiffness * v_diff.dot(dir) * dir; + } + energy -= scaled_force.dot(m_face.m_n[i]->m_v) / dt; + } + return energy; + } + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + btScalar scaled_stiffness = scale * m_elasticStiffness; + for (int i = 0; i < 3; ++i) + { + btVector3 dir = (m_face.m_n[i]->m_q - m_mouse_pos); + btScalar dir_norm = dir.norm(); + btVector3 dir_normalized = (dir_norm > SIMD_EPSILON) ? dir.normalized() : btVector3(0, 0, 0); + int id = m_face.m_n[i]->index; + btVector3 dx_diff = dx[id]; + btScalar r = 0; // rest length is 0 for picking spring + btVector3 scaled_df = btVector3(0, 0, 0); + if (dir_norm > SIMD_EPSILON) + { + scaled_df -= scaled_stiffness * dir_normalized.dot(dx_diff) * dir_normalized; + scaled_df += scaled_stiffness * dir_normalized.dot(dx_diff) * ((dir_norm - r) / dir_norm) * dir_normalized; + scaled_df -= scaled_stiffness * ((dir_norm - r) / dir_norm) * dx_diff; + } + df[id] += scaled_df; + } + } + + void setMousePos(const btVector3& p) + { + m_mouse_pos = p; + } + + virtual btDeformableLagrangianForceType getForceType() + { + return BT_MOUSE_PICKING_FORCE; + } }; #endif /* btMassSpring_h */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp index c8cc47923e..631fd5fbed 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.cpp @@ -13,131 +13,132 @@ 3. This notice may not be removed or altered from any source distribution. 
*/ - #include "btDeformableMultiBodyConstraintSolver.h" #include <iostream> // override the iterations method to include deformable/multibody contact -btScalar btDeformableMultiBodyConstraintSolver::solveDeformableGroupIterations(btCollisionObject** bodies,int numBodies,btCollisionObject** deformableBodies,int numDeformableBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer) +btScalar btDeformableMultiBodyConstraintSolver::solveDeformableGroupIterations(btCollisionObject** bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer) { - { - ///this is a special step to resolve penetrations (just for contacts) - solveGroupCacheFriendlySplitImpulseIterations(bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer); + { + ///this is a special step to resolve penetrations (just for contacts) + solveGroupCacheFriendlySplitImpulseIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer); + + int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations; + for (int iteration = 0; iteration < maxIterations; iteration++) + { + // rigid bodies are solved using solver body velocity, but rigid/deformable contact directly uses the velocity of the actual rigid body. So we have to do the following: Solve one iteration of the rigid/rigid contact, get the updated velocity in the solver body and update the velocity of the underlying rigid body. Then solve the rigid/deformable contact. 
Finally, grab the (once again) updated rigid velocity and update the velocity of the wrapping solver body - int maxIterations = m_maxOverrideNumSolverIterations > infoGlobal.m_numIterations ? m_maxOverrideNumSolverIterations : infoGlobal.m_numIterations; - for (int iteration = 0; iteration < maxIterations; iteration++) - { - // rigid bodies are solved using solver body velocity, but rigid/deformable contact directly uses the velocity of the actual rigid body. So we have to do the following: Solve one iteration of the rigid/rigid contact, get the updated velocity in the solver body and update the velocity of the underlying rigid body. Then solve the rigid/deformable contact. Finally, grab the (once again) updated rigid velocity and update the velocity of the wrapping solver body - - // solve rigid/rigid in solver body - m_leastSquaresResidual = solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer); - // solver body velocity -> rigid body velocity - solverBodyWriteBack(infoGlobal); - btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies,numDeformableBodies, infoGlobal); - // update rigid body velocity in rigid/deformable contact - m_leastSquaresResidual = btMax(m_leastSquaresResidual, deformableResidual); - // solver body velocity <- rigid body velocity - writeToSolverBody(bodies, numBodies, infoGlobal); - - if (m_leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || (iteration >= (maxIterations - 1))) - { + // solve rigid/rigid in solver body + m_leastSquaresResidual = solveSingleIteration(iteration, bodies, numBodies, manifoldPtr, numManifolds, constraints, numConstraints, infoGlobal, debugDrawer); + // solver body velocity -> rigid body velocity + solverBodyWriteBack(infoGlobal); + btScalar deformableResidual = m_deformableSolver->solveContactConstraints(deformableBodies, numDeformableBodies, infoGlobal); + // update rigid body 
velocity in rigid/deformable contact + m_leastSquaresResidual = btMax(m_leastSquaresResidual, deformableResidual); + // solver body velocity <- rigid body velocity + writeToSolverBody(bodies, numBodies, infoGlobal); + + if (m_leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || (iteration >= (maxIterations - 1))) + { #ifdef VERBOSE_RESIDUAL_PRINTF - printf("residual = %f at iteration #%d\n", m_leastSquaresResidual, iteration); + if (iteration >= (maxIterations - 1)) + printf("residual = %f at iteration #%d\n", m_leastSquaresResidual, iteration); #endif - m_analyticsData.m_numSolverCalls++; - m_analyticsData.m_numIterationsUsed = iteration+1; - m_analyticsData.m_islandId = -2; - if (numBodies>0) - m_analyticsData.m_islandId = bodies[0]->getCompanionId(); - m_analyticsData.m_numBodies = numBodies; - m_analyticsData.m_numContactManifolds = numManifolds; - m_analyticsData.m_remainingLeastSquaresResidual = m_leastSquaresResidual; - break; - } - } - } - return 0.f; + m_analyticsData.m_numSolverCalls++; + m_analyticsData.m_numIterationsUsed = iteration + 1; + m_analyticsData.m_islandId = -2; + if (numBodies > 0) + m_analyticsData.m_islandId = bodies[0]->getCompanionId(); + m_analyticsData.m_numBodies = numBodies; + m_analyticsData.m_numContactManifolds = numManifolds; + m_analyticsData.m_remainingLeastSquaresResidual = m_leastSquaresResidual; + break; + } + } + } + return 0.f; } -void btDeformableMultiBodyConstraintSolver::solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject * *deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) +void btDeformableMultiBodyConstraintSolver::solveDeformableBodyGroup(btCollisionObject** bodies, int numBodies, btCollisionObject** 
deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher) { - m_tmpMultiBodyConstraints = multiBodyConstraints; - m_tmpNumMultiBodyConstraints = numMultiBodyConstraints; - - // inherited from MultiBodyConstraintSolver - solveGroupCacheFriendlySetup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer); - - // overriden - solveDeformableGroupIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer); - - // inherited from MultiBodyConstraintSolver - solveGroupCacheFriendlyFinish(bodies, numBodies, info); - - m_tmpMultiBodyConstraints = 0; - m_tmpNumMultiBodyConstraints = 0; + m_tmpMultiBodyConstraints = multiBodyConstraints; + m_tmpNumMultiBodyConstraints = numMultiBodyConstraints; + + // inherited from MultiBodyConstraintSolver + solveGroupCacheFriendlySetup(bodies, numBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer); + + // overriden + solveDeformableGroupIterations(bodies, numBodies, deformableBodies, numDeformableBodies, manifold, numManifolds, constraints, numConstraints, info, debugDrawer); + + // inherited from MultiBodyConstraintSolver + solveGroupCacheFriendlyFinish(bodies, numBodies, info); + + m_tmpMultiBodyConstraints = 0; + m_tmpNumMultiBodyConstraints = 0; } void btDeformableMultiBodyConstraintSolver::writeToSolverBody(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal) { - for (int i = 0; i < numBodies; i++) - { - int bodyId = getOrInitSolverBody(*bodies[i], infoGlobal.m_timeStep); + for (int i = 0; i < numBodies; i++) + { + int bodyId = getOrInitSolverBody(*bodies[i], infoGlobal.m_timeStep); - btRigidBody* body = 
btRigidBody::upcast(bodies[i]); - if (body && body->getInvMass()) - { - btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId]; - solverBody.m_linearVelocity = body->getLinearVelocity() - solverBody.m_deltaLinearVelocity; - solverBody.m_angularVelocity = body->getAngularVelocity() - solverBody.m_deltaAngularVelocity; - } - } + btRigidBody* body = btRigidBody::upcast(bodies[i]); + if (body && body->getInvMass()) + { + btSolverBody& solverBody = m_tmpSolverBodyPool[bodyId]; + solverBody.m_linearVelocity = body->getLinearVelocity() - solverBody.m_deltaLinearVelocity; + solverBody.m_angularVelocity = body->getAngularVelocity() - solverBody.m_deltaAngularVelocity; + } + } } void btDeformableMultiBodyConstraintSolver::solverBodyWriteBack(const btContactSolverInfo& infoGlobal) { - for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) - { - btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody; - if (body) - { - m_tmpSolverBodyPool[i].m_originalBody->setLinearVelocity(m_tmpSolverBodyPool[i].m_linearVelocity + m_tmpSolverBodyPool[i].m_deltaLinearVelocity); - m_tmpSolverBodyPool[i].m_originalBody->setAngularVelocity(m_tmpSolverBodyPool[i].m_angularVelocity+m_tmpSolverBodyPool[i].m_deltaAngularVelocity); - } - } + for (int i = 0; i < m_tmpSolverBodyPool.size(); i++) + { + btRigidBody* body = m_tmpSolverBodyPool[i].m_originalBody; + if (body) + { + m_tmpSolverBodyPool[i].m_originalBody->setLinearVelocity(m_tmpSolverBodyPool[i].m_linearVelocity + m_tmpSolverBodyPool[i].m_deltaLinearVelocity); + m_tmpSolverBodyPool[i].m_originalBody->setAngularVelocity(m_tmpSolverBodyPool[i].m_angularVelocity + m_tmpSolverBodyPool[i].m_deltaAngularVelocity); + } + } } -void btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer) +void 
btDeformableMultiBodyConstraintSolver::solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer) { - BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations"); - int iteration; - if (infoGlobal.m_splitImpulse) - { - { -// m_deformableSolver->splitImpulseSetup(infoGlobal); - for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++) - { - btScalar leastSquaresResidual = 0.f; - { - int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); - int j; - for (j = 0; j < numPoolConstraints; j++) - { - const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; - - btScalar residual = resolveSplitPenetrationImpulse(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); - leastSquaresResidual = btMax(leastSquaresResidual, residual * residual); - } - // solve the position correction between deformable and rigid/multibody -// btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal); -// leastSquaresResidual = btMax(leastSquaresResidual, residual * residual); - } - if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.m_numIterations - 1)) - { + BT_PROFILE("solveGroupCacheFriendlySplitImpulseIterations"); + int iteration; + if (infoGlobal.m_splitImpulse) + { + { + for (iteration = 0; iteration < infoGlobal.m_numIterations; iteration++) + { + btScalar leastSquaresResidual = 0.f; + { + int numPoolConstraints = m_tmpSolverContactConstraintPool.size(); + int j; + for (j = 0; j < numPoolConstraints; j++) + { + const btSolverConstraint& solveManifold = m_tmpSolverContactConstraintPool[m_orderTmpConstraintPool[j]]; + + btScalar 
residual = resolveSplitPenetrationImpulse(m_tmpSolverBodyPool[solveManifold.m_solverBodyIdA], m_tmpSolverBodyPool[solveManifold.m_solverBodyIdB], solveManifold); + leastSquaresResidual = btMax(leastSquaresResidual, residual * residual); + } + // solve the position correction between deformable and rigid/multibody + // btScalar residual = m_deformableSolver->solveSplitImpulse(infoGlobal); + btScalar residual = m_deformableSolver->m_objective->m_projection.solveSplitImpulse(deformableBodies, numDeformableBodies, infoGlobal); + leastSquaresResidual = btMax(leastSquaresResidual, residual * residual); + } + if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.m_numIterations - 1)) + { #ifdef VERBOSE_RESIDUAL_PRINTF - printf("residual = %f at iteration #%d\n", leastSquaresResidual, iteration); + if (iteration >= (infoGlobal.m_numIterations - 1)) + printf("split impulse residual = %f at iteration #%d\n", leastSquaresResidual, iteration); #endif - break; - } - } - } - } + break; + } + } + } + } } diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h index 0c7cc26a83..94aabce838 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyConstraintSolver.h @@ -13,7 +13,6 @@ 3. This notice may not be removed or altered from any source distribution. 
*/ - #ifndef BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H #define BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H @@ -32,30 +31,31 @@ class btDeformableBodySolver; ATTRIBUTE_ALIGNED16(class) btDeformableMultiBodyConstraintSolver : public btMultiBodyConstraintSolver { - btDeformableBodySolver* m_deformableSolver; - + btDeformableBodySolver* m_deformableSolver; + protected: - // override the iterations method to include deformable/multibody contact -// virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer); - - // write the velocity of the the solver body to the underlying rigid body - void solverBodyWriteBack(const btContactSolverInfo& infoGlobal); - - // write the velocity of the underlying rigid body to the the the solver body - void writeToSolverBody(btCollisionObject** bodies, int numBodies, const btContactSolverInfo& infoGlobal); - - virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject** bodies, int numBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer); - - virtual btScalar solveDeformableGroupIterations(btCollisionObject** bodies,int numBodies,btCollisionObject** deformableBodies,int numDeformableBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer); + // override the iterations method to include deformable/multibody contact + // virtual btScalar solveGroupCacheFriendlyIterations(btCollisionObject** bodies,int numBodies,btPersistentManifold** manifoldPtr, int numManifolds,btTypedConstraint** constraints,int numConstraints,const btContactSolverInfo& infoGlobal,btIDebugDraw* debugDrawer); + + 
// write the velocity of the the solver body to the underlying rigid body + void solverBodyWriteBack(const btContactSolverInfo& infoGlobal); + + // write the velocity of the underlying rigid body to the the the solver body + void writeToSolverBody(btCollisionObject * *bodies, int numBodies, const btContactSolverInfo& infoGlobal); + + virtual void solveGroupCacheFriendlySplitImpulseIterations(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer); + + virtual btScalar solveDeformableGroupIterations(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifoldPtr, int numManifolds, btTypedConstraint** constraints, int numConstraints, const btContactSolverInfo& infoGlobal, btIDebugDraw* debugDrawer); + public: - BT_DECLARE_ALIGNED_ALLOCATOR(); - - void setDeformableSolver(btDeformableBodySolver* deformableSolver) - { - m_deformableSolver = deformableSolver; - } - - virtual void solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject * *deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher); + BT_DECLARE_ALIGNED_ALLOCATOR(); + + void setDeformableSolver(btDeformableBodySolver * deformableSolver) + { + m_deformableSolver = deformableSolver; + } + + virtual void solveDeformableBodyGroup(btCollisionObject * *bodies, int numBodies, btCollisionObject** deformableBodies, int numDeformableBodies, btPersistentManifold** manifold, int numManifolds, btTypedConstraint** constraints, int numConstraints, 
btMultiBodyConstraint** multiBodyConstraints, int numMultiBodyConstraints, const btContactSolverInfo& info, btIDebugDraw* debugDrawer, btDispatcher* dispatcher); }; #endif /* BT_DEFORMABLE_MULTIBODY_CONSTRAINT_SOLVER_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp index 6b742978ef..983e622b5f 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.cpp @@ -40,8 +40,9 @@ The algorithm also closely resembles the one in http://physbam.stanford.edu/~fed #include "LinearMath/btQuickprof.h" #include "btSoftBodyInternals.h" btDeformableMultiBodyDynamicsWorld::btDeformableMultiBodyDynamicsWorld(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btDeformableMultiBodyConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btDeformableBodySolver* deformableBodySolver) -: btMultiBodyDynamicsWorld(dispatcher, pairCache, (btMultiBodyConstraintSolver*)constraintSolver, collisionConfiguration), -m_deformableBodySolver(deformableBodySolver), m_solverCallback(0) + : btMultiBodyDynamicsWorld(dispatcher, pairCache, (btMultiBodyConstraintSolver*)constraintSolver, collisionConfiguration), + m_deformableBodySolver(deformableBodySolver), + m_solverCallback(0) { m_drawFlags = fDrawFlags::Std; m_drawNodeTree = true; @@ -52,7 +53,7 @@ m_deformableBodySolver(deformableBodySolver), m_solverCallback(0) m_sbi.m_sparsesdf.Initialize(); m_sbi.m_sparsesdf.setDefaultVoxelsz(0.005); m_sbi.m_sparsesdf.Reset(); - + m_sbi.air_density = (btScalar)1.2; m_sbi.water_density = 0; m_sbi.water_offset = 0; @@ -61,57 +62,57 @@ m_deformableBodySolver(deformableBodySolver), m_solverCallback(0) m_internalTime = 0.0; m_implicit = false; m_lineSearch = false; - m_useProjection = true; + m_useProjection = false; m_ccdIterations = 5; 
m_solverDeformableBodyIslandCallback = new DeformableBodyInplaceSolverIslandCallback(constraintSolver, dispatcher); } btDeformableMultiBodyDynamicsWorld::~btDeformableMultiBodyDynamicsWorld() { - delete m_solverDeformableBodyIslandCallback; + delete m_solverDeformableBodyIslandCallback; } void btDeformableMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar timeStep) { - BT_PROFILE("internalSingleStepSimulation"); - if (0 != m_internalPreTickCallback) - { - (*m_internalPreTickCallback)(this, timeStep); - } - reinitialize(timeStep); - - // add gravity to velocity of rigid and multi bodys - applyRigidBodyGravity(timeStep); - - ///apply gravity and explicit force to velocity, predict motion - predictUnconstraintMotion(timeStep); - - ///perform collision detection that involves rigid/multi bodies - btMultiBodyDynamicsWorld::performDiscreteCollisionDetection(); - - btMultiBodyDynamicsWorld::calculateSimulationIslands(); - - beforeSolverCallbacks(timeStep); - - ///solve contact constraints and then deformable bodies momemtum equation - solveConstraints(timeStep); - - afterSolverCallbacks(timeStep); + BT_PROFILE("internalSingleStepSimulation"); + if (0 != m_internalPreTickCallback) + { + (*m_internalPreTickCallback)(this, timeStep); + } + reinitialize(timeStep); + + // add gravity to velocity of rigid and multi bodys + applyRigidBodyGravity(timeStep); + + ///apply gravity and explicit force to velocity, predict motion + predictUnconstraintMotion(timeStep); + + ///perform collision detection that involves rigid/multi bodies + btMultiBodyDynamicsWorld::performDiscreteCollisionDetection(); + + btMultiBodyDynamicsWorld::calculateSimulationIslands(); + + beforeSolverCallbacks(timeStep); + + ///solve contact constraints and then deformable bodies momemtum equation + solveConstraints(timeStep); + + afterSolverCallbacks(timeStep); performDeformableCollisionDetection(); - applyRepulsionForce(timeStep); + applyRepulsionForce(timeStep); + + 
performGeometricCollisions(timeStep); + + integrateTransforms(timeStep); - performGeometricCollisions(timeStep); + ///update vehicle simulation + btMultiBodyDynamicsWorld::updateActions(timeStep); - integrateTransforms(timeStep); - - ///update vehicle simulation - btMultiBodyDynamicsWorld::updateActions(timeStep); - - updateActivationState(timeStep); - // End solver-wise simulation step - // /////////////////////////////// + updateActivationState(timeStep); + // End solver-wise simulation step + // /////////////////////////////// } void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection() @@ -120,7 +121,7 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection() { m_softBodies[i]->m_softSoftCollision = true; } - + for (int i = 0; i < m_softBodies.size(); ++i) { for (int j = i; j < m_softBodies.size(); ++j) @@ -128,7 +129,7 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection() m_softBodies[i]->defaultCollisionHandler(m_softBodies[j]); } } - + for (int i = 0; i < m_softBodies.size(); ++i) { m_softBodies[i]->m_softSoftCollision = false; @@ -137,45 +138,45 @@ void btDeformableMultiBodyDynamicsWorld::performDeformableCollisionDetection() void btDeformableMultiBodyDynamicsWorld::updateActivationState(btScalar timeStep) { - for (int i = 0; i < m_softBodies.size(); i++) - { - btSoftBody* psb = m_softBodies[i]; - psb->updateDeactivation(timeStep); - if (psb->wantsSleeping()) - { - if (psb->getActivationState() == ACTIVE_TAG) - psb->setActivationState(WANTS_DEACTIVATION); - if (psb->getActivationState() == ISLAND_SLEEPING) - { - psb->setZeroVelocity(); - } - } - else - { - if (psb->getActivationState() != DISABLE_DEACTIVATION) - psb->setActivationState(ACTIVE_TAG); - } - } - btMultiBodyDynamicsWorld::updateActivationState(timeStep); + for (int i = 0; i < m_softBodies.size(); i++) + { + btSoftBody* psb = m_softBodies[i]; + psb->updateDeactivation(timeStep); + if (psb->wantsSleeping()) + { + if 
(psb->getActivationState() == ACTIVE_TAG) + psb->setActivationState(WANTS_DEACTIVATION); + if (psb->getActivationState() == ISLAND_SLEEPING) + { + psb->setZeroVelocity(); + } + } + else + { + if (psb->getActivationState() != DISABLE_DEACTIVATION) + psb->setActivationState(ACTIVE_TAG); + } + } + btMultiBodyDynamicsWorld::updateActivationState(timeStep); } void btDeformableMultiBodyDynamicsWorld::applyRepulsionForce(btScalar timeStep) { - BT_PROFILE("btDeformableMultiBodyDynamicsWorld::applyRepulsionForce"); - for (int i = 0; i < m_softBodies.size(); i++) - { - btSoftBody* psb = m_softBodies[i]; - if (psb->isActive()) - { + BT_PROFILE("btDeformableMultiBodyDynamicsWorld::applyRepulsionForce"); + for (int i = 0; i < m_softBodies.size(); i++) + { + btSoftBody* psb = m_softBodies[i]; + if (psb->isActive()) + { psb->applyRepulsionForce(timeStep, true); - } - } + } + } } void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar timeStep) { BT_PROFILE("btDeformableMultiBodyDynamicsWorld::performGeometricCollisions"); - // refit the BVH tree for CCD + // refit the BVH tree for CCD for (int i = 0; i < m_softBodies.size(); ++i) { btSoftBody* psb = m_softBodies[i]; @@ -214,7 +215,7 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim f.m_vn = (f.m_n[1]->m_v - f.m_n[0]->m_v).cross(f.m_n[2]->m_v - f.m_n[0]->m_v) * timeStep * timeStep; } } - } + } // apply CCD to register new contact points for (int i = 0; i < m_softBodies.size(); ++i) @@ -228,7 +229,7 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim m_softBodies[i]->geometricCollisionHandler(m_softBodies[j]); } } - } + } int penetration_count = 0; for (int i = 0; i < m_softBodies.size(); ++i) @@ -258,294 +259,292 @@ void btDeformableMultiBodyDynamicsWorld::performGeometricCollisions(btScalar tim void btDeformableMultiBodyDynamicsWorld::softBodySelfCollision() { - BT_PROFILE("btDeformableMultiBodyDynamicsWorld::softBodySelfCollision"); - for (int 
i = 0; i < m_softBodies.size(); i++) - { - btSoftBody* psb = m_softBodies[i]; - if (psb->isActive()) - { - psb->defaultCollisionHandler(psb); - } - } + BT_PROFILE("btDeformableMultiBodyDynamicsWorld::softBodySelfCollision"); + for (int i = 0; i < m_softBodies.size(); i++) + { + btSoftBody* psb = m_softBodies[i]; + if (psb->isActive()) + { + psb->defaultCollisionHandler(psb); + } + } } void btDeformableMultiBodyDynamicsWorld::positionCorrection(btScalar timeStep) { - // correct the position of rigid bodies with temporary velocity generated from split impulse - btContactSolverInfo infoGlobal; - btVector3 zero(0,0,0); - for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i) - { - btRigidBody* rb = m_nonStaticRigidBodies[i]; - //correct the position/orientation based on push/turn recovery - btTransform newTransform; - btVector3 pushVelocity = rb->getPushVelocity(); - btVector3 turnVelocity = rb->getTurnVelocity(); - if (pushVelocity[0] != 0.f || pushVelocity[1] != 0 || pushVelocity[2] != 0 || turnVelocity[0] != 0.f || turnVelocity[1] != 0 || turnVelocity[2] != 0) - { - btTransformUtil::integrateTransform(rb->getWorldTransform(), pushVelocity, turnVelocity * infoGlobal.m_splitImpulseTurnErp, timeStep, newTransform); - rb->setWorldTransform(newTransform); - rb->setPushVelocity(zero); - rb->setTurnVelocity(zero); - } - } + // correct the position of rigid bodies with temporary velocity generated from split impulse + btContactSolverInfo infoGlobal; + btVector3 zero(0, 0, 0); + for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i) + { + btRigidBody* rb = m_nonStaticRigidBodies[i]; + //correct the position/orientation based on push/turn recovery + btTransform newTransform; + btVector3 pushVelocity = rb->getPushVelocity(); + btVector3 turnVelocity = rb->getTurnVelocity(); + if (pushVelocity[0] != 0.f || pushVelocity[1] != 0 || pushVelocity[2] != 0 || turnVelocity[0] != 0.f || turnVelocity[1] != 0 || turnVelocity[2] != 0) + { + 
btTransformUtil::integrateTransform(rb->getWorldTransform(), pushVelocity, turnVelocity * infoGlobal.m_splitImpulseTurnErp, timeStep, newTransform); + rb->setWorldTransform(newTransform); + rb->setPushVelocity(zero); + rb->setTurnVelocity(zero); + } + } } void btDeformableMultiBodyDynamicsWorld::integrateTransforms(btScalar timeStep) { - BT_PROFILE("integrateTransforms"); - positionCorrection(timeStep); - btMultiBodyDynamicsWorld::integrateTransforms(timeStep); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - btSoftBody::Node& node = psb->m_nodes[j]; - btScalar maxDisplacement = psb->getWorldInfo()->m_maxDisplacement; - btScalar clampDeltaV = maxDisplacement / timeStep; - for (int c = 0; c < 3; c++) - { - if (node.m_v[c] > clampDeltaV) - { - node.m_v[c] = clampDeltaV; - } - if (node.m_v[c] < -clampDeltaV) - { - node.m_v[c] = -clampDeltaV; - } - } - node.m_x = node.m_x + timeStep * node.m_v; - node.m_q = node.m_x; - node.m_vn = node.m_v; - } - // enforce anchor constraints - for (int j = 0; j < psb->m_deformableAnchors.size();++j) - { - btSoftBody::DeformableNodeRigidAnchor& a = psb->m_deformableAnchors[j]; - btSoftBody::Node* n = a.m_node; - n->m_x = a.m_cti.m_colObj->getWorldTransform() * a.m_local; - - // update multibody anchor info - if (a.m_cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(a.m_cti.m_colObj); - if (multibodyLinkCol) - { - btVector3 nrm; - const btCollisionShape* shp = multibodyLinkCol->getCollisionShape(); - const btTransform& wtr = multibodyLinkCol->getWorldTransform(); - psb->m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(n->m_x), - shp, - nrm, - 0); - a.m_cti.m_normal = wtr.getBasis() * nrm; - btVector3 normal = a.m_cti.m_normal; - btVector3 t1 = generateUnitOrthogonalVector(normal); - btVector3 t2 = 
btCross(normal, t1); - btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; - findJacobian(multibodyLinkCol, jacobianData_normal, a.m_node->m_x, normal); - findJacobian(multibodyLinkCol, jacobianData_t1, a.m_node->m_x, t1); - findJacobian(multibodyLinkCol, jacobianData_t2, a.m_node->m_x, t2); - - btScalar* J_n = &jacobianData_normal.m_jacobians[0]; - btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; - btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; - - btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - - btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), - t1.getX(), t1.getY(), t1.getZ(), - t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - btMatrix3x3 local_impulse_matrix = (Diagonal(n->m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); - a.m_c0 = rot.transpose() * local_impulse_matrix * rot; - a.jacobianData_normal = jacobianData_normal; - a.jacobianData_t1 = jacobianData_t1; - a.jacobianData_t2 = jacobianData_t2; - a.t1 = t1; - a.t2 = t2; - } - } - } - psb->interpolateRenderMesh(); - } + BT_PROFILE("integrateTransforms"); + positionCorrection(timeStep); + btMultiBodyDynamicsWorld::integrateTransforms(timeStep); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + btSoftBody::Node& node = psb->m_nodes[j]; + btScalar maxDisplacement = psb->getWorldInfo()->m_maxDisplacement; + btScalar clampDeltaV = maxDisplacement / timeStep; + for (int c = 0; c < 3; c++) + { + if (node.m_v[c] > clampDeltaV) + { + node.m_v[c] = clampDeltaV; + } + if (node.m_v[c] < -clampDeltaV) + { + node.m_v[c] = -clampDeltaV; + } + } + node.m_x = node.m_x + timeStep * (node.m_v + node.m_splitv); + node.m_q = 
node.m_x; + node.m_vn = node.m_v; + } + // enforce anchor constraints + for (int j = 0; j < psb->m_deformableAnchors.size(); ++j) + { + btSoftBody::DeformableNodeRigidAnchor& a = psb->m_deformableAnchors[j]; + btSoftBody::Node* n = a.m_node; + n->m_x = a.m_cti.m_colObj->getWorldTransform() * a.m_local; + + // update multibody anchor info + if (a.m_cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(a.m_cti.m_colObj); + if (multibodyLinkCol) + { + btVector3 nrm; + const btCollisionShape* shp = multibodyLinkCol->getCollisionShape(); + const btTransform& wtr = multibodyLinkCol->getWorldTransform(); + psb->m_worldInfo->m_sparsesdf.Evaluate( + wtr.invXform(n->m_x), + shp, + nrm, + 0); + a.m_cti.m_normal = wtr.getBasis() * nrm; + btVector3 normal = a.m_cti.m_normal; + btVector3 t1 = generateUnitOrthogonalVector(normal); + btVector3 t2 = btCross(normal, t1); + btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; + findJacobian(multibodyLinkCol, jacobianData_normal, a.m_node->m_x, normal); + findJacobian(multibodyLinkCol, jacobianData_t1, a.m_node->m_x, t1); + findJacobian(multibodyLinkCol, jacobianData_t2, a.m_node->m_x, t2); + + btScalar* J_n = &jacobianData_normal.m_jacobians[0]; + btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; + btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; + + btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + + btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), + t1.getX(), t1.getY(), t1.getZ(), + t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + btMatrix3x3 local_impulse_matrix = (Diagonal(n->m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, 
u_t1, u_t2, ndof)).inverse(); + a.m_c0 = rot.transpose() * local_impulse_matrix * rot; + a.jacobianData_normal = jacobianData_normal; + a.jacobianData_t1 = jacobianData_t1; + a.jacobianData_t2 = jacobianData_t2; + a.t1 = t1; + a.t2 = t2; + } + } + } + psb->interpolateRenderMesh(); + } } void btDeformableMultiBodyDynamicsWorld::solveConstraints(btScalar timeStep) { - BT_PROFILE("btDeformableMultiBodyDynamicsWorld::solveConstraints"); - // save v_{n+1}^* velocity after explicit forces - m_deformableBodySolver->backupVelocity(); - - // set up constraints among multibodies and between multibodies and deformable bodies - setupConstraints(); - - // solve contact constraints - solveContactConstraints(); - - // set up the directions in which the velocity does not change in the momentum solve - if (m_useProjection) - m_deformableBodySolver->m_objective->m_projection.setProjection(); - else - m_deformableBodySolver->m_objective->m_projection.setLagrangeMultiplier(); - - // for explicit scheme, m_backupVelocity = v_{n+1}^* - // for implicit scheme, m_backupVelocity = v_n - // Here, set dv = v_{n+1} - v_n for nodes in contact - m_deformableBodySolver->setupDeformableSolve(m_implicit); - - // At this point, dv should be golden for nodes in contact - // proceed to solve deformable momentum equation - m_deformableBodySolver->solveDeformableConstraints(timeStep); + BT_PROFILE("btDeformableMultiBodyDynamicsWorld::solveConstraints"); + // save v_{n+1}^* velocity after explicit forces + m_deformableBodySolver->backupVelocity(); + + // set up constraints among multibodies and between multibodies and deformable bodies + setupConstraints(); + + // solve contact constraints + solveContactConstraints(); + + // set up the directions in which the velocity does not change in the momentum solve + if (m_useProjection) + m_deformableBodySolver->m_objective->m_projection.setProjection(); + else + m_deformableBodySolver->m_objective->m_projection.setLagrangeMultiplier(); + + // for explicit 
scheme, m_backupVelocity = v_{n+1}^* + // for implicit scheme, m_backupVelocity = v_n + // Here, set dv = v_{n+1} - v_n for nodes in contact + m_deformableBodySolver->setupDeformableSolve(m_implicit); + + // At this point, dv should be golden for nodes in contact + // proceed to solve deformable momentum equation + m_deformableBodySolver->solveDeformableConstraints(timeStep); } void btDeformableMultiBodyDynamicsWorld::setupConstraints() { - // set up constraints between multibody and deformable bodies - m_deformableBodySolver->setConstraints(m_solverInfo); - - // set up constraints among multibodies - { - sortConstraints(); - // setup the solver callback - btMultiBodyConstraint** sortedMultiBodyConstraints = m_sortedMultiBodyConstraints.size() ? &m_sortedMultiBodyConstraints[0] : 0; - btTypedConstraint** constraintsPtr = getNumConstraints() ? &m_sortedConstraints[0] : 0; - m_solverDeformableBodyIslandCallback->setup(&m_solverInfo, constraintsPtr, m_sortedConstraints.size(), sortedMultiBodyConstraints, m_sortedMultiBodyConstraints.size(), getDebugDrawer()); - - // build islands - m_islandManager->buildIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld()); - } + // set up constraints between multibody and deformable bodies + m_deformableBodySolver->setConstraints(m_solverInfo); + + // set up constraints among multibodies + { + sortConstraints(); + // setup the solver callback + btMultiBodyConstraint** sortedMultiBodyConstraints = m_sortedMultiBodyConstraints.size() ? &m_sortedMultiBodyConstraints[0] : 0; + btTypedConstraint** constraintsPtr = getNumConstraints() ? 
&m_sortedConstraints[0] : 0; + m_solverDeformableBodyIslandCallback->setup(&m_solverInfo, constraintsPtr, m_sortedConstraints.size(), sortedMultiBodyConstraints, m_sortedMultiBodyConstraints.size(), getDebugDrawer()); + + // build islands + m_islandManager->buildIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld()); + } } void btDeformableMultiBodyDynamicsWorld::sortConstraints() { - m_sortedConstraints.resize(m_constraints.size()); - int i; - for (i = 0; i < getNumConstraints(); i++) - { - m_sortedConstraints[i] = m_constraints[i]; - } - m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate2()); - - m_sortedMultiBodyConstraints.resize(m_multiBodyConstraints.size()); - for (i = 0; i < m_multiBodyConstraints.size(); i++) - { - m_sortedMultiBodyConstraints[i] = m_multiBodyConstraints[i]; - } - m_sortedMultiBodyConstraints.quickSort(btSortMultiBodyConstraintOnIslandPredicate()); + m_sortedConstraints.resize(m_constraints.size()); + int i; + for (i = 0; i < getNumConstraints(); i++) + { + m_sortedConstraints[i] = m_constraints[i]; + } + m_sortedConstraints.quickSort(btSortConstraintOnIslandPredicate2()); + + m_sortedMultiBodyConstraints.resize(m_multiBodyConstraints.size()); + for (i = 0; i < m_multiBodyConstraints.size(); i++) + { + m_sortedMultiBodyConstraints[i] = m_multiBodyConstraints[i]; + } + m_sortedMultiBodyConstraints.quickSort(btSortMultiBodyConstraintOnIslandPredicate()); } - - + void btDeformableMultiBodyDynamicsWorld::solveContactConstraints() { - // process constraints on each island - m_islandManager->processIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_solverDeformableBodyIslandCallback); - - // process deferred - m_solverDeformableBodyIslandCallback->processConstraints(); - m_constraintSolver->allSolved(m_solverInfo, m_debugDrawer); - - // write joint feedback - { - for (int i = 0; i < this->m_multiBodies.size(); i++) - { - btMultiBody* bod = m_multiBodies[i]; - - bool isSleeping = false; - - if 
(bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) - { - isSleeping = true; - } - for (int b = 0; b < bod->getNumLinks(); b++) - { - if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) - isSleeping = true; - } - - if (!isSleeping) - { - //useless? they get resized in stepVelocities once again (AND DIFFERENTLY) - m_scratch_r.resize(bod->getNumLinks() + 1); //multidof? ("Y"s use it and it is used to store qdd) - m_scratch_v.resize(bod->getNumLinks() + 1); - m_scratch_m.resize(bod->getNumLinks() + 1); - - if (bod->internalNeedsJointFeedback()) - { - if (!bod->isUsingRK4Integration()) - { - if (bod->internalNeedsJointFeedback()) - { - bool isConstraintPass = true; - bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass, - getSolverInfo().m_jointFeedbackInWorldSpace, - getSolverInfo().m_jointFeedbackInJointFrame); - } - } - } - } - } - } - - for (int i = 0; i < this->m_multiBodies.size(); i++) - { - btMultiBody* bod = m_multiBodies[i]; - bod->processDeltaVeeMultiDof2(); - } + // process constraints on each island + m_islandManager->processIslands(getCollisionWorld()->getDispatcher(), getCollisionWorld(), m_solverDeformableBodyIslandCallback); + + // process deferred + m_solverDeformableBodyIslandCallback->processConstraints(); + m_constraintSolver->allSolved(m_solverInfo, m_debugDrawer); + + // write joint feedback + { + for (int i = 0; i < this->m_multiBodies.size(); i++) + { + btMultiBody* bod = m_multiBodies[i]; + + bool isSleeping = false; + + if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) + { + isSleeping = true; + } + for (int b = 0; b < bod->getNumLinks(); b++) + { + if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) + isSleeping = true; + } + + if (!isSleeping) + { + //useless? 
they get resized in stepVelocities once again (AND DIFFERENTLY) + m_scratch_r.resize(bod->getNumLinks() + 1); //multidof? ("Y"s use it and it is used to store qdd) + m_scratch_v.resize(bod->getNumLinks() + 1); + m_scratch_m.resize(bod->getNumLinks() + 1); + + if (bod->internalNeedsJointFeedback()) + { + if (!bod->isUsingRK4Integration()) + { + if (bod->internalNeedsJointFeedback()) + { + bool isConstraintPass = true; + bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass, + getSolverInfo().m_jointFeedbackInWorldSpace, + getSolverInfo().m_jointFeedbackInJointFrame); + } + } + } + } + } + } + + for (int i = 0; i < this->m_multiBodies.size(); i++) + { + btMultiBody* bod = m_multiBodies[i]; + bod->processDeltaVeeMultiDof2(); + } } void btDeformableMultiBodyDynamicsWorld::addSoftBody(btSoftBody* body, int collisionFilterGroup, int collisionFilterMask) { - m_softBodies.push_back(body); - - // Set the soft body solver that will deal with this body - // to be the world's solver - body->setSoftBodySolver(m_deformableBodySolver); - - btCollisionWorld::addCollisionObject(body, - collisionFilterGroup, - collisionFilterMask); + m_softBodies.push_back(body); + + // Set the soft body solver that will deal with this body + // to be the world's solver + body->setSoftBodySolver(m_deformableBodySolver); + + btCollisionWorld::addCollisionObject(body, + collisionFilterGroup, + collisionFilterMask); } void btDeformableMultiBodyDynamicsWorld::predictUnconstraintMotion(btScalar timeStep) { - BT_PROFILE("predictUnconstraintMotion"); - btMultiBodyDynamicsWorld::predictUnconstraintMotion(timeStep); - m_deformableBodySolver->predictMotion(timeStep); + BT_PROFILE("predictUnconstraintMotion"); + btMultiBodyDynamicsWorld::predictUnconstraintMotion(timeStep); + m_deformableBodySolver->predictMotion(timeStep); } void btDeformableMultiBodyDynamicsWorld::reinitialize(btScalar timeStep) { - m_internalTime += 
timeStep; - m_deformableBodySolver->setImplicit(m_implicit); - m_deformableBodySolver->setLineSearch(m_lineSearch); - m_deformableBodySolver->reinitialize(m_softBodies, timeStep); - btDispatcherInfo& dispatchInfo = btMultiBodyDynamicsWorld::getDispatchInfo(); - dispatchInfo.m_timeStep = timeStep; - dispatchInfo.m_stepCount = 0; - dispatchInfo.m_debugDraw = btMultiBodyDynamicsWorld::getDebugDrawer(); - btMultiBodyDynamicsWorld::getSolverInfo().m_timeStep = timeStep; - if (m_useProjection) - { - m_deformableBodySolver->m_useProjection = true; -// m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = true; - m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_massPreconditioner; - } - else - { - m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_KKTPreconditioner; - } - + m_internalTime += timeStep; + m_deformableBodySolver->setImplicit(m_implicit); + m_deformableBodySolver->setLineSearch(m_lineSearch); + m_deformableBodySolver->reinitialize(m_softBodies, timeStep); + btDispatcherInfo& dispatchInfo = btMultiBodyDynamicsWorld::getDispatchInfo(); + dispatchInfo.m_timeStep = timeStep; + dispatchInfo.m_stepCount = 0; + dispatchInfo.m_debugDraw = btMultiBodyDynamicsWorld::getDebugDrawer(); + btMultiBodyDynamicsWorld::getSolverInfo().m_timeStep = timeStep; + if (m_useProjection) + { + m_deformableBodySolver->m_useProjection = true; + m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = true; + m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_massPreconditioner; + } + else + { + m_deformableBodySolver->m_useProjection = false; + m_deformableBodySolver->m_objective->m_projection.m_useStrainLimiting = false; + m_deformableBodySolver->m_objective->m_preconditioner = m_deformableBodySolver->m_objective->m_KKTPreconditioner; + } } - void btDeformableMultiBodyDynamicsWorld::debugDrawWorld() { - 
btMultiBodyDynamicsWorld::debugDrawWorld(); for (int i = 0; i < getSoftBodyArray().size(); i++) @@ -556,253 +555,260 @@ void btDeformableMultiBodyDynamicsWorld::debugDrawWorld() btSoftBodyHelpers::Draw(psb, getDebugDrawer(), getDrawFlags()); } } - - } void btDeformableMultiBodyDynamicsWorld::applyRigidBodyGravity(btScalar timeStep) { - // Gravity is applied in stepSimulation and then cleared here and then applied here and then cleared here again - // so that 1) gravity is applied to velocity before constraint solve and 2) gravity is applied in each substep - // when there are multiple substeps - btMultiBodyDynamicsWorld::applyGravity(); - // integrate rigid body gravity - for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i) - { - btRigidBody* rb = m_nonStaticRigidBodies[i]; - rb->integrateVelocities(timeStep); - } - - // integrate multibody gravity - { - forwardKinematics(); - clearMultiBodyConstraintForces(); - { - for (int i = 0; i < this->m_multiBodies.size(); i++) - { - btMultiBody* bod = m_multiBodies[i]; - - bool isSleeping = false; - - if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) - { - isSleeping = true; - } - for (int b = 0; b < bod->getNumLinks(); b++) - { - if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) - isSleeping = true; - } - - if (!isSleeping) - { - m_scratch_r.resize(bod->getNumLinks() + 1); - m_scratch_v.resize(bod->getNumLinks() + 1); - m_scratch_m.resize(bod->getNumLinks() + 1); - bool isConstraintPass = false; - { - if (!bod->isUsingRK4Integration()) - { - bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, - m_scratch_r, m_scratch_v, m_scratch_m,isConstraintPass, - getSolverInfo().m_jointFeedbackInWorldSpace, - getSolverInfo().m_jointFeedbackInJointFrame); - } - else - { - btAssert(" RK4Integration is not supported" ); - } - } - } - } - } - } - clearGravity(); + // Gravity is applied in stepSimulation and 
then cleared here and then applied here and then cleared here again + // so that 1) gravity is applied to velocity before constraint solve and 2) gravity is applied in each substep + // when there are multiple substeps + btMultiBodyDynamicsWorld::applyGravity(); + // integrate rigid body gravity + for (int i = 0; i < m_nonStaticRigidBodies.size(); ++i) + { + btRigidBody* rb = m_nonStaticRigidBodies[i]; + rb->integrateVelocities(timeStep); + } + + // integrate multibody gravity + { + forwardKinematics(); + clearMultiBodyConstraintForces(); + { + for (int i = 0; i < this->m_multiBodies.size(); i++) + { + btMultiBody* bod = m_multiBodies[i]; + + bool isSleeping = false; + + if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) + { + isSleeping = true; + } + for (int b = 0; b < bod->getNumLinks(); b++) + { + if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) + isSleeping = true; + } + + if (!isSleeping) + { + m_scratch_r.resize(bod->getNumLinks() + 1); + m_scratch_v.resize(bod->getNumLinks() + 1); + m_scratch_m.resize(bod->getNumLinks() + 1); + bool isConstraintPass = false; + { + if (!bod->isUsingRK4Integration()) + { + bod->computeAccelerationsArticulatedBodyAlgorithmMultiDof(m_solverInfo.m_timeStep, + m_scratch_r, m_scratch_v, m_scratch_m, isConstraintPass, + getSolverInfo().m_jointFeedbackInWorldSpace, + getSolverInfo().m_jointFeedbackInJointFrame); + } + else + { + btAssert(" RK4Integration is not supported"); + } + } + } + } + } + } + clearGravity(); } void btDeformableMultiBodyDynamicsWorld::clearGravity() { - BT_PROFILE("btMultiBody clearGravity"); - // clear rigid body gravity - for (int i = 0; i < m_nonStaticRigidBodies.size(); i++) - { - btRigidBody* body = m_nonStaticRigidBodies[i]; - if (body->isActive()) - { - body->clearGravity(); - } - } - // clear multibody gravity - for (int i = 0; i < this->m_multiBodies.size(); i++) - { - btMultiBody* bod = m_multiBodies[i]; 
- - bool isSleeping = false; - - if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) - { - isSleeping = true; - } - for (int b = 0; b < bod->getNumLinks(); b++) - { - if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) - isSleeping = true; - } - - if (!isSleeping) - { - bod->addBaseForce(-m_gravity * bod->getBaseMass()); - - for (int j = 0; j < bod->getNumLinks(); ++j) - { - bod->addLinkForce(j, -m_gravity * bod->getLinkMass(j)); - } - } - } + BT_PROFILE("btMultiBody clearGravity"); + // clear rigid body gravity + for (int i = 0; i < m_nonStaticRigidBodies.size(); i++) + { + btRigidBody* body = m_nonStaticRigidBodies[i]; + if (body->isActive()) + { + body->clearGravity(); + } + } + // clear multibody gravity + for (int i = 0; i < this->m_multiBodies.size(); i++) + { + btMultiBody* bod = m_multiBodies[i]; + + bool isSleeping = false; + + if (bod->getBaseCollider() && bod->getBaseCollider()->getActivationState() == ISLAND_SLEEPING) + { + isSleeping = true; + } + for (int b = 0; b < bod->getNumLinks(); b++) + { + if (bod->getLink(b).m_collider && bod->getLink(b).m_collider->getActivationState() == ISLAND_SLEEPING) + isSleeping = true; + } + + if (!isSleeping) + { + bod->addBaseForce(-m_gravity * bod->getBaseMass()); + + for (int j = 0; j < bod->getNumLinks(); ++j) + { + bod->addLinkForce(j, -m_gravity * bod->getLinkMass(j)); + } + } + } } void btDeformableMultiBodyDynamicsWorld::beforeSolverCallbacks(btScalar timeStep) { - if (0 != m_internalTickCallback) - { - (*m_internalTickCallback)(this, timeStep); - } - - if (0 != m_solverCallback) - { - (*m_solverCallback)(m_internalTime, this); - } + if (0 != m_internalTickCallback) + { + (*m_internalTickCallback)(this, timeStep); + } + + if (0 != m_solverCallback) + { + (*m_solverCallback)(m_internalTime, this); + } } void btDeformableMultiBodyDynamicsWorld::afterSolverCallbacks(btScalar timeStep) { - if (0 != m_solverCallback) - { - 
(*m_solverCallback)(m_internalTime, this); - } + if (0 != m_solverCallback) + { + (*m_solverCallback)(m_internalTime, this); + } } void btDeformableMultiBodyDynamicsWorld::addForce(btSoftBody* psb, btDeformableLagrangianForce* force) { - btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf; - bool added = false; - for (int i = 0; i < forces.size(); ++i) - { - if (forces[i]->getForceType() == force->getForceType()) - { - forces[i]->addSoftBody(psb); - added = true; - break; - } - } - if (!added) - { - force->addSoftBody(psb); - force->setIndices(m_deformableBodySolver->m_objective->getIndices()); - forces.push_back(force); - } + btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf; + bool added = false; + for (int i = 0; i < forces.size(); ++i) + { + if (forces[i]->getForceType() == force->getForceType()) + { + forces[i]->addSoftBody(psb); + added = true; + break; + } + } + if (!added) + { + force->addSoftBody(psb); + force->setIndices(m_deformableBodySolver->m_objective->getIndices()); + forces.push_back(force); + } } void btDeformableMultiBodyDynamicsWorld::removeForce(btSoftBody* psb, btDeformableLagrangianForce* force) { - btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf; - int removed_index = -1; - for (int i = 0; i < forces.size(); ++i) - { - if (forces[i]->getForceType() == force->getForceType()) - { - forces[i]->removeSoftBody(psb); - if (forces[i]->m_softBodies.size() == 0) - removed_index = i; - break; - } - } - if (removed_index >= 0) - forces.removeAtIndex(removed_index); + btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf; + int removed_index = -1; + for (int i = 0; i < forces.size(); ++i) + { + if (forces[i]->getForceType() == force->getForceType()) + { + forces[i]->removeSoftBody(psb); + if (forces[i]->m_softBodies.size() == 0) + removed_index = i; 
+ break; + } + } + if (removed_index >= 0) + forces.removeAtIndex(removed_index); +} + +void btDeformableMultiBodyDynamicsWorld::removeSoftBodyForce(btSoftBody* psb) +{ + btAlignedObjectArray<btDeformableLagrangianForce*>& forces = m_deformableBodySolver->m_objective->m_lf; + for (int i = 0; i < forces.size(); ++i) + { + forces[i]->removeSoftBody(psb); + } } void btDeformableMultiBodyDynamicsWorld::removeSoftBody(btSoftBody* body) { - m_softBodies.remove(body); - btCollisionWorld::removeCollisionObject(body); - // force a reinitialize so that node indices get updated. - m_deformableBodySolver->reinitialize(m_softBodies, btScalar(-1)); + removeSoftBodyForce(body); + m_softBodies.remove(body); + btCollisionWorld::removeCollisionObject(body); + // force a reinitialize so that node indices get updated. + m_deformableBodySolver->reinitialize(m_softBodies, btScalar(-1)); } void btDeformableMultiBodyDynamicsWorld::removeCollisionObject(btCollisionObject* collisionObject) { - btSoftBody* body = btSoftBody::upcast(collisionObject); - if (body) - removeSoftBody(body); - else - btDiscreteDynamicsWorld::removeCollisionObject(collisionObject); + btSoftBody* body = btSoftBody::upcast(collisionObject); + if (body) + removeSoftBody(body); + else + btDiscreteDynamicsWorld::removeCollisionObject(collisionObject); } - int btDeformableMultiBodyDynamicsWorld::stepSimulation(btScalar timeStep, int maxSubSteps, btScalar fixedTimeStep) { - startProfiling(timeStep); - - int numSimulationSubSteps = 0; - - if (maxSubSteps) - { - //fixed timestep with interpolation - m_fixedTimeStep = fixedTimeStep; - m_localTime += timeStep; - if (m_localTime >= fixedTimeStep) - { - numSimulationSubSteps = int(m_localTime / fixedTimeStep); - m_localTime -= numSimulationSubSteps * fixedTimeStep; - } - } - else - { - //variable timestep - fixedTimeStep = timeStep; - m_localTime = m_latencyMotionStateInterpolation ? 
0 : timeStep; - m_fixedTimeStep = 0; - if (btFuzzyZero(timeStep)) - { - numSimulationSubSteps = 0; - maxSubSteps = 0; - } - else - { - numSimulationSubSteps = 1; - maxSubSteps = 1; - } - } - - //process some debugging flags - if (getDebugDrawer()) - { - btIDebugDraw* debugDrawer = getDebugDrawer(); - gDisableDeactivation = (debugDrawer->getDebugMode() & btIDebugDraw::DBG_NoDeactivation) != 0; - } - if (numSimulationSubSteps) - { - //clamp the number of substeps, to prevent simulation grinding spiralling down to a halt - int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps) ? maxSubSteps : numSimulationSubSteps; - - saveKinematicState(fixedTimeStep * clampedSimulationSteps); - - for (int i = 0; i < clampedSimulationSteps; i++) - { - internalSingleStepSimulation(fixedTimeStep); - synchronizeMotionStates(); - } - } - else - { - synchronizeMotionStates(); - } - - clearForces(); - + startProfiling(timeStep); + + int numSimulationSubSteps = 0; + + if (maxSubSteps) + { + //fixed timestep with interpolation + m_fixedTimeStep = fixedTimeStep; + m_localTime += timeStep; + if (m_localTime >= fixedTimeStep) + { + numSimulationSubSteps = int(m_localTime / fixedTimeStep); + m_localTime -= numSimulationSubSteps * fixedTimeStep; + } + } + else + { + //variable timestep + fixedTimeStep = timeStep; + m_localTime = m_latencyMotionStateInterpolation ? 0 : timeStep; + m_fixedTimeStep = 0; + if (btFuzzyZero(timeStep)) + { + numSimulationSubSteps = 0; + maxSubSteps = 0; + } + else + { + numSimulationSubSteps = 1; + maxSubSteps = 1; + } + } + + //process some debugging flags + if (getDebugDrawer()) + { + btIDebugDraw* debugDrawer = getDebugDrawer(); + gDisableDeactivation = (debugDrawer->getDebugMode() & btIDebugDraw::DBG_NoDeactivation) != 0; + } + if (numSimulationSubSteps) + { + //clamp the number of substeps, to prevent simulation grinding spiralling down to a halt + int clampedSimulationSteps = (numSimulationSubSteps > maxSubSteps) ? 
maxSubSteps : numSimulationSubSteps; + + saveKinematicState(fixedTimeStep * clampedSimulationSteps); + + for (int i = 0; i < clampedSimulationSteps; i++) + { + internalSingleStepSimulation(fixedTimeStep); + synchronizeMotionStates(); + } + } + else + { + synchronizeMotionStates(); + } + + clearForces(); + #ifndef BT_NO_PROFILE - CProfileManager::Increment_Frame_Counter(); + CProfileManager::Increment_Frame_Counter(); #endif //BT_NO_PROFILE - - return numSimulationSubSteps; + + return numSimulationSubSteps; } diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h index 76b58a0378..4b7069aac7 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableMultiBodyDynamicsWorld.h @@ -36,185 +36,192 @@ typedef btAlignedObjectArray<btSoftBody*> btSoftBodyArray; class btDeformableMultiBodyDynamicsWorld : public btMultiBodyDynamicsWorld { - typedef btAlignedObjectArray<btVector3> TVStack; - ///Solver classes that encapsulate multiple deformable bodies for solving - btDeformableBodySolver* m_deformableBodySolver; - btSoftBodyArray m_softBodies; - int m_drawFlags; - bool m_drawNodeTree; - bool m_drawFaceTree; - bool m_drawClusterTree; - btSoftBodyWorldInfo m_sbi; - btScalar m_internalTime; - int m_ccdIterations; - bool m_implicit; - bool m_lineSearch; - bool m_useProjection; - DeformableBodyInplaceSolverIslandCallback* m_solverDeformableBodyIslandCallback; - - typedef void (*btSolverCallback)(btScalar time, btDeformableMultiBodyDynamicsWorld* world); - btSolverCallback m_solverCallback; - + typedef btAlignedObjectArray<btVector3> TVStack; + ///Solver classes that encapsulate multiple deformable bodies for solving + btDeformableBodySolver* m_deformableBodySolver; + btSoftBodyArray m_softBodies; + int m_drawFlags; + bool m_drawNodeTree; + bool m_drawFaceTree; + bool m_drawClusterTree; + btSoftBodyWorldInfo 
m_sbi; + btScalar m_internalTime; + int m_ccdIterations; + bool m_implicit; + bool m_lineSearch; + bool m_useProjection; + DeformableBodyInplaceSolverIslandCallback* m_solverDeformableBodyIslandCallback; + + typedef void (*btSolverCallback)(btScalar time, btDeformableMultiBodyDynamicsWorld* world); + btSolverCallback m_solverCallback; + protected: - virtual void internalSingleStepSimulation(btScalar timeStep); - - virtual void integrateTransforms(btScalar timeStep); - - void positionCorrection(btScalar timeStep); - - void solveConstraints(btScalar timeStep); - - void updateActivationState(btScalar timeStep); - - void clearGravity(); - + virtual void internalSingleStepSimulation(btScalar timeStep); + + virtual void integrateTransforms(btScalar timeStep); + + void positionCorrection(btScalar timeStep); + + void solveConstraints(btScalar timeStep); + + void updateActivationState(btScalar timeStep); + + void clearGravity(); + public: btDeformableMultiBodyDynamicsWorld(btDispatcher* dispatcher, btBroadphaseInterface* pairCache, btDeformableMultiBodyConstraintSolver* constraintSolver, btCollisionConfiguration* collisionConfiguration, btDeformableBodySolver* deformableBodySolver = 0); - virtual int stepSimulation(btScalar timeStep, int maxSubSteps = 1, btScalar fixedTimeStep = btScalar(1.) / btScalar(60.)); + virtual int stepSimulation(btScalar timeStep, int maxSubSteps = 1, btScalar fixedTimeStep = btScalar(1.) 
/ btScalar(60.)); virtual void debugDrawWorld(); - void setSolverCallback(btSolverCallback cb) - { - m_solverCallback = cb; - } - - virtual ~btDeformableMultiBodyDynamicsWorld(); - - virtual btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() - { - return (btMultiBodyDynamicsWorld*)(this); - } - - virtual const btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() const - { - return (const btMultiBodyDynamicsWorld*)(this); - } - - virtual btDynamicsWorldType getWorldType() const - { - return BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD; - } - - virtual void predictUnconstraintMotion(btScalar timeStep); - - virtual void addSoftBody(btSoftBody* body, int collisionFilterGroup = btBroadphaseProxy::DefaultFilter, int collisionFilterMask = btBroadphaseProxy::AllFilter); - - btSoftBodyArray& getSoftBodyArray() - { - return m_softBodies; - } - - const btSoftBodyArray& getSoftBodyArray() const - { - return m_softBodies; - } - - btSoftBodyWorldInfo& getWorldInfo() - { - return m_sbi; - } - - const btSoftBodyWorldInfo& getWorldInfo() const - { - return m_sbi; - } - - void reinitialize(btScalar timeStep); - - void applyRigidBodyGravity(btScalar timeStep); - - void beforeSolverCallbacks(btScalar timeStep); - - void afterSolverCallbacks(btScalar timeStep); - - void addForce(btSoftBody* psb, btDeformableLagrangianForce* force); - - void removeForce(btSoftBody* psb, btDeformableLagrangianForce* force); - - void removeSoftBody(btSoftBody* body); - - void removeCollisionObject(btCollisionObject* collisionObject); - - int getDrawFlags() const { return (m_drawFlags); } - void setDrawFlags(int f) { m_drawFlags = f; } - - void setupConstraints(); - - void performDeformableCollisionDetection(); - - void solveMultiBodyConstraints(); - - void solveContactConstraints(); - - void sortConstraints(); - - void softBodySelfCollision(); - - void setImplicit(bool implicit) - { - m_implicit = implicit; - } - - void setLineSearch(bool lineSearch) - { - m_lineSearch = lineSearch; - } - - void 
applyRepulsionForce(btScalar timeStep); - - void performGeometricCollisions(btScalar timeStep); - - struct btDeformableSingleRayCallback : public btBroadphaseRayCallback - { - btVector3 m_rayFromWorld; - btVector3 m_rayToWorld; - btTransform m_rayFromTrans; - btTransform m_rayToTrans; - btVector3 m_hitNormal; - - const btDeformableMultiBodyDynamicsWorld* m_world; - btCollisionWorld::RayResultCallback& m_resultCallback; - - btDeformableSingleRayCallback(const btVector3& rayFromWorld, const btVector3& rayToWorld, const btDeformableMultiBodyDynamicsWorld* world, btCollisionWorld::RayResultCallback& resultCallback) - : m_rayFromWorld(rayFromWorld), - m_rayToWorld(rayToWorld), - m_world(world), - m_resultCallback(resultCallback) - { - m_rayFromTrans.setIdentity(); - m_rayFromTrans.setOrigin(m_rayFromWorld); - m_rayToTrans.setIdentity(); - m_rayToTrans.setOrigin(m_rayToWorld); - - btVector3 rayDir = (rayToWorld - rayFromWorld); - - rayDir.normalize(); - ///what about division by zero? --> just set rayDirection[i] to INF/1e30 - m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[0]; - m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1]; - m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? 
btScalar(1e30) : btScalar(1.0) / rayDir[2]; - m_signs[0] = m_rayDirectionInverse[0] < 0.0; - m_signs[1] = m_rayDirectionInverse[1] < 0.0; - m_signs[2] = m_rayDirectionInverse[2] < 0.0; - - m_lambda_max = rayDir.dot(m_rayToWorld - m_rayFromWorld); - } - - virtual bool process(const btBroadphaseProxy* proxy) - { - ///terminate further ray tests, once the closestHitFraction reached zero - if (m_resultCallback.m_closestHitFraction == btScalar(0.f)) - return false; - - btCollisionObject* collisionObject = (btCollisionObject*)proxy->m_clientObject; - - //only perform raycast if filterMask matches - if (m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) - { - //RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject(); - //btVector3 collisionObjectAabbMin,collisionObjectAabbMax; + void setSolverCallback(btSolverCallback cb) + { + m_solverCallback = cb; + } + + virtual ~btDeformableMultiBodyDynamicsWorld(); + + virtual btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() + { + return (btMultiBodyDynamicsWorld*)(this); + } + + virtual const btMultiBodyDynamicsWorld* getMultiBodyDynamicsWorld() const + { + return (const btMultiBodyDynamicsWorld*)(this); + } + + virtual btDynamicsWorldType getWorldType() const + { + return BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD; + } + + virtual void predictUnconstraintMotion(btScalar timeStep); + + virtual void addSoftBody(btSoftBody* body, int collisionFilterGroup = btBroadphaseProxy::DefaultFilter, int collisionFilterMask = btBroadphaseProxy::AllFilter); + + btSoftBodyArray& getSoftBodyArray() + { + return m_softBodies; + } + + const btSoftBodyArray& getSoftBodyArray() const + { + return m_softBodies; + } + + btSoftBodyWorldInfo& getWorldInfo() + { + return m_sbi; + } + + const btSoftBodyWorldInfo& getWorldInfo() const + { + return m_sbi; + } + + void reinitialize(btScalar timeStep); + + void applyRigidBodyGravity(btScalar timeStep); + + void beforeSolverCallbacks(btScalar timeStep); + + void 
afterSolverCallbacks(btScalar timeStep); + + void addForce(btSoftBody* psb, btDeformableLagrangianForce* force); + + void removeForce(btSoftBody* psb, btDeformableLagrangianForce* force); + + void removeSoftBodyForce(btSoftBody* psb); + + void removeSoftBody(btSoftBody* body); + + void removeCollisionObject(btCollisionObject* collisionObject); + + int getDrawFlags() const { return (m_drawFlags); } + void setDrawFlags(int f) { m_drawFlags = f; } + + void setupConstraints(); + + void performDeformableCollisionDetection(); + + void solveMultiBodyConstraints(); + + void solveContactConstraints(); + + void sortConstraints(); + + void softBodySelfCollision(); + + void setImplicit(bool implicit) + { + m_implicit = implicit; + } + + void setLineSearch(bool lineSearch) + { + m_lineSearch = lineSearch; + } + + void setUseProjection(bool useProjection) + { + m_useProjection = useProjection; + } + + void applyRepulsionForce(btScalar timeStep); + + void performGeometricCollisions(btScalar timeStep); + + struct btDeformableSingleRayCallback : public btBroadphaseRayCallback + { + btVector3 m_rayFromWorld; + btVector3 m_rayToWorld; + btTransform m_rayFromTrans; + btTransform m_rayToTrans; + btVector3 m_hitNormal; + + const btDeformableMultiBodyDynamicsWorld* m_world; + btCollisionWorld::RayResultCallback& m_resultCallback; + + btDeformableSingleRayCallback(const btVector3& rayFromWorld, const btVector3& rayToWorld, const btDeformableMultiBodyDynamicsWorld* world, btCollisionWorld::RayResultCallback& resultCallback) + : m_rayFromWorld(rayFromWorld), + m_rayToWorld(rayToWorld), + m_world(world), + m_resultCallback(resultCallback) + { + m_rayFromTrans.setIdentity(); + m_rayFromTrans.setOrigin(m_rayFromWorld); + m_rayToTrans.setIdentity(); + m_rayToTrans.setOrigin(m_rayToWorld); + + btVector3 rayDir = (rayToWorld - rayFromWorld); + + rayDir.normalize(); + ///what about division by zero? --> just set rayDirection[i] to INF/1e30 + m_rayDirectionInverse[0] = rayDir[0] == btScalar(0.0) ? 
btScalar(1e30) : btScalar(1.0) / rayDir[0]; + m_rayDirectionInverse[1] = rayDir[1] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[1]; + m_rayDirectionInverse[2] = rayDir[2] == btScalar(0.0) ? btScalar(1e30) : btScalar(1.0) / rayDir[2]; + m_signs[0] = m_rayDirectionInverse[0] < 0.0; + m_signs[1] = m_rayDirectionInverse[1] < 0.0; + m_signs[2] = m_rayDirectionInverse[2] < 0.0; + + m_lambda_max = rayDir.dot(m_rayToWorld - m_rayFromWorld); + } + + virtual bool process(const btBroadphaseProxy* proxy) + { + ///terminate further ray tests, once the closestHitFraction reached zero + if (m_resultCallback.m_closestHitFraction == btScalar(0.f)) + return false; + + btCollisionObject* collisionObject = (btCollisionObject*)proxy->m_clientObject; + + //only perform raycast if filterMask matches + if (m_resultCallback.needsCollision(collisionObject->getBroadphaseHandle())) + { + //RigidcollisionObject* collisionObject = ctrl->GetRigidcollisionObject(); + //btVector3 collisionObjectAabbMin,collisionObjectAabbMax; #if 0 #ifdef RECALCULATE_AABB btVector3 collisionObjectAabbMin,collisionObjectAabbMax; @@ -225,87 +232,85 @@ public: const btVector3& collisionObjectAabbMax = collisionObject->getBroadphaseHandle()->m_aabbMax; #endif #endif - //btScalar hitLambda = m_resultCallback.m_closestHitFraction; - //culling already done by broadphase - //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal)) - { - m_world->rayTestSingle(m_rayFromTrans, m_rayToTrans, - collisionObject, - collisionObject->getCollisionShape(), - collisionObject->getWorldTransform(), - m_resultCallback); - } - } - return true; - } - }; - - - - void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const - { - BT_PROFILE("rayTest"); - /// use the broadphase to accelerate the search for objects, based on their aabb - /// and for each object with ray-aabb overlap, perform an exact ray test - 
btDeformableSingleRayCallback rayCB(rayFromWorld, rayToWorld, this, resultCallback); - + //btScalar hitLambda = m_resultCallback.m_closestHitFraction; + //culling already done by broadphase + //if (btRayAabb(m_rayFromWorld,m_rayToWorld,collisionObjectAabbMin,collisionObjectAabbMax,hitLambda,m_hitNormal)) + { + m_world->rayTestSingle(m_rayFromTrans, m_rayToTrans, + collisionObject, + collisionObject->getCollisionShape(), + collisionObject->getWorldTransform(), + m_resultCallback); + } + } + return true; + } + }; + + void rayTest(const btVector3& rayFromWorld, const btVector3& rayToWorld, RayResultCallback& resultCallback) const + { + BT_PROFILE("rayTest"); + /// use the broadphase to accelerate the search for objects, based on their aabb + /// and for each object with ray-aabb overlap, perform an exact ray test + btDeformableSingleRayCallback rayCB(rayFromWorld, rayToWorld, this, resultCallback); + #ifndef USE_BRUTEFORCE_RAYBROADPHASE - m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, rayCB); + m_broadphasePairCache->rayTest(rayFromWorld, rayToWorld, rayCB); #else - for (int i = 0; i < this->getNumCollisionObjects(); i++) - { - rayCB.process(m_collisionObjects[i]->getBroadphaseHandle()); - } + for (int i = 0; i < this->getNumCollisionObjects(); i++) + { + rayCB.process(m_collisionObjects[i]->getBroadphaseHandle()); + } #endif //USE_BRUTEFORCE_RAYBROADPHASE - } - - void rayTestSingle(const btTransform& rayFromTrans, const btTransform& rayToTrans, - btCollisionObject* collisionObject, - const btCollisionShape* collisionShape, - const btTransform& colObjWorldTransform, - RayResultCallback& resultCallback) const - { - if (collisionShape->isSoftBody()) - { - btSoftBody* softBody = btSoftBody::upcast(collisionObject); - if (softBody) - { - btSoftBody::sRayCast softResult; - if (softBody->rayFaceTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult)) - { - if (softResult.fraction <= resultCallback.m_closestHitFraction) - { - 
btCollisionWorld::LocalShapeInfo shapeInfo; - shapeInfo.m_shapePart = 0; - shapeInfo.m_triangleIndex = softResult.index; - // get the normal - btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin(); - btVector3 normal = -rayDir; - normal.normalize(); - { - normal = softBody->m_faces[softResult.index].m_normal; - if (normal.dot(rayDir) > 0) - { - // normal always point toward origin of the ray - normal = -normal; - } - } - - btCollisionWorld::LocalRayResult rayResult(collisionObject, - &shapeInfo, - normal, - softResult.fraction); - bool normalInWorldSpace = true; - resultCallback.addSingleResult(rayResult, normalInWorldSpace); - } - } - } - } - else - { - btCollisionWorld::rayTestSingle(rayFromTrans, rayToTrans, collisionObject, collisionShape, colObjWorldTransform, resultCallback); - } - } + } + + void rayTestSingle(const btTransform& rayFromTrans, const btTransform& rayToTrans, + btCollisionObject* collisionObject, + const btCollisionShape* collisionShape, + const btTransform& colObjWorldTransform, + RayResultCallback& resultCallback) const + { + if (collisionShape->isSoftBody()) + { + btSoftBody* softBody = btSoftBody::upcast(collisionObject); + if (softBody) + { + btSoftBody::sRayCast softResult; + if (softBody->rayFaceTest(rayFromTrans.getOrigin(), rayToTrans.getOrigin(), softResult)) + { + if (softResult.fraction <= resultCallback.m_closestHitFraction) + { + btCollisionWorld::LocalShapeInfo shapeInfo; + shapeInfo.m_shapePart = 0; + shapeInfo.m_triangleIndex = softResult.index; + // get the normal + btVector3 rayDir = rayToTrans.getOrigin() - rayFromTrans.getOrigin(); + btVector3 normal = -rayDir; + normal.normalize(); + { + normal = softBody->m_faces[softResult.index].m_normal; + if (normal.dot(rayDir) > 0) + { + // normal always point toward origin of the ray + normal = -normal; + } + } + + btCollisionWorld::LocalRayResult rayResult(collisionObject, + &shapeInfo, + normal, + softResult.fraction); + bool normalInWorldSpace = true; + 
resultCallback.addSingleResult(rayResult, normalInWorldSpace); + } + } + } + } + else + { + btCollisionWorld::rayTestSingle(rayFromTrans, rayToTrans, collisionObject, collisionShape, colObjWorldTransform, resultCallback); + } + } }; #endif //BT_DEFORMABLE_MULTIBODY_DYNAMICS_WORLD_H diff --git a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h index d89bc4aca4..60798c5bcd 100644 --- a/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h +++ b/thirdparty/bullet/BulletSoftBody/btDeformableNeoHookeanForce.h @@ -23,30 +23,30 @@ subject to the following restrictions: class btDeformableNeoHookeanForce : public btDeformableLagrangianForce { public: - typedef btAlignedObjectArray<btVector3> TVStack; - btScalar m_mu, m_lambda; // Lame Parameters - btScalar m_E, m_nu; // Young's modulus and Poisson ratio - btScalar m_mu_damp, m_lambda_damp; - btDeformableNeoHookeanForce(): m_mu(1), m_lambda(1) - { - btScalar damping = 0.05; - m_mu_damp = damping * m_mu; - m_lambda_damp = damping * m_lambda; + typedef btAlignedObjectArray<btVector3> TVStack; + btScalar m_mu, m_lambda; // Lame Parameters + btScalar m_E, m_nu; // Young's modulus and Poisson ratio + btScalar m_mu_damp, m_lambda_damp; + btDeformableNeoHookeanForce() : m_mu(1), m_lambda(1) + { + btScalar damping = 0.05; + m_mu_damp = damping * m_mu; + m_lambda_damp = damping * m_lambda; updateYoungsModulusAndPoissonRatio(); - } - - btDeformableNeoHookeanForce(btScalar mu, btScalar lambda, btScalar damping = 0.05): m_mu(mu), m_lambda(lambda) - { - m_mu_damp = damping * m_mu; - m_lambda_damp = damping * m_lambda; + } + + btDeformableNeoHookeanForce(btScalar mu, btScalar lambda, btScalar damping = 0.05) : m_mu(mu), m_lambda(lambda) + { + m_mu_damp = damping * m_mu; + m_lambda_damp = damping * m_lambda; updateYoungsModulusAndPoissonRatio(); - } + } void updateYoungsModulusAndPoissonRatio() { // conversion from Lame Parameters to Young's modulus and 
Poisson ratio // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters - m_E = m_mu * (3*m_lambda + 2*m_mu)/(m_lambda + m_mu); + m_E = m_mu * (3 * m_lambda + 2 * m_mu) / (m_lambda + m_mu); m_nu = m_lambda * 0.5 / (m_mu + m_lambda); } @@ -55,21 +55,21 @@ public: // conversion from Young's modulus and Poisson ratio to Lame Parameters // https://en.wikipedia.org/wiki/Lam%C3%A9_parameters m_mu = m_E * 0.5 / (1 + m_nu); - m_lambda = m_E * m_nu / ((1 + m_nu) * (1- 2*m_nu)); + m_lambda = m_E * m_nu / ((1 + m_nu) * (1 - 2 * m_nu)); } - void setYoungsModulus(btScalar E) - { + void setYoungsModulus(btScalar E) + { m_E = E; updateLameParameters(); - } + } void setPoissonRatio(btScalar nu) { m_nu = nu; updateLameParameters(); } - + void setDamping(btScalar damping) { m_mu_damp = damping * m_mu; @@ -83,339 +83,338 @@ public: updateYoungsModulusAndPoissonRatio(); } - virtual void addScaledForces(btScalar scale, TVStack& force) - { - addScaledDampingForce(scale, force); - addScaledElasticForce(scale, force); - } - - virtual void addScaledExplicitForce(btScalar scale, TVStack& force) - { - addScaledElasticForce(scale, force); - } - - // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual void addScaledDampingForce(btScalar scale, TVStack& force) - { - if (m_mu_damp == 0 && m_lambda_damp == 0) - return; - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - 
size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse; - btMatrix3x3 I; - I.setIdentity(); - btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp; -// firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); - btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + virtual void addScaledForces(btScalar scale, TVStack& force) + { + addScaledDampingForce(scale, force); + addScaledElasticForce(scale, force); + } + + virtual void addScaledExplicitForce(btScalar scale, TVStack& force) + { + addScaledElasticForce(scale, force); + } + + // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual void addScaledDampingForce(btScalar scale, TVStack& force) + { + if (m_mu_damp == 0 && m_lambda_damp == 0) + return; + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = DsFromVelocity(node0, node1, node2, node3) * tetra.m_Dm_inverse; + btMatrix3x3 I; + I.setIdentity(); + btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0] + dF[1][1] + dF[2][2]) * m_lambda_damp; + // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); + 
btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose() * grad_N_hat_1st_col); + btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + + // damping force differential + btScalar scale1 = scale * tetra.m_element_measure; + force[id0] -= scale1 * df_on_node0; + force[id1] -= scale1 * df_on_node123.getColumn(0); + force[id2] -= scale1 * df_on_node123.getColumn(1); + force[id3] -= scale1 * df_on_node123.getColumn(2); + } + } + } + + virtual double totalElasticEnergy(btScalar dt) + { + double energy = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetraScratches.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::TetraScratch& s = psb->m_tetraScratches[j]; + energy += tetra.m_element_measure * elasticEnergyDensity(s); + } + } + return energy; + } + + // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual double totalDampingEnergy(btScalar dt) + { + double energy = 0; + int sz = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + sz = btMax(sz, psb->m_nodes[j].index); + } + } + TVStack dampingForce; + dampingForce.resize(sz + 1); + for (int i = 0; i < dampingForce.size(); ++i) + dampingForce[i].setZero(); + addScaledDampingForce(0.5, dampingForce); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + energy -= dampingForce[node.index].dot(node.m_v) / dt; + } + } + return energy; + } + + double elasticEnergyDensity(const btSoftBody::TetraScratch& s) + { + double density = 0; + density += m_mu * 0.5 * (s.m_trace - 3.); + density += m_lambda * 0.5 * (s.m_J - 1. 
- 0.75 * m_mu / m_lambda) * (s.m_J - 1. - 0.75 * m_mu / m_lambda); + density -= m_mu * 0.5 * log(s.m_trace + 1); + return density; + } - // damping force differential - btScalar scale1 = scale * tetra.m_element_measure; - force[id0] -= scale1 * df_on_node0; - force[id1] -= scale1 * df_on_node123.getColumn(0); - force[id2] -= scale1 * df_on_node123.getColumn(1); - force[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - virtual double totalElasticEnergy(btScalar dt) - { - double energy = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetraScratches.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::TetraScratch& s = psb->m_tetraScratches[j]; - energy += tetra.m_element_measure * elasticEnergyDensity(s); - } - } - return energy; - } - - // The damping energy is formulated as in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual double totalDampingEnergy(btScalar dt) - { - double energy = 0; - int sz = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - sz = btMax(sz, psb->m_nodes[j].index); - } - } - TVStack dampingForce; - dampingForce.resize(sz+1); - for (int i = 0; i < dampingForce.size(); ++i) - dampingForce[i].setZero(); - addScaledDampingForce(0.5, dampingForce); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - energy -= dampingForce[node.index].dot(node.m_v) / dt; - } - } - return energy; - } - - double elasticEnergyDensity(const btSoftBody::TetraScratch& s) - { - double density = 0; - density += m_mu * 0.5 * (s.m_trace - 3.); - density += m_lambda * 0.5 * (s.m_J - 1. 
- 0.75 * m_mu / m_lambda)* (s.m_J - 1. - 0.75 * m_mu / m_lambda); - density -= m_mu * 0.5 * log(s.m_trace+1); - return density; - } - - virtual void addScaledElasticForce(btScalar scale, TVStack& force) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= force.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - btScalar max_p = psb->m_cfg.m_maxStress; - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btMatrix3x3 P; - firstPiola(psb->m_tetraScratches[j],P); + virtual void addScaledElasticForce(btScalar scale, TVStack& force) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= force.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + btScalar max_p = psb->m_cfg.m_maxStress; + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btMatrix3x3 P; + firstPiola(psb->m_tetraScratches[j], P); #ifdef USE_SVD - if (max_p > 0) - { - // since we want to clamp the principal stress to max_p, we only need to - // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p - btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2()); - if (trPTP > max_p * max_p) - { - btMatrix3x3 U, V; - btVector3 sigma; - singularValueDecomposition(P, U, sigma, V); - sigma[0] = btMin(sigma[0], max_p); - sigma[1] = btMin(sigma[1], max_p); - sigma[2] = btMin(sigma[2], max_p); - sigma[0] = btMax(sigma[0], -max_p); - sigma[1] = btMax(sigma[1], -max_p); - sigma[2] = btMax(sigma[2], -max_p); - btMatrix3x3 Sigma; - Sigma.setIdentity(); - Sigma[0][0] = sigma[0]; - Sigma[1][1] = sigma[1]; - Sigma[2][2] = sigma[2]; - P = U * Sigma * V.transpose(); - } - } + if (max_p > 0) + { + // since we 
want to clamp the principal stress to max_p, we only need to + // calculate SVD when sigma_0^2 + sigma_1^2 + sigma_2^2 > max_p * max_p + btScalar trPTP = (P[0].length2() + P[1].length2() + P[2].length2()); + if (trPTP > max_p * max_p) + { + btMatrix3x3 U, V; + btVector3 sigma; + singularValueDecomposition(P, U, sigma, V); + sigma[0] = btMin(sigma[0], max_p); + sigma[1] = btMin(sigma[1], max_p); + sigma[2] = btMin(sigma[2], max_p); + sigma[0] = btMax(sigma[0], -max_p); + sigma[1] = btMax(sigma[1], -max_p); + sigma[2] = btMax(sigma[2], -max_p); + btMatrix3x3 Sigma; + Sigma.setIdentity(); + Sigma[0][0] = sigma[0]; + Sigma[1][1] = sigma[1]; + Sigma[2][2] = sigma[2]; + P = U * Sigma * V.transpose(); + } + } #endif -// btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose(); - btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col; - - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - - // elastic force - btScalar scale1 = scale * tetra.m_element_measure; - force[id0] -= scale1 * force_on_node0; - force[id1] -= scale1 * force_on_node123.getColumn(0); - force[id2] -= scale1 * force_on_node123.getColumn(1); - force[id3] -= scale1 * force_on_node123.getColumn(2); - } - } - } - - // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search - virtual void addScaledDampingForceDifferential(btScalar scale, const TVStack& dv, TVStack& df) - { - if (m_mu_damp == 0 && m_lambda_damp == 0) - return; - int numNodes = getNumNodes(); - btAssert(numNodes <= df.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); 
++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse; - btMatrix3x3 I; - I.setIdentity(); - btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0]+dF[1][1]+dF[2][2]) * m_lambda_damp; -// firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); -// btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); - btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + // btVector3 force_on_node0 = P * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); + btMatrix3x3 force_on_node123 = P * tetra.m_Dm_inverse.transpose(); + btVector3 force_on_node0 = force_on_node123 * grad_N_hat_1st_col; + + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + + // elastic force + btScalar scale1 = scale * tetra.m_element_measure; + force[id0] -= scale1 * force_on_node0; + force[id1] -= scale1 * force_on_node123.getColumn(0); + force[id2] -= scale1 * force_on_node123.getColumn(1); + force[id3] -= scale1 * force_on_node123.getColumn(2); + } + } + } + + // The damping matrix is calculated using the time n state as described in https://www.math.ucla.edu/~jteran/papers/GSSJT15.pdf to allow line search + virtual void addScaledDampingForceDifferential(btScalar scale, const 
TVStack& dv, TVStack& df) + { + if (m_mu_damp == 0 && m_lambda_damp == 0) + return; + int numNodes = getNumNodes(); + btAssert(numNodes <= df.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = Ds(id0, id1, id2, id3, dv) * tetra.m_Dm_inverse; + btMatrix3x3 I; + I.setIdentity(); + btMatrix3x3 dP = (dF + dF.transpose()) * m_mu_damp + I * (dF[0][0] + dF[1][1] + dF[2][2]) * m_lambda_damp; + // firstPiolaDampingDifferential(psb->m_tetraScratchesTn[j], dF, dP); + // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); + btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + + // damping force differential + btScalar scale1 = scale * tetra.m_element_measure; + df[id0] -= scale1 * df_on_node0; + df[id1] -= scale1 * df_on_node123.getColumn(0); + df[id2] -= scale1 * df_on_node123.getColumn(1); + df[id3] -= scale1 * df_on_node123.getColumn(2); + } + } + } + + virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA) {} + + virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) + { + int numNodes = getNumNodes(); + btAssert(numNodes <= df.size()); + btVector3 grad_N_hat_1st_col = btVector3(-1, -1, -1); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + if (!psb->isActive()) + { + continue; + } + for (int j = 0; j < 
psb->m_tetras.size(); ++j) + { + btSoftBody::Tetra& tetra = psb->m_tetras[j]; + btSoftBody::Node* node0 = tetra.m_n[0]; + btSoftBody::Node* node1 = tetra.m_n[1]; + btSoftBody::Node* node2 = tetra.m_n[2]; + btSoftBody::Node* node3 = tetra.m_n[3]; + size_t id0 = node0->index; + size_t id1 = node1->index; + size_t id2 = node2->index; + size_t id3 = node3->index; + btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse; + btMatrix3x3 dP; + firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP); + // btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); + btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); + btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; + + // elastic force differential + btScalar scale1 = scale * tetra.m_element_measure; + df[id0] -= scale1 * df_on_node0; + df[id1] -= scale1 * df_on_node123.getColumn(0); + df[id2] -= scale1 * df_on_node123.getColumn(1); + df[id3] -= scale1 * df_on_node123.getColumn(2); + } + } + } + + void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P) + { + btScalar c1 = (m_mu * (1. - 1. / (s.m_trace + 1.))); + btScalar c2 = (m_lambda * (s.m_J - 1.) - 0.75 * m_mu); + P = s.m_F * c1 + s.m_cofF * c2; + } + + // Let P be the first piola stress. + // This function calculates the dP = dP/dF * dF + void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) + { + btScalar c1 = m_mu * (1. - 1. / (s.m_trace + 1.)); + btScalar c2 = (2. * m_mu) * DotProduct(s.m_F, dF) * (1. / ((1. + s.m_trace) * (1. + s.m_trace))); + btScalar c3 = (m_lambda * DotProduct(s.m_cofF, dF)); + dP = dF * c1 + s.m_F * c2; + addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda * (s.m_J - 1.) 
- 0.75 * m_mu, dP); + dP += s.m_cofF * c3; + } - // damping force differential - btScalar scale1 = scale * tetra.m_element_measure; - df[id0] -= scale1 * df_on_node0; - df[id1] -= scale1 * df_on_node123.getColumn(0); - df[id2] -= scale1 * df_on_node123.getColumn(1); - df[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - virtual void buildDampingForceDifferentialDiagonal(btScalar scale, TVStack& diagA){} - - virtual void addScaledElasticForceDifferential(btScalar scale, const TVStack& dx, TVStack& df) - { - int numNodes = getNumNodes(); - btAssert(numNodes <= df.size()); - btVector3 grad_N_hat_1st_col = btVector3(-1,-1,-1); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - if (!psb->isActive()) - { - continue; - } - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - btSoftBody::Tetra& tetra = psb->m_tetras[j]; - btSoftBody::Node* node0 = tetra.m_n[0]; - btSoftBody::Node* node1 = tetra.m_n[1]; - btSoftBody::Node* node2 = tetra.m_n[2]; - btSoftBody::Node* node3 = tetra.m_n[3]; - size_t id0 = node0->index; - size_t id1 = node1->index; - size_t id2 = node2->index; - size_t id3 = node3->index; - btMatrix3x3 dF = Ds(id0, id1, id2, id3, dx) * tetra.m_Dm_inverse; - btMatrix3x3 dP; - firstPiolaDifferential(psb->m_tetraScratches[j], dF, dP); -// btVector3 df_on_node0 = dP * (tetra.m_Dm_inverse.transpose()*grad_N_hat_1st_col); - btMatrix3x3 df_on_node123 = dP * tetra.m_Dm_inverse.transpose(); - btVector3 df_on_node0 = df_on_node123 * grad_N_hat_1st_col; - - // elastic force differential - btScalar scale1 = scale * tetra.m_element_measure; - df[id0] -= scale1 * df_on_node0; - df[id1] -= scale1 * df_on_node123.getColumn(0); - df[id2] -= scale1 * df_on_node123.getColumn(1); - df[id3] -= scale1 * df_on_node123.getColumn(2); - } - } - } - - void firstPiola(const btSoftBody::TetraScratch& s, btMatrix3x3& P) - { - btScalar c1 = (m_mu * ( 1. - 1. / (s.m_trace + 1.))); - btScalar c2 = (m_lambda * (s.m_J - 1.) 
- 0.75 * m_mu); - P = s.m_F * c1 + s.m_cofF * c2; - } - - // Let P be the first piola stress. - // This function calculates the dP = dP/dF * dF - void firstPiolaDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) - { - btScalar c1 = m_mu * ( 1. - 1. / (s.m_trace + 1.)); - btScalar c2 = (2.*m_mu) * DotProduct(s.m_F, dF) * (1./((1.+s.m_trace)*(1.+s.m_trace))); - btScalar c3 = (m_lambda * DotProduct(s.m_cofF, dF)); - dP = dF * c1 + s.m_F * c2; - addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda*(s.m_J-1.) - 0.75*m_mu, dP); - dP += s.m_cofF * c3; - } - - // Let Q be the damping stress. - // This function calculates the dP = dQ/dF * dF - void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) - { - btScalar c1 = (m_mu_damp * ( 1. - 1. / (s.m_trace + 1.))); - btScalar c2 = ((2.*m_mu_damp) * DotProduct(s.m_F, dF) *(1./((1.+s.m_trace)*(1.+s.m_trace)))); - btScalar c3 = (m_lambda_damp * DotProduct(s.m_cofF, dF)); - dP = dF * c1 + s.m_F * c2; - addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda_damp*(s.m_J-1.) 
- 0.75*m_mu_damp, dP); - dP += s.m_cofF * c3; - } - - btScalar DotProduct(const btMatrix3x3& A, const btMatrix3x3& B) - { - btScalar ans = 0; - for (int i = 0; i < 3; ++i) - { - ans += A[i].dot(B[i]); - } - return ans; - } - - // Let C(A) be the cofactor of the matrix A - // Let H = the derivative of C(A) with respect to A evaluated at F = A - // This function calculates H*dF - void addScaledCofactorMatrixDifferential(const btMatrix3x3& F, const btMatrix3x3& dF, btScalar scale, btMatrix3x3& M) - { - M[0][0] += scale * (dF[1][1] * F[2][2] + F[1][1] * dF[2][2] - dF[2][1] * F[1][2] - F[2][1] * dF[1][2]); - M[1][0] += scale * (dF[2][1] * F[0][2] + F[2][1] * dF[0][2] - dF[0][1] * F[2][2] - F[0][1] * dF[2][2]); - M[2][0] += scale * (dF[0][1] * F[1][2] + F[0][1] * dF[1][2] - dF[1][1] * F[0][2] - F[1][1] * dF[0][2]); - M[0][1] += scale * (dF[2][0] * F[1][2] + F[2][0] * dF[1][2] - dF[1][0] * F[2][2] - F[1][0] * dF[2][2]); - M[1][1] += scale * (dF[0][0] * F[2][2] + F[0][0] * dF[2][2] - dF[2][0] * F[0][2] - F[2][0] * dF[0][2]); - M[2][1] += scale * (dF[1][0] * F[0][2] + F[1][0] * dF[0][2] - dF[0][0] * F[1][2] - F[0][0] * dF[1][2]); - M[0][2] += scale * (dF[1][0] * F[2][1] + F[1][0] * dF[2][1] - dF[2][0] * F[1][1] - F[2][0] * dF[1][1]); - M[1][2] += scale * (dF[2][0] * F[0][1] + F[2][0] * dF[0][1] - dF[0][0] * F[2][1] - F[0][0] * dF[2][1]); - M[2][2] += scale * (dF[0][0] * F[1][1] + F[0][0] * dF[1][1] - dF[1][0] * F[0][1] - F[1][0] * dF[0][1]); - } - - virtual btDeformableLagrangianForceType getForceType() - { - return BT_NEOHOOKEAN_FORCE; - } - + // Let Q be the damping stress. + // This function calculates the dP = dQ/dF * dF + void firstPiolaDampingDifferential(const btSoftBody::TetraScratch& s, const btMatrix3x3& dF, btMatrix3x3& dP) + { + btScalar c1 = (m_mu_damp * (1. - 1. / (s.m_trace + 1.))); + btScalar c2 = ((2. * m_mu_damp) * DotProduct(s.m_F, dF) * (1. / ((1. + s.m_trace) * (1. 
+ s.m_trace)))); + btScalar c3 = (m_lambda_damp * DotProduct(s.m_cofF, dF)); + dP = dF * c1 + s.m_F * c2; + addScaledCofactorMatrixDifferential(s.m_F, dF, m_lambda_damp * (s.m_J - 1.) - 0.75 * m_mu_damp, dP); + dP += s.m_cofF * c3; + } + + btScalar DotProduct(const btMatrix3x3& A, const btMatrix3x3& B) + { + btScalar ans = 0; + for (int i = 0; i < 3; ++i) + { + ans += A[i].dot(B[i]); + } + return ans; + } + + // Let C(A) be the cofactor of the matrix A + // Let H = the derivative of C(A) with respect to A evaluated at F = A + // This function calculates H*dF + void addScaledCofactorMatrixDifferential(const btMatrix3x3& F, const btMatrix3x3& dF, btScalar scale, btMatrix3x3& M) + { + M[0][0] += scale * (dF[1][1] * F[2][2] + F[1][1] * dF[2][2] - dF[2][1] * F[1][2] - F[2][1] * dF[1][2]); + M[1][0] += scale * (dF[2][1] * F[0][2] + F[2][1] * dF[0][2] - dF[0][1] * F[2][2] - F[0][1] * dF[2][2]); + M[2][0] += scale * (dF[0][1] * F[1][2] + F[0][1] * dF[1][2] - dF[1][1] * F[0][2] - F[1][1] * dF[0][2]); + M[0][1] += scale * (dF[2][0] * F[1][2] + F[2][0] * dF[1][2] - dF[1][0] * F[2][2] - F[1][0] * dF[2][2]); + M[1][1] += scale * (dF[0][0] * F[2][2] + F[0][0] * dF[2][2] - dF[2][0] * F[0][2] - F[2][0] * dF[0][2]); + M[2][1] += scale * (dF[1][0] * F[0][2] + F[1][0] * dF[0][2] - dF[0][0] * F[1][2] - F[0][0] * dF[1][2]); + M[0][2] += scale * (dF[1][0] * F[2][1] + F[1][0] * dF[2][1] - dF[2][0] * F[1][1] - F[2][0] * dF[1][1]); + M[1][2] += scale * (dF[2][0] * F[0][1] + F[2][0] * dF[0][1] - dF[0][0] * F[2][1] - F[0][0] * dF[2][1]); + M[2][2] += scale * (dF[0][0] * F[1][1] + F[0][0] * dF[1][1] - dF[1][0] * F[0][1] - F[1][0] * dF[0][1]); + } + + virtual btDeformableLagrangianForceType getForceType() + { + return BT_NEOHOOKEAN_FORCE; + } }; #endif /* BT_NEOHOOKEAN_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h b/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h new file mode 100644 index 0000000000..59126b47ae --- /dev/null +++ 
b/thirdparty/bullet/BulletSoftBody/btKrylovSolver.h @@ -0,0 +1,107 @@ +/* + Written by Xuchen Han <xuchenhan2015@u.northwestern.edu> + + Bullet Continuous Collision Detection and Physics Library + Copyright (c) 2019 Google Inc. http://bulletphysics.org + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from the use of this software. + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it freely, + subject to the following restrictions: + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#ifndef BT_KRYLOV_SOLVER_H +#define BT_KRYLOV_SOLVER_H +#include <iostream> +#include <cmath> +#include <limits> +#include <LinearMath/btAlignedObjectArray.h> +#include <LinearMath/btVector3.h> +#include <LinearMath/btScalar.h> +#include "LinearMath/btQuickprof.h" + +template <class MatrixX> +class btKrylovSolver +{ + typedef btAlignedObjectArray<btVector3> TVStack; + +public: + int m_maxIterations; + btScalar m_tolerance; + btKrylovSolver(int maxIterations, btScalar tolerance) + : m_maxIterations(maxIterations), m_tolerance(tolerance) + { + } + + virtual ~btKrylovSolver() {} + + virtual int solve(MatrixX& A, TVStack& x, const TVStack& b, bool verbose = false) = 0; + + virtual void reinitialize(const TVStack& b) = 0; + + virtual SIMD_FORCE_INLINE TVStack sub(const TVStack& a, const TVStack& b) + { + // c = a-b + btAssert(a.size() == b.size()); + TVStack c; + c.resize(a.size()); + for (int i = 0; i < a.size(); ++i) + { + c[i] = a[i] - b[i]; + } + return c; + } + + virtual SIMD_FORCE_INLINE btScalar squaredNorm(const TVStack& a) + { + return dot(a, a); + } + + virtual SIMD_FORCE_INLINE btScalar norm(const TVStack& a) + { + btScalar ret = 0; + for (int i = 0; i < a.size(); ++i) + { + for (int d = 0; d < 3; ++d) + { + ret = btMax(ret, btFabs(a[i][d])); + } + } + return ret; + } + + virtual SIMD_FORCE_INLINE btScalar dot(const TVStack& a, const TVStack& b) + { + btScalar ans(0); + for (int i = 0; i < a.size(); ++i) + ans += a[i].dot(b[i]); + return ans; + } + + virtual SIMD_FORCE_INLINE void multAndAddTo(btScalar s, const TVStack& a, TVStack& result) + { + // result += s*a + btAssert(a.size() == result.size()); + for (int i = 0; i < a.size(); ++i) + result[i] += s * a[i]; + } + + virtual SIMD_FORCE_INLINE TVStack multAndAdd(btScalar s, const TVStack& a, const TVStack& b) + { + // result = a*s + b + TVStack result; + result.resize(a.size()); + for (int i = 0; i < a.size(); ++i) + result[i] = s * a[i] + b[i]; + return result; + } + + virtual SIMD_FORCE_INLINE void 
setTolerance(btScalar tolerance) + { + m_tolerance = tolerance; + } +}; +#endif /* BT_KRYLOV_SOLVER_H */ diff --git a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h index c2db448ef8..21c1106a42 100644 --- a/thirdparty/bullet/BulletSoftBody/btPreconditioner.h +++ b/thirdparty/bullet/BulletSoftBody/btPreconditioner.h @@ -19,269 +19,266 @@ class Preconditioner { public: - typedef btAlignedObjectArray<btVector3> TVStack; - virtual void operator()(const TVStack& x, TVStack& b) = 0; - virtual void reinitialize(bool nodeUpdated) = 0; - virtual ~Preconditioner(){} + typedef btAlignedObjectArray<btVector3> TVStack; + virtual void operator()(const TVStack& x, TVStack& b) = 0; + virtual void reinitialize(bool nodeUpdated) = 0; + virtual ~Preconditioner() {} }; class DefaultPreconditioner : public Preconditioner { public: - virtual void operator()(const TVStack& x, TVStack& b) - { - btAssert(b.size() == x.size()); - for (int i = 0; i < b.size(); ++i) - b[i] = x[i]; - } - virtual void reinitialize(bool nodeUpdated) - { - } - - virtual ~DefaultPreconditioner(){} + virtual void operator()(const TVStack& x, TVStack& b) + { + btAssert(b.size() == x.size()); + for (int i = 0; i < b.size(); ++i) + b[i] = x[i]; + } + virtual void reinitialize(bool nodeUpdated) + { + } + + virtual ~DefaultPreconditioner() {} }; class MassPreconditioner : public Preconditioner { - btAlignedObjectArray<btScalar> m_inv_mass; - const btAlignedObjectArray<btSoftBody *>& m_softBodies; + btAlignedObjectArray<btScalar> m_inv_mass; + const btAlignedObjectArray<btSoftBody*>& m_softBodies; + public: - MassPreconditioner(const btAlignedObjectArray<btSoftBody *>& softBodies) - : m_softBodies(softBodies) - { - } - - virtual void reinitialize(bool nodeUpdated) - { - if (nodeUpdated) - { - m_inv_mass.clear(); - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - 
m_inv_mass.push_back(psb->m_nodes[j].m_im); - } - } - } - - virtual void operator()(const TVStack& x, TVStack& b) - { - btAssert(b.size() == x.size()); - btAssert(m_inv_mass.size() <= x.size()); - for (int i = 0; i < m_inv_mass.size(); ++i) - { - b[i] = x[i] * m_inv_mass[i]; - } - for (int i = m_inv_mass.size(); i < b.size(); ++i) - { - b[i] = x[i]; - } - } -}; + MassPreconditioner(const btAlignedObjectArray<btSoftBody*>& softBodies) + : m_softBodies(softBodies) + { + } + virtual void reinitialize(bool nodeUpdated) + { + if (nodeUpdated) + { + m_inv_mass.clear(); + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + m_inv_mass.push_back(psb->m_nodes[j].m_im); + } + } + } + + virtual void operator()(const TVStack& x, TVStack& b) + { + btAssert(b.size() == x.size()); + btAssert(m_inv_mass.size() <= x.size()); + for (int i = 0; i < m_inv_mass.size(); ++i) + { + b[i] = x[i] * m_inv_mass[i]; + } + for (int i = m_inv_mass.size(); i < b.size(); ++i) + { + b[i] = x[i]; + } + } +}; class KKTPreconditioner : public Preconditioner { - const btAlignedObjectArray<btSoftBody *>& m_softBodies; - const btDeformableContactProjection& m_projections; - const btAlignedObjectArray<btDeformableLagrangianForce*>& m_lf; - TVStack m_inv_A, m_inv_S; - const btScalar& m_dt; - const bool& m_implicit; + const btAlignedObjectArray<btSoftBody*>& m_softBodies; + const btDeformableContactProjection& m_projections; + const btAlignedObjectArray<btDeformableLagrangianForce*>& m_lf; + TVStack m_inv_A, m_inv_S; + const btScalar& m_dt; + const bool& m_implicit; + public: - KKTPreconditioner(const btAlignedObjectArray<btSoftBody *>& softBodies, const btDeformableContactProjection& projections, const btAlignedObjectArray<btDeformableLagrangianForce*>& lf, const btScalar& dt, const bool& implicit) - : m_softBodies(softBodies) - , m_projections(projections) - , m_lf(lf) - , m_dt(dt) - , m_implicit(implicit) - { - } - - 
virtual void reinitialize(bool nodeUpdated) - { - if (nodeUpdated) - { - int num_nodes = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - num_nodes += psb->m_nodes.size(); - } - m_inv_A.resize(num_nodes); - } - buildDiagonalA(m_inv_A); - for (int i = 0; i < m_inv_A.size(); ++i) - { -// printf("A[%d] = %f, %f, %f \n", i, m_inv_A[i][0], m_inv_A[i][1], m_inv_A[i][2]); - for (int d = 0; d < 3; ++d) - { - m_inv_A[i][d] = (m_inv_A[i][d] == 0) ? 0.0 : 1.0/ m_inv_A[i][d]; - } - } - m_inv_S.resize(m_projections.m_lagrangeMultipliers.size()); -// printf("S.size() = %d \n", m_inv_S.size()); - buildDiagonalS(m_inv_A, m_inv_S); - for (int i = 0; i < m_inv_S.size(); ++i) - { -// printf("S[%d] = %f, %f, %f \n", i, m_inv_S[i][0], m_inv_S[i][1], m_inv_S[i][2]); - for (int d = 0; d < 3; ++d) - { - m_inv_S[i][d] = (m_inv_S[i][d] == 0) ? 0.0 : 1.0/ m_inv_S[i][d]; - } - } - } - - void buildDiagonalA(TVStack& diagA) const - { - size_t counter = 0; - for (int i = 0; i < m_softBodies.size(); ++i) - { - btSoftBody* psb = m_softBodies[i]; - for (int j = 0; j < psb->m_nodes.size(); ++j) - { - const btSoftBody::Node& node = psb->m_nodes[j]; - diagA[counter] = (node.m_im == 0) ? 
btVector3(0,0,0) : btVector3(1.0/node.m_im, 1.0 / node.m_im, 1.0 / node.m_im); - ++counter; - } - } - if (m_implicit) - { - printf("implicit not implemented\n"); - btAssert(false); - } - for (int i = 0; i < m_lf.size(); ++i) - { - // add damping matrix - m_lf[i]->buildDampingForceDifferentialDiagonal(-m_dt, diagA); - } - } - - void buildDiagonalS(const TVStack& inv_A, TVStack& diagS) - { - for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) - { - // S[k,k] = e_k^T * C A_d^-1 C^T * e_k - const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; - btVector3& t = diagS[c]; - t.setZero(); - for (int j = 0; j < lm.m_num_constraints; ++j) - { - for (int i = 0; i < lm.m_num_nodes; ++i) - { - for (int d = 0; d < 3; ++d) - { - t[j] += inv_A[lm.m_indices[i]][d] * lm.m_dirs[j][d] * lm.m_dirs[j][d] * lm.m_weights[i] * lm.m_weights[i]; - } - } - } - } - } -#define USE_FULL_PRECONDITIONER + KKTPreconditioner(const btAlignedObjectArray<btSoftBody*>& softBodies, const btDeformableContactProjection& projections, const btAlignedObjectArray<btDeformableLagrangianForce*>& lf, const btScalar& dt, const bool& implicit) + : m_softBodies(softBodies), m_projections(projections), m_lf(lf), m_dt(dt), m_implicit(implicit) + { + } + + virtual void reinitialize(bool nodeUpdated) + { + if (nodeUpdated) + { + int num_nodes = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + num_nodes += psb->m_nodes.size(); + } + m_inv_A.resize(num_nodes); + } + buildDiagonalA(m_inv_A); + for (int i = 0; i < m_inv_A.size(); ++i) + { + // printf("A[%d] = %f, %f, %f \n", i, m_inv_A[i][0], m_inv_A[i][1], m_inv_A[i][2]); + for (int d = 0; d < 3; ++d) + { + m_inv_A[i][d] = (m_inv_A[i][d] == 0) ? 
0.0 : 1.0 / m_inv_A[i][d]; + } + } + m_inv_S.resize(m_projections.m_lagrangeMultipliers.size()); + // printf("S.size() = %d \n", m_inv_S.size()); + buildDiagonalS(m_inv_A, m_inv_S); + for (int i = 0; i < m_inv_S.size(); ++i) + { + // printf("S[%d] = %f, %f, %f \n", i, m_inv_S[i][0], m_inv_S[i][1], m_inv_S[i][2]); + for (int d = 0; d < 3; ++d) + { + m_inv_S[i][d] = (m_inv_S[i][d] == 0) ? 0.0 : 1.0 / m_inv_S[i][d]; + } + } + } + + void buildDiagonalA(TVStack& diagA) const + { + size_t counter = 0; + for (int i = 0; i < m_softBodies.size(); ++i) + { + btSoftBody* psb = m_softBodies[i]; + for (int j = 0; j < psb->m_nodes.size(); ++j) + { + const btSoftBody::Node& node = psb->m_nodes[j]; + diagA[counter] = (node.m_im == 0) ? btVector3(0, 0, 0) : btVector3(1.0 / node.m_im, 1.0 / node.m_im, 1.0 / node.m_im); + ++counter; + } + } + if (m_implicit) + { + printf("implicit not implemented\n"); + btAssert(false); + } + for (int i = 0; i < m_lf.size(); ++i) + { + // add damping matrix + m_lf[i]->buildDampingForceDifferentialDiagonal(-m_dt, diagA); + } + } + + void buildDiagonalS(const TVStack& inv_A, TVStack& diagS) + { + for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) + { + // S[k,k] = e_k^T * C A_d^-1 C^T * e_k + const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; + btVector3& t = diagS[c]; + t.setZero(); + for (int j = 0; j < lm.m_num_constraints; ++j) + { + for (int i = 0; i < lm.m_num_nodes; ++i) + { + for (int d = 0; d < 3; ++d) + { + t[j] += inv_A[lm.m_indices[i]][d] * lm.m_dirs[j][d] * lm.m_dirs[j][d] * lm.m_weights[i] * lm.m_weights[i]; + } + } + } + } + } +//#define USE_FULL_PRECONDITIONER #ifndef USE_FULL_PRECONDITIONER - virtual void operator()(const TVStack& x, TVStack& b) - { - btAssert(b.size() == x.size()); - for (int i = 0; i < m_inv_A.size(); ++i) - { - b[i] = x[i] * m_inv_A[i]; - } - int offset = m_inv_A.size(); - for (int i = 0; i < m_inv_S.size(); ++i) - { - b[i+offset] = x[i+offset] * m_inv_S[i]; - } - } + virtual 
void operator()(const TVStack& x, TVStack& b) + { + btAssert(b.size() == x.size()); + for (int i = 0; i < m_inv_A.size(); ++i) + { + b[i] = x[i] * m_inv_A[i]; + } + int offset = m_inv_A.size(); + for (int i = 0; i < m_inv_S.size(); ++i) + { + b[i + offset] = x[i + offset] * m_inv_S[i]; + } + } #else - virtual void operator()(const TVStack& x, TVStack& b) - { - btAssert(b.size() == x.size()); - int offset = m_inv_A.size(); + virtual void operator()(const TVStack& x, TVStack& b) + { + btAssert(b.size() == x.size()); + int offset = m_inv_A.size(); - for (int i = 0; i < m_inv_A.size(); ++i) - { - b[i] = x[i] * m_inv_A[i]; - } + for (int i = 0; i < m_inv_A.size(); ++i) + { + b[i] = x[i] * m_inv_A[i]; + } - for (int i = 0; i < m_inv_S.size(); ++i) - { - b[i+offset].setZero(); - } + for (int i = 0; i < m_inv_S.size(); ++i) + { + b[i + offset].setZero(); + } - for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) - { - const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; - // C * x - for (int d = 0; d < lm.m_num_constraints; ++d) - { - for (int i = 0; i < lm.m_num_nodes; ++i) - { - b[offset+c][d] += lm.m_weights[i] * b[lm.m_indices[i]].dot(lm.m_dirs[d]); - } - } - } + for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) + { + const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; + // C * x + for (int d = 0; d < lm.m_num_constraints; ++d) + { + for (int i = 0; i < lm.m_num_nodes; ++i) + { + b[offset + c][d] += lm.m_weights[i] * b[lm.m_indices[i]].dot(lm.m_dirs[d]); + } + } + } - for (int i = 0; i < m_inv_S.size(); ++i) - { - b[i+offset] = b[i+offset] * m_inv_S[i]; - } + for (int i = 0; i < m_inv_S.size(); ++i) + { + b[i + offset] = b[i + offset] * m_inv_S[i]; + } - for (int i = 0; i < m_inv_A.size(); ++i) - { - b[i].setZero(); - } + for (int i = 0; i < m_inv_A.size(); ++i) + { + b[i].setZero(); + } - for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) - { - // C^T * lambda - const 
LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; - for (int i = 0; i < lm.m_num_nodes; ++i) - { - for (int j = 0; j < lm.m_num_constraints; ++j) - { - b[lm.m_indices[i]] += b[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j]; - } - } - } + for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) + { + // C^T * lambda + const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; + for (int i = 0; i < lm.m_num_nodes; ++i) + { + for (int j = 0; j < lm.m_num_constraints; ++j) + { + b[lm.m_indices[i]] += b[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j]; + } + } + } - for (int i = 0; i < m_inv_A.size(); ++i) - { - b[i] = (x[i] - b[i]) * m_inv_A[i]; - } + for (int i = 0; i < m_inv_A.size(); ++i) + { + b[i] = (x[i] - b[i]) * m_inv_A[i]; + } - TVStack t; - t.resize(b.size()); - for (int i = 0; i < m_inv_S.size(); ++i) - { - t[i+offset] = x[i+offset] * m_inv_S[i]; - } - for (int i = 0; i < m_inv_A.size(); ++i) - { - t[i].setZero(); - } - for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) - { - // C^T * lambda - const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; - for (int i = 0; i < lm.m_num_nodes; ++i) - { - for (int j = 0; j < lm.m_num_constraints; ++j) - { - t[lm.m_indices[i]] += t[offset+c][j] * lm.m_weights[i] * lm.m_dirs[j]; - } - } - } - for (int i = 0; i < m_inv_A.size(); ++i) - { - b[i] += t[i] * m_inv_A[i]; - } + TVStack t; + t.resize(b.size()); + for (int i = 0; i < m_inv_S.size(); ++i) + { + t[i + offset] = x[i + offset] * m_inv_S[i]; + } + for (int i = 0; i < m_inv_A.size(); ++i) + { + t[i].setZero(); + } + for (int c = 0; c < m_projections.m_lagrangeMultipliers.size(); ++c) + { + // C^T * lambda + const LagrangeMultiplier& lm = m_projections.m_lagrangeMultipliers[c]; + for (int i = 0; i < lm.m_num_nodes; ++i) + { + for (int j = 0; j < lm.m_num_constraints; ++j) + { + t[lm.m_indices[i]] += t[offset + c][j] * lm.m_weights[i] * lm.m_dirs[j]; + } + } + } + for (int i = 0; i < m_inv_A.size(); 
++i) + { + b[i] += t[i] * m_inv_A[i]; + } - for (int i = 0; i < m_inv_S.size(); ++i) - { - b[i+offset] -= x[i+offset] * m_inv_S[i]; - } - } + for (int i = 0; i < m_inv_S.size(); ++i) + { + b[i + offset] -= x[i + offset] * m_inv_S[i]; + } + } #endif }; diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp index 81b846d7f8..d1980ea6c5 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp +++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.cpp @@ -37,12 +37,12 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l { btAlignedObjectArray<bool> marked; btAlignedObjectArray<btDbvtNode*> newLeafNodes; - btAlignedObjectArray<std::pair<int,int> > childIds; + btAlignedObjectArray<std::pair<int, int> > childIds; btAlignedObjectArray<btAlignedObjectArray<int> > newAdj; marked.resize(N); for (int i = 0; i < N; ++i) marked[i] = false; - + // pair adjacent nodes into new(parent) node for (int i = 0; i < N; ++i) { @@ -61,7 +61,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l leafNodes[i]->parent = node; leafNodes[n]->parent = node; newLeafNodes.push_back(node); - childIds.push_back(std::make_pair(i,n)); + childIds.push_back(std::make_pair(i, n)); merged = true; marked[n] = true; break; @@ -70,7 +70,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l if (!merged) { newLeafNodes.push_back(leafNodes[i]); - childIds.push_back(std::make_pair(i,-1)); + childIds.push_back(std::make_pair(i, -1)); } marked[i] = true; } @@ -78,7 +78,7 @@ static inline btDbvtNode* buildTreeBottomUp(btAlignedObjectArray<btDbvtNode*>& l newAdj.resize(newLeafNodes.size()); for (int i = 0; i < newLeafNodes.size(); ++i) { - for (int j = i+1; j < newLeafNodes.size(); ++j) + for (int j = i + 1; j < newLeafNodes.size(); ++j) { bool neighbor = false; const btAlignedObjectArray<int>& leftChildNeighbors = adj[childIds[i].first]; @@ -143,7 +143,7 @@ 
btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV /* Nodes */ const btScalar margin = getCollisionShape()->getMargin(); m_nodes.resize(node_count); - m_X.resize(node_count); + m_X.resize(node_count); for (int i = 0, ni = node_count; i < ni; ++i) { Node& n = m_nodes[i]; @@ -154,7 +154,7 @@ btSoftBody::btSoftBody(btSoftBodyWorldInfo* worldInfo, int node_count, const btV n.m_im = n.m_im > 0 ? 1 / n.m_im : 0; n.m_leaf = m_ndbvt.insert(btDbvtVolume::FromCR(n.m_x, margin), &n); n.m_material = pm; - m_X[i] = n.m_x; + m_X[i] = n.m_x; } updateBounds(); setCollisionQuadrature(3); @@ -195,8 +195,8 @@ void btSoftBody::initDefaults() m_cfg.piterations = 1; m_cfg.diterations = 0; m_cfg.citerations = 4; - m_cfg.drag = 0; - m_cfg.m_maxStress = 0; + m_cfg.drag = 0; + m_cfg.m_maxStress = 0; m_cfg.collisions = fCollision::Default; m_pose.m_bvolume = false; m_pose.m_bframe = false; @@ -222,12 +222,14 @@ void btSoftBody::initDefaults() m_windVelocity = btVector3(0, 0, 0); m_restLengthScale = btScalar(1.0); m_dampingCoefficient = 1.0; - m_sleepingThreshold = .4; + m_sleepingThreshold = .04; m_useSelfCollision = false; m_collisionFlags = 0; m_softSoftCollision = false; m_maxSpeedSquared = 0; m_repulsionStiffness = 0.5; + m_gravityFactor = 1; + m_cacheBarycenter = false; m_fdbvnt = 0; } @@ -436,7 +438,7 @@ void btSoftBody::appendFace(int model, Material* mat) ZeroInitialize(f); f.m_material = mat ? 
mat : m_materials[0]; } - m_faces.push_back(f); + m_faces.push_back(f); } // @@ -525,94 +527,111 @@ void btSoftBody::appendAnchor(int node, btRigidBody* body, const btVector3& loca // void btSoftBody::appendDeformableAnchor(int node, btRigidBody* body) { - DeformableNodeRigidAnchor c; - btSoftBody::Node& n = m_nodes[node]; - const btScalar ima = n.m_im; - const btScalar imb = body->getInvMass(); - btVector3 nrm; - const btCollisionShape* shp = body->getCollisionShape(); - const btTransform& wtr = body->getWorldTransform(); - btScalar dst = - m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(m_nodes[node].m_x), - shp, - nrm, - 0); - - c.m_cti.m_colObj = body; - c.m_cti.m_normal = wtr.getBasis() * nrm; - c.m_cti.m_offset = dst; - c.m_node = &m_nodes[node]; - const btScalar fc = m_cfg.kDF * body->getFriction(); - c.m_c2 = ima; - c.m_c3 = fc; - c.m_c4 = body->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR; - static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); - const btMatrix3x3& iwi = body->getInvInertiaTensorWorld(); - const btVector3 ra = n.m_x - wtr.getOrigin(); - - c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); - c.m_c1 = ra; - c.m_local = body->getWorldTransform().inverse() * m_nodes[node].m_x; - c.m_node->m_battach = 1; - m_deformableAnchors.push_back(c); + DeformableNodeRigidAnchor c; + btSoftBody::Node& n = m_nodes[node]; + const btScalar ima = n.m_im; + const btScalar imb = body->getInvMass(); + btVector3 nrm; + const btCollisionShape* shp = body->getCollisionShape(); + const btTransform& wtr = body->getWorldTransform(); + btScalar dst = + m_worldInfo->m_sparsesdf.Evaluate( + wtr.invXform(m_nodes[node].m_x), + shp, + nrm, + 0); + + c.m_cti.m_colObj = body; + c.m_cti.m_normal = wtr.getBasis() * nrm; + c.m_cti.m_offset = dst; + c.m_node = &m_nodes[node]; + const btScalar fc = m_cfg.kDF * body->getFriction(); + c.m_c2 = ima; + c.m_c3 = fc; + c.m_c4 = body->isStaticOrKinematicObject() ? 
m_cfg.kKHR : m_cfg.kCHR; + static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); + const btMatrix3x3& iwi = body->getInvInertiaTensorWorld(); + const btVector3 ra = n.m_x - wtr.getOrigin(); + + c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); + c.m_c1 = ra; + c.m_local = body->getWorldTransform().inverse() * m_nodes[node].m_x; + c.m_node->m_battach = 1; + m_deformableAnchors.push_back(c); +} + +void btSoftBody::removeAnchor(int node) +{ + const btSoftBody::Node& n = m_nodes[node]; + for (int i = 0; i < m_deformableAnchors.size();) + { + const DeformableNodeRigidAnchor& c = m_deformableAnchors[i]; + if (c.m_node == &n) + { + m_deformableAnchors.removeAtIndex(i); + } + else + { + i++; + } + } } // void btSoftBody::appendDeformableAnchor(int node, btMultiBodyLinkCollider* link) { - DeformableNodeRigidAnchor c; - btSoftBody::Node& n = m_nodes[node]; - const btScalar ima = n.m_im; - btVector3 nrm; - const btCollisionShape* shp = link->getCollisionShape(); - const btTransform& wtr = link->getWorldTransform(); - btScalar dst = - m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(m_nodes[node].m_x), - shp, - nrm, - 0); - c.m_cti.m_colObj = link; - c.m_cti.m_normal = wtr.getBasis() * nrm; - c.m_cti.m_offset = dst; - c.m_node = &m_nodes[node]; - const btScalar fc = m_cfg.kDF * link->getFriction(); - c.m_c2 = ima; - c.m_c3 = fc; - c.m_c4 = link->isStaticOrKinematicObject() ? 
m_cfg.kKHR : m_cfg.kCHR; - btVector3 normal = c.m_cti.m_normal; - btVector3 t1 = generateUnitOrthogonalVector(normal); - btVector3 t2 = btCross(normal, t1); - btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; - findJacobian(link, jacobianData_normal, c.m_node->m_x, normal); - findJacobian(link, jacobianData_t1, c.m_node->m_x, t1); - findJacobian(link, jacobianData_t2, c.m_node->m_x, t2); - - btScalar* J_n = &jacobianData_normal.m_jacobians[0]; - btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; - btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; - - btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - - btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), - t1.getX(), t1.getY(), t1.getZ(), - t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame - const int ndof = link->m_multiBody->getNumDofs() + 6; - btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); - c.m_c0 = rot.transpose() * local_impulse_matrix * rot; - c.jacobianData_normal = jacobianData_normal; - c.jacobianData_t1 = jacobianData_t1; - c.jacobianData_t2 = jacobianData_t2; - c.t1 = t1; - c.t2 = t2; - const btVector3 ra = n.m_x - wtr.getOrigin(); - c.m_c1 = ra; - c.m_local = link->getWorldTransform().inverse() * m_nodes[node].m_x; - c.m_node->m_battach = 1; - m_deformableAnchors.push_back(c); + DeformableNodeRigidAnchor c; + btSoftBody::Node& n = m_nodes[node]; + const btScalar ima = n.m_im; + btVector3 nrm; + const btCollisionShape* shp = link->getCollisionShape(); + const btTransform& wtr = link->getWorldTransform(); + btScalar dst = + m_worldInfo->m_sparsesdf.Evaluate( + wtr.invXform(m_nodes[node].m_x), + shp, + nrm, + 0); + c.m_cti.m_colObj = link; + c.m_cti.m_normal = wtr.getBasis() * nrm; + c.m_cti.m_offset = dst; + c.m_node = 
&m_nodes[node]; + const btScalar fc = m_cfg.kDF * link->getFriction(); + c.m_c2 = ima; + c.m_c3 = fc; + c.m_c4 = link->isStaticOrKinematicObject() ? m_cfg.kKHR : m_cfg.kCHR; + btVector3 normal = c.m_cti.m_normal; + btVector3 t1 = generateUnitOrthogonalVector(normal); + btVector3 t2 = btCross(normal, t1); + btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; + findJacobian(link, jacobianData_normal, c.m_node->m_x, normal); + findJacobian(link, jacobianData_t1, c.m_node->m_x, t1); + findJacobian(link, jacobianData_t2, c.m_node->m_x, t2); + + btScalar* J_n = &jacobianData_normal.m_jacobians[0]; + btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; + btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; + + btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + + btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), + t1.getX(), t1.getY(), t1.getZ(), + t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame + const int ndof = link->m_multiBody->getNumDofs() + 6; + btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); + c.m_c0 = rot.transpose() * local_impulse_matrix * rot; + c.jacobianData_normal = jacobianData_normal; + c.jacobianData_t1 = jacobianData_t1; + c.jacobianData_t2 = jacobianData_t2; + c.t1 = t1; + c.t2 = t2; + const btVector3 ra = n.m_x - wtr.getOrigin(); + c.m_c1 = ra; + c.m_local = link->getWorldTransform().inverse() * m_nodes[node].m_x; + c.m_node->m_battach = 1; + m_deformableAnchors.push_back(c); } // void btSoftBody::appendLinearJoint(const LJoint::Specs& specs, Cluster* body0, Body body1) @@ -731,7 +750,7 @@ void btSoftBody::addAeroForceToNode(const btVector3& windVelocity, int nodeIndex fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm); // Check angle of attack - 
// cos(10º) = 0.98480 + // cos(10º) = 0.98480 if (0 < n_dot_v && n_dot_v < 0.98480f) fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f - n_dot_v * n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm)); @@ -817,7 +836,7 @@ void btSoftBody::addAeroForceToFace(const btVector3& windVelocity, int faceIndex fDrag = 0.5f * kDG * medium.m_density * rel_v2 * tri_area * n_dot_v * (-rel_v_nrm); // Check angle of attack - // cos(10º) = 0.98480 + // cos(10º) = 0.98480 if (0 < n_dot_v && n_dot_v < 0.98480f) fLift = 0.5f * kLF * medium.m_density * rel_v_len * tri_area * btSqrt(1.0f - n_dot_v * n_dot_v) * (nrm.cross(rel_v_nrm).cross(rel_v_nrm)); @@ -882,6 +901,7 @@ void btSoftBody::setVelocity(const btVector3& velocity) if (n.m_im > 0) { n.m_v = velocity; + n.m_vn = velocity; } } } @@ -1010,66 +1030,70 @@ void btSoftBody::setVolumeDensity(btScalar density) // btVector3 btSoftBody::getLinearVelocity() { - btVector3 total_momentum = btVector3(0,0,0); - for (int i = 0; i < m_nodes.size(); ++i) - { - btScalar mass = m_nodes[i].m_im == 0 ? 0 : 1.0/m_nodes[i].m_im; - total_momentum += mass * m_nodes[i].m_v; - } - btScalar total_mass = getTotalMass(); - return total_mass == 0 ? total_momentum : total_momentum / total_mass; + btVector3 total_momentum = btVector3(0, 0, 0); + for (int i = 0; i < m_nodes.size(); ++i) + { + btScalar mass = m_nodes[i].m_im == 0 ? 0 : 1.0 / m_nodes[i].m_im; + total_momentum += mass * m_nodes[i].m_v; + } + btScalar total_mass = getTotalMass(); + return total_mass == 0 ? 
total_momentum : total_momentum / total_mass; } // void btSoftBody::setLinearVelocity(const btVector3& linVel) { - btVector3 old_vel = getLinearVelocity(); - btVector3 diff = linVel - old_vel; - for (int i = 0; i < m_nodes.size(); ++i) - m_nodes[i].m_v += diff; + btVector3 old_vel = getLinearVelocity(); + btVector3 diff = linVel - old_vel; + for (int i = 0; i < m_nodes.size(); ++i) + m_nodes[i].m_v += diff; } // void btSoftBody::setAngularVelocity(const btVector3& angVel) { - btVector3 old_vel = getLinearVelocity(); - btVector3 com = getCenterOfMass(); - for (int i = 0; i < m_nodes.size(); ++i) - { - m_nodes[i].m_v = angVel.cross(m_nodes[i].m_x - com) + old_vel; - } + btVector3 old_vel = getLinearVelocity(); + btVector3 com = getCenterOfMass(); + for (int i = 0; i < m_nodes.size(); ++i) + { + m_nodes[i].m_v = angVel.cross(m_nodes[i].m_x - com) + old_vel; + } } // btTransform btSoftBody::getRigidTransform() { - btVector3 t = getCenterOfMass(); - btMatrix3x3 S; - S.setZero(); - // get rotation that minimizes L2 difference: \sum_i || RX_i + t - x_i || - for (int i = 0; i < m_nodes.size(); ++i) - { - S += OuterProduct(m_X[i], t-m_nodes[i].m_x); - } - btVector3 sigma; - btMatrix3x3 U,V; - singularValueDecomposition(S,U,sigma,V); - btMatrix3x3 R = V * U.transpose(); - btTransform trs; - trs.setIdentity(); - trs.setOrigin(t); - trs.setBasis(R); - return trs; + btVector3 t = getCenterOfMass(); + btMatrix3x3 S; + S.setZero(); + // Get rotation that minimizes L2 difference: \sum_i || RX_i + t - x_i || + // It's important to make sure that S has the correct signs. + // SVD is only unique up to the ordering of singular values. + // SVD will manipulate U and V to ensure the ordering of singular values. If all three singular + // vaues are negative, SVD will permute colums of U to make two of them positive. 
+ for (int i = 0; i < m_nodes.size(); ++i) + { + S -= OuterProduct(m_X[i], t - m_nodes[i].m_x); + } + btVector3 sigma; + btMatrix3x3 U, V; + singularValueDecomposition(S, U, sigma, V); + btMatrix3x3 R = V * U.transpose(); + btTransform trs; + trs.setIdentity(); + trs.setOrigin(t); + trs.setBasis(R); + return trs; } // void btSoftBody::transformTo(const btTransform& trs) { - // get the current best rigid fit - btTransform current_transform = getRigidTransform(); - // apply transform in material space - btTransform new_transform = trs * current_transform.inverse(); - transform(new_transform); + // get the current best rigid fit + btTransform current_transform = getRigidTransform(); + // apply transform in material space + btTransform new_transform = trs * current_transform.inverse(); + transform(new_transform); } // @@ -1130,7 +1154,7 @@ void btSoftBody::scale(const btVector3& scl) updateNormals(); updateBounds(); updateConstants(); - initializeDmInverse(); + initializeDmInverse(); } // @@ -2010,22 +2034,22 @@ bool btSoftBody::rayTest(const btVector3& rayFrom, } bool btSoftBody::rayFaceTest(const btVector3& rayFrom, - const btVector3& rayTo, - sRayCast& results) + const btVector3& rayTo, + sRayCast& results) { if (m_faces.size() == 0) return false; else { - if (m_fdbvt.empty()) - initializeFaceTree(); + if (m_fdbvt.empty()) + initializeFaceTree(); } - - results.body = this; - results.fraction = 1.f; - results.index = -1; - - return (rayFaceTest(rayFrom, rayTo, results.fraction, results.index) != 0); + + results.body = this; + results.fraction = 1.f; + results.index = -1; + + return (rayFaceTest(rayFrom, rayTo, results.fraction, results.index) != 0); } // @@ -2056,112 +2080,111 @@ void btSoftBody::setSolver(eSolverPresets::_ preset) void btSoftBody::predictMotion(btScalar dt) { - int i, ni; - - /* Update */ - if (m_bUpdateRtCst) - { - m_bUpdateRtCst = false; - updateConstants(); - m_fdbvt.clear(); - if (m_cfg.collisions & fCollision::VF_SS) - { - initializeFaceTree(); 
- } - } - - /* Prepare */ - m_sst.sdt = dt * m_cfg.timescale; - m_sst.isdt = 1 / m_sst.sdt; - m_sst.velmrg = m_sst.sdt * 3; - m_sst.radmrg = getCollisionShape()->getMargin(); - m_sst.updmrg = m_sst.radmrg * (btScalar)0.25; - /* Forces */ - addVelocity(m_worldInfo->m_gravity * m_sst.sdt); - applyForces(); - /* Integrate */ - for (i = 0, ni = m_nodes.size(); i < ni; ++i) - { - Node& n = m_nodes[i]; - n.m_q = n.m_x; - btVector3 deltaV = n.m_f * n.m_im * m_sst.sdt; - { - btScalar maxDisplacement = m_worldInfo->m_maxDisplacement; - btScalar clampDeltaV = maxDisplacement / m_sst.sdt; - for (int c = 0; c < 3; c++) - { - if (deltaV[c] > clampDeltaV) - { - deltaV[c] = clampDeltaV; - } - if (deltaV[c] < -clampDeltaV) - { - deltaV[c] = -clampDeltaV; - } - } - } - n.m_v += deltaV; - n.m_x += n.m_v * m_sst.sdt; - n.m_f = btVector3(0, 0, 0); - } - /* Clusters */ - updateClusters(); - /* Bounds */ - updateBounds(); - /* Nodes */ - ATTRIBUTE_ALIGNED16(btDbvtVolume) - vol; - for (i = 0, ni = m_nodes.size(); i < ni; ++i) - { - Node& n = m_nodes[i]; - vol = btDbvtVolume::FromCR(n.m_x, m_sst.radmrg); - m_ndbvt.update(n.m_leaf, - vol, - n.m_v * m_sst.velmrg, - m_sst.updmrg); - } - /* Faces */ - if (!m_fdbvt.empty()) - { - for (int i = 0; i < m_faces.size(); ++i) - { - Face& f = m_faces[i]; - const btVector3 v = (f.m_n[0]->m_v + - f.m_n[1]->m_v + - f.m_n[2]->m_v) / - 3; - vol = VolumeOf(f, m_sst.radmrg); - m_fdbvt.update(f.m_leaf, - vol, - v * m_sst.velmrg, - m_sst.updmrg); - } - } - /* Pose */ - updatePose(); - /* Match */ - if (m_pose.m_bframe && (m_cfg.kMT > 0)) - { - const btMatrix3x3 posetrs = m_pose.m_rot; - for (int i = 0, ni = m_nodes.size(); i < ni; ++i) - { - Node& n = m_nodes[i]; - if (n.m_im > 0) - { - const btVector3 x = posetrs * m_pose.m_pos[i] + m_pose.m_com; - n.m_x = Lerp(n.m_x, x, m_cfg.kMT); - } - } - } - /* Clear contacts */ - m_rcontacts.resize(0); - m_scontacts.resize(0); - /* Optimize dbvt's */ - m_ndbvt.optimizeIncremental(1); - m_fdbvt.optimizeIncremental(1); - 
m_cdbvt.optimizeIncremental(1); -} + int i, ni; + /* Update */ + if (m_bUpdateRtCst) + { + m_bUpdateRtCst = false; + updateConstants(); + m_fdbvt.clear(); + if (m_cfg.collisions & fCollision::VF_SS) + { + initializeFaceTree(); + } + } + + /* Prepare */ + m_sst.sdt = dt * m_cfg.timescale; + m_sst.isdt = 1 / m_sst.sdt; + m_sst.velmrg = m_sst.sdt * 3; + m_sst.radmrg = getCollisionShape()->getMargin(); + m_sst.updmrg = m_sst.radmrg * (btScalar)0.25; + /* Forces */ + addVelocity(m_worldInfo->m_gravity * m_sst.sdt); + applyForces(); + /* Integrate */ + for (i = 0, ni = m_nodes.size(); i < ni; ++i) + { + Node& n = m_nodes[i]; + n.m_q = n.m_x; + btVector3 deltaV = n.m_f * n.m_im * m_sst.sdt; + { + btScalar maxDisplacement = m_worldInfo->m_maxDisplacement; + btScalar clampDeltaV = maxDisplacement / m_sst.sdt; + for (int c = 0; c < 3; c++) + { + if (deltaV[c] > clampDeltaV) + { + deltaV[c] = clampDeltaV; + } + if (deltaV[c] < -clampDeltaV) + { + deltaV[c] = -clampDeltaV; + } + } + } + n.m_v += deltaV; + n.m_x += n.m_v * m_sst.sdt; + n.m_f = btVector3(0, 0, 0); + } + /* Clusters */ + updateClusters(); + /* Bounds */ + updateBounds(); + /* Nodes */ + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol; + for (i = 0, ni = m_nodes.size(); i < ni; ++i) + { + Node& n = m_nodes[i]; + vol = btDbvtVolume::FromCR(n.m_x, m_sst.radmrg); + m_ndbvt.update(n.m_leaf, + vol, + n.m_v * m_sst.velmrg, + m_sst.updmrg); + } + /* Faces */ + if (!m_fdbvt.empty()) + { + for (int i = 0; i < m_faces.size(); ++i) + { + Face& f = m_faces[i]; + const btVector3 v = (f.m_n[0]->m_v + + f.m_n[1]->m_v + + f.m_n[2]->m_v) / + 3; + vol = VolumeOf(f, m_sst.radmrg); + m_fdbvt.update(f.m_leaf, + vol, + v * m_sst.velmrg, + m_sst.updmrg); + } + } + /* Pose */ + updatePose(); + /* Match */ + if (m_pose.m_bframe && (m_cfg.kMT > 0)) + { + const btMatrix3x3 posetrs = m_pose.m_rot; + for (int i = 0, ni = m_nodes.size(); i < ni; ++i) + { + Node& n = m_nodes[i]; + if (n.m_im > 0) + { + const btVector3 x = posetrs * m_pose.m_pos[i] + 
m_pose.m_com; + n.m_x = Lerp(n.m_x, x, m_cfg.kMT); + } + } + } + /* Clear contacts */ + m_rcontacts.resize(0); + m_scontacts.resize(0); + /* Optimize dbvt's */ + m_ndbvt.optimizeIncremental(1); + m_fdbvt.optimizeIncremental(1); + m_cdbvt.optimizeIncremental(1); +} // void btSoftBody::solveConstraints() @@ -2534,12 +2557,12 @@ int btSoftBody::rayTest(const btVector3& rayFrom, const btVector3& rayTo, } int btSoftBody::rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo, - btScalar& mint, int& index) const + btScalar& mint, int& index) const { int cnt = 0; { /* Use dbvt */ RayFromToCaster collider(rayFrom, rayTo, mint); - + btDbvt::rayTest(m_fdbvt.m_root, rayFrom, rayTo, collider); if (collider.m_face) { @@ -2551,7 +2574,6 @@ int btSoftBody::rayFaceTest(const btVector3& rayFrom, const btVector3& rayTo, return (cnt); } - // static inline btDbvntNode* copyToDbvnt(const btDbvtNode* n) { @@ -2580,7 +2602,7 @@ static inline void calculateNormalCone(btDbvntNode* root) } else { - btVector3 n0(0,0,0), n1(0,0,0); + btVector3 n0(0, 0, 0), n1(0, 0, 0); btScalar a0 = 0, a1 = 0; if (root->childs[0]) { @@ -2594,8 +2616,8 @@ static inline void calculateNormalCone(btDbvntNode* root) n1 = root->childs[1]->normal; a1 = root->childs[1]->angle; } - root->normal = (n0+n1).safeNormalize(); - root->angle = btMax(a0,a1) + btAngle(n0, n1)*0.5; + root->normal = (n0 + n1).safeNormalize(); + root->angle = btMax(a0, a1) + btAngle(n0, n1) * 0.5; } } @@ -2609,7 +2631,8 @@ void btSoftBody::initializeFaceTree() for (int i = 0; i < m_faces.size(); ++i) { Face& f = m_faces[i]; - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = VolumeOf(f, 0); + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol = VolumeOf(f, 0); btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode(); node->parent = NULL; node->data = &f; @@ -2623,7 +2646,7 @@ void btSoftBody::initializeFaceTree() // construct the adjacency list for triangles for (int i = 0; i < adj.size(); ++i) { - for (int j = i+1; j < adj.size(); ++j) + 
for (int j = i + 1; j < adj.size(); ++j) { int dup = 0; for (int k = 0; k < 3; ++k) @@ -2661,7 +2684,8 @@ void btSoftBody::rebuildNodeTree() for (int i = 0; i < m_nodes.size(); ++i) { Node& n = m_nodes[i]; - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol = btDbvtVolume::FromCR(n.m_x, 0); + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol = btDbvtVolume::FromCR(n.m_x, 0); btDbvtNode* node = new (btAlignedAlloc(sizeof(btDbvtNode), 16)) btDbvtNode(); node->parent = NULL; node->data = &n; @@ -2704,61 +2728,61 @@ btVector3 btSoftBody::evaluateCom() const } bool btSoftBody::checkContact(const btCollisionObjectWrapper* colObjWrap, - const btVector3& x, - btScalar margin, - btSoftBody::sCti& cti) const -{ - btVector3 nrm; - const btCollisionShape* shp = colObjWrap->getCollisionShape(); - // const btRigidBody *tmpRigid = btRigidBody::upcast(colObjWrap->getCollisionObject()); - //const btTransform &wtr = tmpRigid ? tmpRigid->getWorldTransform() : colObjWrap->getWorldTransform(); - const btTransform& wtr = colObjWrap->getWorldTransform(); - //todo: check which transform is needed here - - btScalar dst = - m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(x), - shp, - nrm, - margin); - if (dst < 0) - { - cti.m_colObj = colObjWrap->getCollisionObject(); - cti.m_normal = wtr.getBasis() * nrm; - cti.m_offset = -btDot(cti.m_normal, x - cti.m_normal * dst); - return (true); - } - return (false); + const btVector3& x, + btScalar margin, + btSoftBody::sCti& cti) const +{ + btVector3 nrm; + const btCollisionShape* shp = colObjWrap->getCollisionShape(); + // const btRigidBody *tmpRigid = btRigidBody::upcast(colObjWrap->getCollisionObject()); + //const btTransform &wtr = tmpRigid ? 
tmpRigid->getWorldTransform() : colObjWrap->getWorldTransform(); + const btTransform& wtr = colObjWrap->getWorldTransform(); + //todo: check which transform is needed here + + btScalar dst = + m_worldInfo->m_sparsesdf.Evaluate( + wtr.invXform(x), + shp, + nrm, + margin); + if (dst < 0) + { + cti.m_colObj = colObjWrap->getCollisionObject(); + cti.m_normal = wtr.getBasis() * nrm; + cti.m_offset = -btDot(cti.m_normal, x - cti.m_normal * dst); + return (true); + } + return (false); } // bool btSoftBody::checkDeformableContact(const btCollisionObjectWrapper* colObjWrap, - const btVector3& x, - btScalar margin, - btSoftBody::sCti& cti, bool predict) const + const btVector3& x, + btScalar margin, + btSoftBody::sCti& cti, bool predict) const { btVector3 nrm; const btCollisionShape* shp = colObjWrap->getCollisionShape(); - const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject(); - // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect - // but resolve contact at x_n - btTransform wtr = (predict) ? - (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform()) - : colObjWrap->getWorldTransform(); + const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject(); + // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect + // but resolve contact at x_n + btTransform wtr = (predict) ? (colObjWrap->m_preTransform != NULL ? 
tmpCollisionObj->getInterpolationWorldTransform() * (*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform()) + : colObjWrap->getWorldTransform(); btScalar dst = m_worldInfo->m_sparsesdf.Evaluate( wtr.invXform(x), shp, nrm, margin); + if (!predict) { cti.m_colObj = colObjWrap->getCollisionObject(); cti.m_normal = wtr.getBasis() * nrm; - cti.m_offset = dst; + cti.m_offset = dst; } - if (dst < 0) - return true; + if (dst < 0) + return true; return (false); } @@ -2767,175 +2791,131 @@ bool btSoftBody::checkDeformableContact(const btCollisionObjectWrapper* colObjWr // point p with respect to triangle (a, b, c) static void getBarycentric(const btVector3& p, btVector3& a, btVector3& b, btVector3& c, btVector3& bary) { - btVector3 v0 = b - a, v1 = c - a, v2 = p - a; - btScalar d00 = v0.dot(v0); - btScalar d01 = v0.dot(v1); - btScalar d11 = v1.dot(v1); - btScalar d20 = v2.dot(v0); - btScalar d21 = v2.dot(v1); - btScalar denom = d00 * d11 - d01 * d01; - bary.setY((d11 * d20 - d01 * d21) / denom); - bary.setZ((d00 * d21 - d01 * d20) / denom); - bary.setX(btScalar(1) - bary.getY() - bary.getZ()); + btVector3 v0 = b - a, v1 = c - a, v2 = p - a; + btScalar d00 = v0.dot(v0); + btScalar d01 = v0.dot(v1); + btScalar d11 = v1.dot(v1); + btScalar d20 = v2.dot(v0); + btScalar d21 = v2.dot(v1); + btScalar denom = d00 * d11 - d01 * d01; + bary.setY((d11 * d20 - d01 * d21) / denom); + bary.setZ((d00 * d21 - d01 * d20) / denom); + bary.setX(btScalar(1) - bary.getY() - bary.getZ()); } // bool btSoftBody::checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, - Face& f, - btVector3& contact_point, - btVector3& bary, - btScalar margin, - btSoftBody::sCti& cti, bool predict) const -{ - btVector3 nrm; - const btCollisionShape* shp = colObjWrap->getCollisionShape(); - const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject(); - // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect - // 
but resolve contact at x_n - btTransform wtr = (predict) ? - (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform()*(*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform()) - : colObjWrap->getWorldTransform(); - btScalar dst; - -//#define USE_QUADRATURE 1 -//#define CACHE_PREV_COLLISION - - // use the contact position of the previous collision -#ifdef CACHE_PREV_COLLISION - if (f.m_pcontact[3] != 0) - { - for (int i = 0; i < 3; ++i) - bary[i] = f.m_pcontact[i]; - contact_point = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); - dst = m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(contact_point), - shp, - nrm, - margin); - nrm = wtr.getBasis() * nrm; - cti.m_colObj = colObjWrap->getCollisionObject(); - // use cached contact point - } - else - { - btGjkEpaSolver2::sResults results; - btTransform triangle_transform; - triangle_transform.setIdentity(); - triangle_transform.setOrigin(f.m_n[0]->m_x); - btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x); - btVector3 guess(0,0,0); - const btConvexShape* csh = static_cast<const btConvexShape*>(shp); - btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results); - dst = results.distance - margin; - contact_point = results.witnesses[0]; - getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); - nrm = results.normal; - cti.m_colObj = colObjWrap->getCollisionObject(); - for (int i = 0; i < 3; ++i) - f.m_pcontact[i] = bary[i]; - } - return (dst < 0); -#endif + Face& f, + btVector3& contact_point, + btVector3& bary, + btScalar margin, + btSoftBody::sCti& cti, bool predict) const +{ + btVector3 nrm; + const btCollisionShape* shp = colObjWrap->getCollisionShape(); + const btCollisionObject* tmpCollisionObj = colObjWrap->getCollisionObject(); + // use the position x_{n+1}^* = x_n + dt * v_{n+1}^* where v_{n+1}^* = v_n + dtg for collision detect + // but 
resolve contact at x_n + btTransform wtr = (predict) ? (colObjWrap->m_preTransform != NULL ? tmpCollisionObj->getInterpolationWorldTransform() * (*colObjWrap->m_preTransform) : tmpCollisionObj->getInterpolationWorldTransform()) + : colObjWrap->getWorldTransform(); + btScalar dst; + btGjkEpaSolver2::sResults results; + +// #define USE_QUADRATURE 1 - // use collision quadrature point + // use collision quadrature point #ifdef USE_QUADRATURE - { - dst = SIMD_INFINITY; - btVector3 local_nrm; - for (int q = 0; q < m_quads.size(); ++q) - { - btVector3 p; - if (predict) - p = BaryEval(f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, m_quads[q]); - else - p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]); - btScalar local_dst = m_worldInfo->m_sparsesdf.Evaluate( - wtr.invXform(p), - shp, - local_nrm, - margin); - if (local_dst < dst) - { - if (local_dst < 0 && predict) - return true; - dst = local_dst; - contact_point = p; - bary = m_quads[q]; - nrm = local_nrm; - } - if (!predict) - { - cti.m_colObj = colObjWrap->getCollisionObject(); - cti.m_normal = wtr.getBasis() * nrm; - cti.m_offset = dst; - } - } - return (dst < 0); - } + { + dst = SIMD_INFINITY; + btVector3 local_nrm; + for (int q = 0; q < m_quads.size(); ++q) + { + btVector3 p; + if (predict) + p = BaryEval(f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, m_quads[q]); + else + p = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, m_quads[q]); + btScalar local_dst = m_worldInfo->m_sparsesdf.Evaluate( + wtr.invXform(p), + shp, + local_nrm, + margin); + if (local_dst < dst) + { + if (local_dst < 0 && predict) + return true; + dst = local_dst; + contact_point = p; + bary = m_quads[q]; + nrm = local_nrm; + } + if (!predict) + { + cti.m_colObj = colObjWrap->getCollisionObject(); + cti.m_normal = wtr.getBasis() * nrm; + cti.m_offset = dst; + } + } + return (dst < 0); + } #endif - -// // regular face contact -// { -// btGjkEpaSolver2::sResults results; -// btTransform triangle_transform; -// 
triangle_transform.setIdentity(); -// triangle_transform.setOrigin(f.m_n[0]->m_x); -// btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_x-f.m_n[0]->m_x, f.m_n[2]->m_x-f.m_n[0]->m_x); -// btVector3 guess(0,0,0); -// if (predict) -// { -// triangle_transform.setOrigin(f.m_n[0]->m_q); -// triangle = btTriangleShape(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q); -// } -// const btConvexShape* csh = static_cast<const btConvexShape*>(shp); -//// btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results); -//// dst = results.distance - margin; -//// contact_point = results.witnesses[0]; -// btGjkEpaSolver2::Penetration(&triangle, triangle_transform, csh, wtr, guess, results); -// if (results.status == btGjkEpaSolver2::sResults::Separated) -// return false; -// dst = results.distance - margin; -// contact_point = results.witnesses[1]; -// getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); -// nrm = results.normal; -// for (int i = 0; i < 3; ++i) -// f.m_pcontact[i] = bary[i]; -// } -// -// if (!predict) -// { -// cti.m_colObj = colObjWrap->getCollisionObject(); -// cti.m_normal = nrm; -// cti.m_offset = dst; -// } -// - - // regular face contact - { - btGjkEpaSolver2::sResults results; - btTransform triangle_transform; - triangle_transform.setIdentity(); - triangle_transform.setOrigin(f.m_n[0]->m_q); - btTriangleShape triangle(btVector3(0,0,0), f.m_n[1]->m_q-f.m_n[0]->m_q, f.m_n[2]->m_q-f.m_n[0]->m_q); - btVector3 guess(0,0,0); - const btConvexShape* csh = static_cast<const btConvexShape*>(shp); - btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results); - dst = results.distance-csh->getMargin(); - dst -= margin; - if (dst >= 0) - return false; - contact_point = results.witnesses[0]; - getBarycentric(contact_point, f.m_n[0]->m_q, f.m_n[1]->m_q, f.m_n[2]->m_q, bary); - btVector3 curr = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); - 
nrm = results.normal; - cti.m_colObj = colObjWrap->getCollisionObject(); - cti.m_normal = nrm; - cti.m_offset = dst + (curr - contact_point).dot(nrm); - } - return (dst < 0); + + // collision detection using x* + btTransform triangle_transform; + triangle_transform.setIdentity(); + triangle_transform.setOrigin(f.m_n[0]->m_q); + btTriangleShape triangle(btVector3(0, 0, 0), f.m_n[1]->m_q - f.m_n[0]->m_q, f.m_n[2]->m_q - f.m_n[0]->m_q); + btVector3 guess(0, 0, 0); + const btConvexShape* csh = static_cast<const btConvexShape*>(shp); + btGjkEpaSolver2::SignedDistance(&triangle, triangle_transform, csh, wtr, guess, results); + dst = results.distance - 2.0 * csh->getMargin() - margin; // margin padding so that the distance = the actual distance between face and rigid - margin of rigid - margin of deformable + if (dst >= 0) + return false; + + // Use consistent barycenter to recalculate distance. + if (this->m_cacheBarycenter) + { + if (f.m_pcontact[3] != 0) + { + for (int i = 0; i < 3; ++i) + bary[i] = f.m_pcontact[i]; + contact_point = BaryEval(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); + const btConvexShape* csh = static_cast<const btConvexShape*>(shp); + btGjkEpaSolver2::SignedDistance(contact_point, margin, csh, wtr, results); + cti.m_colObj = colObjWrap->getCollisionObject(); + dst = results.distance; + cti.m_normal = results.normal; + cti.m_offset = dst; + + //point-convex CD + wtr = colObjWrap->getWorldTransform(); + btTriangleShape triangle2(btVector3(0, 0, 0), f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x); + triangle_transform.setOrigin(f.m_n[0]->m_x); + btGjkEpaSolver2::SignedDistance(&triangle2, triangle_transform, csh, wtr, guess, results); + + dst = results.distance - csh->getMargin() - margin; + return true; + } + } + + // Use triangle-convex CD. 
+ wtr = colObjWrap->getWorldTransform(); + btTriangleShape triangle2(btVector3(0, 0, 0), f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x); + triangle_transform.setOrigin(f.m_n[0]->m_x); + btGjkEpaSolver2::SignedDistance(&triangle2, triangle_transform, csh, wtr, guess, results); + contact_point = results.witnesses[0]; + getBarycentric(contact_point, f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, bary); + + for (int i = 0; i < 3; ++i) + f.m_pcontact[i] = bary[i]; + + dst = results.distance - csh->getMargin() - margin; + cti.m_colObj = colObjWrap->getCollisionObject(); + cti.m_normal = results.normal; + cti.m_offset = dst; + return true; } -// void btSoftBody::updateNormals() { const btVector3 zv(0, 0, 0); @@ -2979,63 +2959,63 @@ void btSoftBody::updateBounds() m_bounds[1] = btVector3(1000, 1000, 1000); } else {*/ -// if (m_ndbvt.m_root) -// { -// const btVector3& mins = m_ndbvt.m_root->volume.Mins(); -// const btVector3& maxs = m_ndbvt.m_root->volume.Maxs(); -// const btScalar csm = getCollisionShape()->getMargin(); -// const btVector3 mrg = btVector3(csm, -// csm, -// csm) * -// 1; // ??? to investigate... 
-// m_bounds[0] = mins - mrg; -// m_bounds[1] = maxs + mrg; -// if (0 != getBroadphaseHandle()) -// { -// m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(), -// m_bounds[0], -// m_bounds[1], -// m_worldInfo->m_dispatcher); -// } -// } -// else -// { -// m_bounds[0] = -// m_bounds[1] = btVector3(0, 0, 0); -// } - if (m_nodes.size()) - { - btVector3 mins = m_nodes[0].m_x; - btVector3 maxs = m_nodes[0].m_x; - for (int i = 1; i < m_nodes.size(); ++i) - { - for (int d = 0; d < 3; ++d) - { - if (m_nodes[i].m_x[d] > maxs[d]) - maxs[d] = m_nodes[i].m_x[d]; - if (m_nodes[i].m_x[d] < mins[d]) - mins[d] = m_nodes[i].m_x[d]; - } - } - const btScalar csm = getCollisionShape()->getMargin(); - const btVector3 mrg = btVector3(csm, - csm, - csm); - m_bounds[0] = mins - mrg; - m_bounds[1] = maxs + mrg; - if (0 != getBroadphaseHandle()) - { - m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(), - m_bounds[0], - m_bounds[1], - m_worldInfo->m_dispatcher); - } - } - else - { - m_bounds[0] = - m_bounds[1] = btVector3(0, 0, 0); - } + // if (m_ndbvt.m_root) + // { + // const btVector3& mins = m_ndbvt.m_root->volume.Mins(); + // const btVector3& maxs = m_ndbvt.m_root->volume.Maxs(); + // const btScalar csm = getCollisionShape()->getMargin(); + // const btVector3 mrg = btVector3(csm, + // csm, + // csm) * + // 1; // ??? to investigate... 
+ // m_bounds[0] = mins - mrg; + // m_bounds[1] = maxs + mrg; + // if (0 != getBroadphaseHandle()) + // { + // m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(), + // m_bounds[0], + // m_bounds[1], + // m_worldInfo->m_dispatcher); + // } + // } + // else + // { + // m_bounds[0] = + // m_bounds[1] = btVector3(0, 0, 0); + // } + if (m_nodes.size()) + { + btVector3 mins = m_nodes[0].m_x; + btVector3 maxs = m_nodes[0].m_x; + for (int i = 1; i < m_nodes.size(); ++i) + { + for (int d = 0; d < 3; ++d) + { + if (m_nodes[i].m_x[d] > maxs[d]) + maxs[d] = m_nodes[i].m_x[d]; + if (m_nodes[i].m_x[d] < mins[d]) + mins[d] = m_nodes[i].m_x[d]; + } + } + const btScalar csm = getCollisionShape()->getMargin(); + const btVector3 mrg = btVector3(csm, + csm, + csm); + m_bounds[0] = mins - mrg; + m_bounds[1] = maxs + mrg; + if (0 != getBroadphaseHandle()) + { + m_worldInfo->m_broadphase->setAabb(getBroadphaseHandle(), + m_bounds[0], + m_bounds[1], + m_worldInfo->m_dispatcher); + } + } + else + { + m_bounds[0] = + m_bounds[1] = btVector3(0, 0, 0); + } } // @@ -3454,60 +3434,120 @@ void btSoftBody::dampClusters() void btSoftBody::setSpringStiffness(btScalar k) { - for (int i = 0; i < m_links.size(); ++i) - { - m_links[i].Feature::m_material->m_kLST = k; - } - m_repulsionStiffness = k; + for (int i = 0; i < m_links.size(); ++i) + { + m_links[i].Feature::m_material->m_kLST = k; + } + m_repulsionStiffness = k; +} + +void btSoftBody::setGravityFactor(btScalar gravFactor) +{ + m_gravityFactor = gravFactor; +} + +void btSoftBody::setCacheBarycenter(bool cacheBarycenter) +{ + m_cacheBarycenter = cacheBarycenter; } void btSoftBody::initializeDmInverse() { - btScalar unit_simplex_measure = 1./6.; - - for (int i = 0; i < m_tetras.size(); ++i) - { - Tetra &t = m_tetras[i]; - btVector3 c1 = t.m_n[1]->m_x - t.m_n[0]->m_x; - btVector3 c2 = t.m_n[2]->m_x - t.m_n[0]->m_x; - btVector3 c3 = t.m_n[3]->m_x - t.m_n[0]->m_x; - btMatrix3x3 Dm(c1.getX(), c2.getX(), c3.getX(), - c1.getY(), c2.getY(), 
c3.getY(), - c1.getZ(), c2.getZ(), c3.getZ()); - t.m_element_measure = Dm.determinant() * unit_simplex_measure; - t.m_Dm_inverse = Dm.inverse(); - } + btScalar unit_simplex_measure = 1. / 6.; + + for (int i = 0; i < m_tetras.size(); ++i) + { + Tetra& t = m_tetras[i]; + btVector3 c1 = t.m_n[1]->m_x - t.m_n[0]->m_x; + btVector3 c2 = t.m_n[2]->m_x - t.m_n[0]->m_x; + btVector3 c3 = t.m_n[3]->m_x - t.m_n[0]->m_x; + btMatrix3x3 Dm(c1.getX(), c2.getX(), c3.getX(), + c1.getY(), c2.getY(), c3.getY(), + c1.getZ(), c2.getZ(), c3.getZ()); + t.m_element_measure = Dm.determinant() * unit_simplex_measure; + t.m_Dm_inverse = Dm.inverse(); + + // calculate the first three columns of P^{-1} + btVector3 a = t.m_n[0]->m_x; + btVector3 b = t.m_n[1]->m_x; + btVector3 c = t.m_n[2]->m_x; + btVector3 d = t.m_n[3]->m_x; + + btScalar det = 1 / (a[0] * b[1] * c[2] - a[0] * b[1] * d[2] - a[0] * b[2] * c[1] + a[0] * b[2] * d[1] + a[0] * c[1] * d[2] - a[0] * c[2] * d[1] + a[1] * (-b[0] * c[2] + b[0] * d[2] + b[2] * c[0] - b[2] * d[0] - c[0] * d[2] + c[2] * d[0]) + a[2] * (b[0] * c[1] - b[0] * d[1] + b[1] * (d[0] - c[0]) + c[0] * d[1] - c[1] * d[0]) - b[0] * c[1] * d[2] + b[0] * c[2] * d[1] + b[1] * c[0] * d[2] - b[1] * c[2] * d[0] - b[2] * c[0] * d[1] + b[2] * c[1] * d[0]); + + btScalar P11 = -b[2] * c[1] + d[2] * c[1] + b[1] * c[2] + b[2] * d[1] - c[2] * d[1] - b[1] * d[2]; + btScalar P12 = b[2] * c[0] - d[2] * c[0] - b[0] * c[2] - b[2] * d[0] + c[2] * d[0] + b[0] * d[2]; + btScalar P13 = -b[1] * c[0] + d[1] * c[0] + b[0] * c[1] + b[1] * d[0] - c[1] * d[0] - b[0] * d[1]; + btScalar P21 = a[2] * c[1] - d[2] * c[1] - a[1] * c[2] - a[2] * d[1] + c[2] * d[1] + a[1] * d[2]; + btScalar P22 = -a[2] * c[0] + d[2] * c[0] + a[0] * c[2] + a[2] * d[0] - c[2] * d[0] - a[0] * d[2]; + btScalar P23 = a[1] * c[0] - d[1] * c[0] - a[0] * c[1] - a[1] * d[0] + c[1] * d[0] + a[0] * d[1]; + btScalar P31 = -a[2] * b[1] + d[2] * b[1] + a[1] * b[2] + a[2] * d[1] - b[2] * d[1] - a[1] * d[2]; + btScalar P32 = a[2] * b[0] 
- d[2] * b[0] - a[0] * b[2] - a[2] * d[0] + b[2] * d[0] + a[0] * d[2]; + btScalar P33 = -a[1] * b[0] + d[1] * b[0] + a[0] * b[1] + a[1] * d[0] - b[1] * d[0] - a[0] * d[1]; + btScalar P41 = a[2] * b[1] - c[2] * b[1] - a[1] * b[2] - a[2] * c[1] + b[2] * c[1] + a[1] * c[2]; + btScalar P42 = -a[2] * b[0] + c[2] * b[0] + a[0] * b[2] + a[2] * c[0] - b[2] * c[0] - a[0] * c[2]; + btScalar P43 = a[1] * b[0] - c[1] * b[0] - a[0] * b[1] - a[1] * c[0] + b[1] * c[0] + a[0] * c[1]; + + btVector4 p1(P11 * det, P21 * det, P31 * det, P41 * det); + btVector4 p2(P12 * det, P22 * det, P32 * det, P42 * det); + btVector4 p3(P13 * det, P23 * det, P33 * det, P43 * det); + + t.m_P_inv[0] = p1; + t.m_P_inv[1] = p2; + t.m_P_inv[2] = p3; + } +} + +static btScalar Dot4(const btVector4& a, const btVector4& b) +{ + return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]; } void btSoftBody::updateDeformation() { - for (int i = 0; i < m_tetras.size(); ++i) - { - btSoftBody::Tetra& t = m_tetras[i]; - btVector3 c1 = t.m_n[1]->m_q - t.m_n[0]->m_q; - btVector3 c2 = t.m_n[2]->m_q - t.m_n[0]->m_q; - btVector3 c3 = t.m_n[3]->m_q - t.m_n[0]->m_q; - btMatrix3x3 Ds(c1.getX(), c2.getX(), c3.getX(), - c1.getY(), c2.getY(), c3.getY(), - c1.getZ(), c2.getZ(), c3.getZ()); - t.m_F = Ds * t.m_Dm_inverse; - - btSoftBody::TetraScratch& s = m_tetraScratches[i]; - s.m_F = t.m_F; - s.m_J = t.m_F.determinant(); - btMatrix3x3 C = t.m_F.transpose()*t.m_F; - s.m_trace = C[0].getX() + C[1].getY() + C[2].getZ(); - s.m_cofF = t.m_F.adjoint().transpose(); - } + btQuaternion q; + for (int i = 0; i < m_tetras.size(); ++i) + { + btSoftBody::Tetra& t = m_tetras[i]; + btVector3 c1 = t.m_n[1]->m_q - t.m_n[0]->m_q; + btVector3 c2 = t.m_n[2]->m_q - t.m_n[0]->m_q; + btVector3 c3 = t.m_n[3]->m_q - t.m_n[0]->m_q; + btMatrix3x3 Ds(c1.getX(), c2.getX(), c3.getX(), + c1.getY(), c2.getY(), c3.getY(), + c1.getZ(), c2.getZ(), c3.getZ()); + t.m_F = Ds * t.m_Dm_inverse; + + btSoftBody::TetraScratch& s = m_tetraScratches[i]; + s.m_F = t.m_F; 
+ s.m_J = t.m_F.determinant(); + btMatrix3x3 C = t.m_F.transpose() * t.m_F; + s.m_trace = C[0].getX() + C[1].getY() + C[2].getZ(); + s.m_cofF = t.m_F.adjoint().transpose(); + + btVector3 a = t.m_n[0]->m_q; + btVector3 b = t.m_n[1]->m_q; + btVector3 c = t.m_n[2]->m_q; + btVector3 d = t.m_n[3]->m_q; + btVector4 q1(a[0], b[0], c[0], d[0]); + btVector4 q2(a[1], b[1], c[1], d[1]); + btVector4 q3(a[2], b[2], c[2], d[2]); + btMatrix3x3 B(Dot4(q1, t.m_P_inv[0]), Dot4(q1, t.m_P_inv[1]), Dot4(q1, t.m_P_inv[2]), + Dot4(q2, t.m_P_inv[0]), Dot4(q2, t.m_P_inv[1]), Dot4(q2, t.m_P_inv[2]), + Dot4(q3, t.m_P_inv[0]), Dot4(q3, t.m_P_inv[1]), Dot4(q3, t.m_P_inv[2])); + q.setRotation(btVector3(0, 0, 1), 0); + B.extractRotation(q, 0.01); // precision of the rotation is not very important for visual correctness. + btMatrix3x3 Q(q); + s.m_corotation = Q; + } } void btSoftBody::advanceDeformation() { - updateDeformation(); - for (int i = 0; i < m_tetras.size(); ++i) - { - m_tetraScratchesTn[i] = m_tetraScratches[i]; - } + updateDeformation(); + for (int i = 0; i < m_tetras.size(); ++i) + { + m_tetraScratchesTn[i] = m_tetraScratches[i]; + } } // void btSoftBody::Joint::Prepare(btScalar dt, int) @@ -3750,7 +3790,7 @@ void btSoftBody::applyForces() // void btSoftBody::setMaxStress(btScalar maxStress) { - m_cfg.m_maxStress = maxStress; + m_cfg.m_maxStress = maxStress; } // @@ -3765,7 +3805,7 @@ void btSoftBody::interpolateRenderMesh() const Node* p2 = m_renderNodesParents[i][2]; btVector3 normal = btCross(p1->m_x - p0->m_x, p2->m_x - p0->m_x); btVector3 unit_normal = normal.normalized(); - Node& n = m_renderNodes[i]; + RenderNode& n = m_renderNodes[i]; n.m_x.setZero(); for (int j = 0; j < 3; ++j) { @@ -3778,7 +3818,7 @@ void btSoftBody::interpolateRenderMesh() { for (int i = 0; i < m_renderNodes.size(); ++i) { - Node& n = m_renderNodes[i]; + RenderNode& n = m_renderNodes[i]; n.m_x.setZero(); for (int j = 0; j < 4; ++j) { @@ -3793,13 +3833,13 @@ void btSoftBody::interpolateRenderMesh() void 
btSoftBody::setCollisionQuadrature(int N) { - for (int i = 0; i <= N; ++i) - { - for (int j = 0; i+j <= N; ++j) - { - m_quads.push_back(btVector3(btScalar(i)/btScalar(N), btScalar(j)/btScalar(N), btScalar(N-i-j)/btScalar(N))); - } - } + for (int i = 0; i <= N; ++i) + { + for (int j = 0; i + j <= N; ++j) + { + m_quads.push_back(btVector3(btScalar(i) / btScalar(N), btScalar(j) / btScalar(N), btScalar(N - i - j) / btScalar(N))); + } + } } // @@ -4006,12 +4046,12 @@ btSoftBody::vsolver_t btSoftBody::getSolver(eVSolver::_ solver) void btSoftBody::setSelfCollision(bool useSelfCollision) { - m_useSelfCollision = useSelfCollision; + m_useSelfCollision = useSelfCollision; } bool btSoftBody::useSelfCollision() { - return m_useSelfCollision; + return m_useSelfCollision; } // @@ -4052,23 +4092,23 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap collider.ProcessColObj(this, pcoWrap); } break; - case fCollision::SDF_RD: - { - btRigidBody* prb1 = (btRigidBody*)btRigidBody::upcast(pcoWrap->getCollisionObject()); - if (pcoWrap->getCollisionObject()->isActive() || this->isActive()) - { - const btTransform wtr = pcoWrap->getWorldTransform(); - const btScalar timemargin = 0; - const btScalar basemargin = getCollisionShape()->getMargin(); - btVector3 mins; - btVector3 maxs; - ATTRIBUTE_ALIGNED16(btDbvtVolume) - volume; - pcoWrap->getCollisionShape()->getAabb(wtr, - mins, - maxs); - volume = btDbvtVolume::FromMM(mins, maxs); - volume.Expand(btVector3(basemargin, basemargin, basemargin)); + case fCollision::SDF_RD: + { + btRigidBody* prb1 = (btRigidBody*)btRigidBody::upcast(pcoWrap->getCollisionObject()); + if (pcoWrap->getCollisionObject()->isActive() || this->isActive()) + { + const btTransform wtr = pcoWrap->getWorldTransform(); + const btScalar timemargin = 0; + const btScalar basemargin = getCollisionShape()->getMargin(); + btVector3 mins; + btVector3 maxs; + ATTRIBUTE_ALIGNED16(btDbvtVolume) + volume; + pcoWrap->getCollisionShape()->getAabb(wtr, + 
mins, + maxs); + volume = btDbvtVolume::FromMM(mins, maxs); + volume.Expand(btVector3(basemargin, basemargin, basemargin)); if (m_cfg.collisions & fCollision::SDF_RDN) { btSoftColliders::CollideSDF_RD docollideNode; @@ -4080,26 +4120,26 @@ void btSoftBody::defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap m_ndbvt.collideTV(m_ndbvt.m_root, volume, docollideNode); } - if (((pcoWrap->getCollisionObject()->getInternalType() == CO_RIGID_BODY) && (m_cfg.collisions & fCollision::SDF_RDF)) || ((pcoWrap->getCollisionObject()->getInternalType() == CO_FEATHERSTONE_LINK) && (m_cfg.collisions & fCollision::SDF_MDF))) - { - btSoftColliders::CollideSDF_RDF docollideFace; - docollideFace.psb = this; - docollideFace.m_colObj1Wrap = pcoWrap; - docollideFace.m_rigidBody = prb1; + if (((pcoWrap->getCollisionObject()->getInternalType() == CO_RIGID_BODY) && (m_cfg.collisions & fCollision::SDF_RDF)) || ((pcoWrap->getCollisionObject()->getInternalType() == CO_FEATHERSTONE_LINK) && (m_cfg.collisions & fCollision::SDF_MDF))) + { + btSoftColliders::CollideSDF_RDF docollideFace; + docollideFace.psb = this; + docollideFace.m_colObj1Wrap = pcoWrap; + docollideFace.m_rigidBody = prb1; docollideFace.dynmargin = basemargin + timemargin; docollideFace.stamargin = basemargin; - m_fdbvt.collideTV(m_fdbvt.m_root, volume, docollideFace); - } - } - } - break; + m_fdbvt.collideTV(m_fdbvt.m_root, volume, docollideFace); + } + } + } + break; } } // void btSoftBody::defaultCollisionHandler(btSoftBody* psb) { - BT_PROFILE("Deformable Collision"); + BT_PROFILE("Deformable Collision"); const int cf = m_cfg.collisions & psb->m_cfg.collisions; switch (cf & fCollision::SVSmask) { @@ -4137,60 +4177,60 @@ void btSoftBody::defaultCollisionHandler(btSoftBody* psb) } } break; - case fCollision::VF_DD: - { - if (!psb->m_softSoftCollision) - return; - if (psb->isActive() || this->isActive()) - { - if (this != psb) - { - btSoftColliders::CollideVF_DD docollide; - /* common */ - docollide.mrg = 
getCollisionShape()->getMargin() + - psb->getCollisionShape()->getMargin(); - /* psb0 nodes vs psb1 faces */ - if (psb->m_tetras.size() > 0) - docollide.useFaceNormal = true; - else - docollide.useFaceNormal = false; - docollide.psb[0] = this; - docollide.psb[1] = psb; - docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, - docollide.psb[1]->m_fdbvt.m_root, - docollide); - - /* psb1 nodes vs psb0 faces */ - if (this->m_tetras.size() > 0) - docollide.useFaceNormal = true; - else - docollide.useFaceNormal = false; - docollide.psb[0] = psb; - docollide.psb[1] = this; - docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, - docollide.psb[1]->m_fdbvt.m_root, - docollide); - } - else - { - if (psb->useSelfCollision()) - { - btSoftColliders::CollideFF_DD docollide; - docollide.mrg = 2*getCollisionShape()->getMargin(); - docollide.psb[0] = this; - docollide.psb[1] = psb; - if (this->m_tetras.size() > 0) - docollide.useFaceNormal = true; - else - docollide.useFaceNormal = false; - /* psb0 faces vs psb0 faces */ - calculateNormalCone(this->m_fdbvnt); - this->m_fdbvt.selfCollideT(m_fdbvnt,docollide); - } - } - } - } - break; + case fCollision::VF_DD: + { + if (!psb->m_softSoftCollision) + return; + if (psb->isActive() || this->isActive()) + { + if (this != psb) + { + btSoftColliders::CollideVF_DD docollide; + /* common */ + docollide.mrg = getCollisionShape()->getMargin() + + psb->getCollisionShape()->getMargin(); + /* psb0 nodes vs psb1 faces */ + if (psb->m_tetras.size() > 0) + docollide.useFaceNormal = true; + else + docollide.useFaceNormal = false; + docollide.psb[0] = this; + docollide.psb[1] = psb; + docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, + docollide.psb[1]->m_fdbvt.m_root, + docollide); + + /* psb1 nodes vs psb0 faces */ + if (this->m_tetras.size() > 0) + docollide.useFaceNormal = true; + else + docollide.useFaceNormal = false; + docollide.psb[0] = psb; + docollide.psb[1] = this; + 
docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, + docollide.psb[1]->m_fdbvt.m_root, + docollide); + } + else + { + if (psb->useSelfCollision()) + { + btSoftColliders::CollideFF_DD docollide; + docollide.mrg = 2 * getCollisionShape()->getMargin(); + docollide.psb[0] = this; + docollide.psb[1] = psb; + if (this->m_tetras.size() > 0) + docollide.useFaceNormal = true; + else + docollide.useFaceNormal = false; + /* psb0 faces vs psb0 faces */ + calculateNormalCone(this->m_fdbvnt); + this->m_fdbvt.selfCollideT(m_fdbvnt, docollide); + } + } + } + } + break; default: { } @@ -4205,7 +4245,7 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb) { btSoftColliders::CollideCCD docollide; /* common */ - docollide.mrg = SAFE_EPSILON; // for rounding error instead of actual margin + docollide.mrg = SAFE_EPSILON; // for rounding error instead of actual margin docollide.dt = psb->m_sst.sdt; /* psb0 nodes vs psb1 faces */ if (psb->m_tetras.size() > 0) @@ -4215,8 +4255,8 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb) docollide.psb[0] = this; docollide.psb[1] = psb; docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, - docollide.psb[1]->m_fdbvt.m_root, - docollide); + docollide.psb[1]->m_fdbvt.m_root, + docollide); /* psb1 nodes vs psb0 faces */ if (this->m_tetras.size() > 0) docollide.useFaceNormal = true; @@ -4225,8 +4265,8 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb) docollide.psb[0] = psb; docollide.psb[1] = this; docollide.psb[0]->m_ndbvt.collideTT(docollide.psb[0]->m_ndbvt.m_root, - docollide.psb[1]->m_fdbvt.m_root, - docollide); + docollide.psb[1]->m_fdbvt.m_root, + docollide); } else { @@ -4236,14 +4276,14 @@ void btSoftBody::geometricCollisionHandler(btSoftBody* psb) docollide.mrg = SAFE_EPSILON; docollide.psb[0] = this; docollide.psb[1] = psb; - docollide.dt = psb->m_sst.sdt; + docollide.dt = psb->m_sst.sdt; if (this->m_tetras.size() > 0) docollide.useFaceNormal = true; else 
docollide.useFaceNormal = false; /* psb0 faces vs psb0 faces */ calculateNormalCone(this->m_fdbvnt); // should compute this outside of this scope - this->m_fdbvt.selfCollideT(m_fdbvnt,docollide); + this->m_fdbvt.selfCollideT(m_fdbvnt, docollide); } } } @@ -4648,44 +4688,43 @@ const char* btSoftBody::serialize(void* dataBuffer, class btSerializer* serializ void btSoftBody::updateDeactivation(btScalar timeStep) { - if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION)) - return; + if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == DISABLE_DEACTIVATION)) + return; - if (m_maxSpeedSquared < m_sleepingThreshold * m_sleepingThreshold) - { - m_deactivationTime += timeStep; - } - else - { - m_deactivationTime = btScalar(0.); - setActivationState(0); - } + if (m_maxSpeedSquared < m_sleepingThreshold * m_sleepingThreshold) + { + m_deactivationTime += timeStep; + } + else + { + m_deactivationTime = btScalar(0.); + setActivationState(0); + } } - void btSoftBody::setZeroVelocity() { - for (int i = 0; i < m_nodes.size(); ++i) - { - m_nodes[i].m_v.setZero(); - } + for (int i = 0; i < m_nodes.size(); ++i) + { + m_nodes[i].m_v.setZero(); + } } bool btSoftBody::wantsSleeping() { - if (getActivationState() == DISABLE_DEACTIVATION) - return false; + if (getActivationState() == DISABLE_DEACTIVATION) + return false; - //disable deactivation - if (gDisableDeactivation || (gDeactivationTime == btScalar(0.))) - return false; + //disable deactivation + if (gDisableDeactivation || (gDeactivationTime == btScalar(0.))) + return false; - if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION)) - return true; + if ((getActivationState() == ISLAND_SLEEPING) || (getActivationState() == WANTS_DEACTIVATION)) + return true; - if (m_deactivationTime > gDeactivationTime) - { - return true; - } - return false; + if (m_deactivationTime > gDeactivationTime) + { + return true; + } + return false; } diff 
--git a/thirdparty/bullet/BulletSoftBody/btSoftBody.h b/thirdparty/bullet/BulletSoftBody/btSoftBody.h index 6a55eccbd2..f578487b8c 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBody.h +++ b/thirdparty/bullet/BulletSoftBody/btSoftBody.h @@ -35,7 +35,7 @@ subject to the following restrictions: //#else #define btSoftBodyData btSoftBodyFloatData #define btSoftBodyDataName "btSoftBodyFloatData" -static const btScalar OVERLAP_REDUCTION_FACTOR = 0.1; +static const btScalar OVERLAP_REDUCTION_FACTOR = 0.1; static unsigned long seed = 243703; //#endif //BT_USE_DOUBLE_PRECISION @@ -171,10 +171,10 @@ public: CL_SELF = 0x0040, ///Cluster soft body self collision VF_DD = 0x0080, ///Vertex vs face soft vs soft handling - RVDFmask = 0x0f00, /// Rigid versus deformable face mask - SDF_RDF = 0x0100, /// GJK based Rigid vs. deformable face - SDF_MDF = 0x0200, /// GJK based Multibody vs. deformable face - SDF_RDN = 0x0400, /// SDF based Rigid vs. deformable node + RVDFmask = 0x0f00, /// Rigid versus deformable face mask + SDF_RDF = 0x0100, /// GJK based Rigid vs. deformable face + SDF_MDF = 0x0200, /// GJK based Multibody vs. deformable face + SDF_RDN = 0x0400, /// SDF based Rigid vs. 
deformable node /* presets */ Default = SDF_RS, END @@ -226,7 +226,7 @@ public: const btCollisionObject* m_colObj; /* Rigid body */ btVector3 m_normal; /* Outward normal */ btScalar m_offset; /* Offset from origin */ - btVector3 m_bary; /* Barycentric weights for faces */ + btVector3 m_bary; /* Barycentric weights for faces */ }; /* sMedium */ @@ -258,20 +258,29 @@ public: Material* m_material; // Material }; /* Node */ + struct RenderNode + { + btVector3 m_x; + btVector3 m_uv1; + btVector3 m_normal; + }; struct Node : Feature { btVector3 m_x; // Position btVector3 m_q; // Previous step position/Test position btVector3 m_v; // Velocity - btVector3 m_vn; // Previous step velocity + btVector3 m_vn; // Previous step velocity btVector3 m_f; // Force accumulator btVector3 m_n; // Normal btScalar m_im; // 1/mass btScalar m_area; // Area btDbvtNode* m_leaf; // Leaf data - btScalar m_penetration; // depth of penetration + int m_constrained; // depth of penetration int m_battach : 1; // Attached - int index; + int index; + btVector3 m_splitv; // velocity associated with split impulse + btMatrix3x3 m_effectiveMass; // effective mass in contact + btMatrix3x3 m_effectiveMass_inv; // inverse of effective mass }; /* Link */ ATTRIBUTE_ALIGNED16(struct) @@ -287,40 +296,47 @@ public: BT_DECLARE_ALIGNED_ALLOCATOR(); }; + struct RenderFace + { + RenderNode* m_n[3]; // Node pointers + }; + /* Face */ struct Face : Feature { - Node* m_n[3]; // Node pointers - btVector3 m_normal; // Normal - btScalar m_ra; // Rest area - btDbvtNode* m_leaf; // Leaf data - btVector4 m_pcontact; // barycentric weights of the persistent contact - btVector3 m_n0, m_n1, m_vn; - int m_index; + Node* m_n[3]; // Node pointers + btVector3 m_normal; // Normal + btScalar m_ra; // Rest area + btDbvtNode* m_leaf; // Leaf data + btVector4 m_pcontact; // barycentric weights of the persistent contact + btVector3 m_n0, m_n1, m_vn; + int m_index; }; /* Tetra */ struct Tetra : Feature { - Node* m_n[4]; // Node pointers - 
btScalar m_rv; // Rest volume - btDbvtNode* m_leaf; // Leaf data - btVector3 m_c0[4]; // gradients - btScalar m_c1; // (4*kVST)/(im0+im1+im2+im3) - btScalar m_c2; // m_c1/sum(|g0..3|^2) - btMatrix3x3 m_Dm_inverse; // rest Dm^-1 - btMatrix3x3 m_F; - btScalar m_element_measure; + Node* m_n[4]; // Node pointers + btScalar m_rv; // Rest volume + btDbvtNode* m_leaf; // Leaf data + btVector3 m_c0[4]; // gradients + btScalar m_c1; // (4*kVST)/(im0+im1+im2+im3) + btScalar m_c2; // m_c1/sum(|g0..3|^2) + btMatrix3x3 m_Dm_inverse; // rest Dm^-1 + btMatrix3x3 m_F; + btScalar m_element_measure; + btVector4 m_P_inv[3]; // first three columns of P_inv matrix + }; + + /* TetraScratch */ + struct TetraScratch + { + btMatrix3x3 m_F; // deformation gradient F + btScalar m_trace; // trace of F^T * F + btScalar m_J; // det(F) + btMatrix3x3 m_cofF; // cofactor of F + btMatrix3x3 m_corotation; // corotatio of the tetra }; - - /* TetraScratch */ - struct TetraScratch - { - btMatrix3x3 m_F; // deformation gradient F - btScalar m_trace; // trace of F^T * F - btScalar m_J; // det(F) - btMatrix3x3 m_cofF; // cofactor of F - }; - + /* RContact */ struct RContact { @@ -331,67 +347,68 @@ public: btScalar m_c2; // ima*dt btScalar m_c3; // Friction btScalar m_c4; // Hardness - - // jacobians and unit impulse responses for multibody - btMultiBodyJacobianData jacobianData_normal; - btMultiBodyJacobianData jacobianData_t1; - btMultiBodyJacobianData jacobianData_t2; - btVector3 t1; - btVector3 t2; + + // jacobians and unit impulse responses for multibody + btMultiBodyJacobianData jacobianData_normal; + btMultiBodyJacobianData jacobianData_t1; + btMultiBodyJacobianData jacobianData_t2; + btVector3 t1; + btVector3 t2; }; - - class DeformableRigidContact - { - public: - sCti m_cti; // Contact infos - btMatrix3x3 m_c0; // Impulse matrix - btVector3 m_c1; // Relative anchor - btScalar m_c2; // inverse mass of node/face - btScalar m_c3; // Friction - btScalar m_c4; // Hardness - - // jacobians and unit 
impulse responses for multibody - btMultiBodyJacobianData jacobianData_normal; - btMultiBodyJacobianData jacobianData_t1; - btMultiBodyJacobianData jacobianData_t2; - btVector3 t1; - btVector3 t2; - }; - - class DeformableNodeRigidContact : public DeformableRigidContact - { - public: - Node* m_node; // Owner node - }; - - class DeformableNodeRigidAnchor : public DeformableNodeRigidContact - { - public: - btVector3 m_local; // Anchor position in body space - }; - - class DeformableFaceRigidContact : public DeformableRigidContact - { - public: - Face* m_face; // Owner face - btVector3 m_contactPoint; // Contact point - btVector3 m_bary; // Barycentric weights - btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v; - }; - - struct DeformableFaceNodeContact - { - Node* m_node; // Node - Face* m_face; // Face - btVector3 m_bary; // Barycentric weights - btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v; - btVector3 m_normal; // Normal - btScalar m_margin; // Margin - btScalar m_friction; // Friction - btScalar m_imf; // inverse mass of the face at contact point - btScalar m_c0; // scale of the impulse matrix; - }; - + + class DeformableRigidContact + { + public: + sCti m_cti; // Contact infos + btMatrix3x3 m_c0; // Impulse matrix + btVector3 m_c1; // Relative anchor + btScalar m_c2; // inverse mass of node/face + btScalar m_c3; // Friction + btScalar m_c4; // Hardness + btMatrix3x3 m_c5; // inverse effective mass + + // jacobians and unit impulse responses for multibody + btMultiBodyJacobianData jacobianData_normal; + btMultiBodyJacobianData jacobianData_t1; + btMultiBodyJacobianData jacobianData_t2; + btVector3 t1; + btVector3 t2; + }; + + class DeformableNodeRigidContact : public DeformableRigidContact + { + public: + Node* m_node; // Owner node + }; + + class DeformableNodeRigidAnchor : public DeformableNodeRigidContact + { + public: + btVector3 m_local; // Anchor position in body space + }; + + class 
DeformableFaceRigidContact : public DeformableRigidContact + { + public: + Face* m_face; // Owner face + btVector3 m_contactPoint; // Contact point + btVector3 m_bary; // Barycentric weights + btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v; + }; + + struct DeformableFaceNodeContact + { + Node* m_node; // Node + Face* m_face; // Face + btVector3 m_bary; // Barycentric weights + btVector3 m_weights; // v_contactPoint * m_weights[i] = m_face->m_node[i]->m_v; + btVector3 m_normal; // Normal + btScalar m_margin; // Margin + btScalar m_friction; // Friction + btScalar m_imf; // inverse mass of the face at contact point + btScalar m_c0; // scale of the impulse matrix; + }; + /* SContact */ struct SContact { @@ -718,19 +735,19 @@ public: tVSolverArray m_vsequence; // Velocity solvers sequence tPSolverArray m_psequence; // Position solvers sequence tPSolverArray m_dsequence; // Drift solvers sequence - btScalar drag; // deformable air drag - btScalar m_maxStress; // Maximum principle first Piola stress + btScalar drag; // deformable air drag + btScalar m_maxStress; // Maximum principle first Piola stress }; /* SolverState */ struct SolverState { //if you add new variables, always initialize them! 
SolverState() - :sdt(0), - isdt(0), - velmrg(0), - radmrg(0), - updmrg(0) + : sdt(0), + isdt(0), + velmrg(0), + radmrg(0), + updmrg(0) { } btScalar sdt; // dt*timescale @@ -769,9 +786,11 @@ public: typedef btAlignedObjectArray<Cluster*> tClusterArray; typedef btAlignedObjectArray<Note> tNoteArray; typedef btAlignedObjectArray<Node> tNodeArray; + typedef btAlignedObjectArray< RenderNode> tRenderNodeArray; typedef btAlignedObjectArray<btDbvtNode*> tLeafArray; typedef btAlignedObjectArray<Link> tLinkArray; typedef btAlignedObjectArray<Face> tFaceArray; + typedef btAlignedObjectArray<RenderFace> tRenderFaceArray; typedef btAlignedObjectArray<Tetra> tTetraArray; typedef btAlignedObjectArray<Anchor> tAnchorArray; typedef btAlignedObjectArray<RContact> tRContactArray; @@ -791,40 +810,42 @@ public: btSoftBodyWorldInfo* m_worldInfo; // World info tNoteArray m_notes; // Notes tNodeArray m_nodes; // Nodes - tNodeArray m_renderNodes; // Nodes + tRenderNodeArray m_renderNodes; // Render Nodes tLinkArray m_links; // Links tFaceArray m_faces; // Faces - tFaceArray m_renderFaces; // Faces + tRenderFaceArray m_renderFaces; // Faces tTetraArray m_tetras; // Tetras - btAlignedObjectArray<TetraScratch> m_tetraScratches; - btAlignedObjectArray<TetraScratch> m_tetraScratchesTn; - tAnchorArray m_anchors; // Anchors - btAlignedObjectArray<DeformableNodeRigidAnchor> m_deformableAnchors; - tRContactArray m_rcontacts; // Rigid contacts - btAlignedObjectArray<DeformableNodeRigidContact> m_nodeRigidContacts; - btAlignedObjectArray<DeformableFaceNodeContact> m_faceNodeContacts; - btAlignedObjectArray<DeformableFaceRigidContact> m_faceRigidContacts; - tSContactArray m_scontacts; // Soft contacts - tJointArray m_joints; // Joints - tMaterialArray m_materials; // Materials - btScalar m_timeacc; // Time accumulator - btVector3 m_bounds[2]; // Spatial bounds - bool m_bUpdateRtCst; // Update runtime constants - btDbvt m_ndbvt; // Nodes tree - btDbvt m_fdbvt; // Faces tree - btDbvntNode* m_fdbvnt; // 
Faces tree with normals - btDbvt m_cdbvt; // Clusters tree - tClusterArray m_clusters; // Clusters - btScalar m_dampingCoefficient; // Damping Coefficient + btAlignedObjectArray<TetraScratch> m_tetraScratches; + btAlignedObjectArray<TetraScratch> m_tetraScratchesTn; + tAnchorArray m_anchors; // Anchors + btAlignedObjectArray<DeformableNodeRigidAnchor> m_deformableAnchors; + tRContactArray m_rcontacts; // Rigid contacts + btAlignedObjectArray<DeformableNodeRigidContact> m_nodeRigidContacts; + btAlignedObjectArray<DeformableFaceNodeContact> m_faceNodeContacts; + btAlignedObjectArray<DeformableFaceRigidContact> m_faceRigidContacts; + tSContactArray m_scontacts; // Soft contacts + tJointArray m_joints; // Joints + tMaterialArray m_materials; // Materials + btScalar m_timeacc; // Time accumulator + btVector3 m_bounds[2]; // Spatial bounds + bool m_bUpdateRtCst; // Update runtime constants + btDbvt m_ndbvt; // Nodes tree + btDbvt m_fdbvt; // Faces tree + btDbvntNode* m_fdbvnt; // Faces tree with normals + btDbvt m_cdbvt; // Clusters tree + tClusterArray m_clusters; // Clusters + btScalar m_dampingCoefficient; // Damping Coefficient btScalar m_sleepingThreshold; btScalar m_maxSpeedSquared; - btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection + btAlignedObjectArray<btVector3> m_quads; // quadrature points for collision detection btScalar m_repulsionStiffness; - btAlignedObjectArray<btVector3> m_X; // initial positions + btScalar m_gravityFactor; + bool m_cacheBarycenter; + btAlignedObjectArray<btVector3> m_X; // initial positions btAlignedObjectArray<btVector4> m_renderNodesInterpolationWeights; btAlignedObjectArray<btAlignedObjectArray<const btSoftBody::Node*> > m_renderNodesParents; - btAlignedObjectArray<btScalar> m_z; // vertical distance used in extrapolation + btAlignedObjectArray<btScalar> m_z; // vertical distance used in extrapolation bool m_useSelfCollision; bool m_softSoftCollision; @@ -856,11 +877,11 @@ public: { return 
m_worldInfo; } - - void setDampingCoefficient(btScalar damping_coeff) - { - m_dampingCoefficient = damping_coeff; - } + + void setDampingCoefficient(btScalar damping_coeff) + { + m_dampingCoefficient = damping_coeff; + } ///@todo: avoid internal softbody shape hack and move collision code to collision library virtual void setCollisionShape(btCollisionShape* collisionShape) @@ -921,11 +942,12 @@ public: Material* mat = 0); /* Append anchor */ - void appendDeformableAnchor(int node, btRigidBody* body); - void appendDeformableAnchor(int node, btMultiBodyLinkCollider* link); - void appendAnchor(int node, + void appendDeformableAnchor(int node, btRigidBody* body); + void appendDeformableAnchor(int node, btMultiBodyLinkCollider* link); + void appendAnchor(int node, btRigidBody* body, bool disableCollisionBetweenLinkedBodies = false, btScalar influence = 1); void appendAnchor(int node, btRigidBody* body, const btVector3& localPivot, bool disableCollisionBetweenLinkedBodies = false, btScalar influence = 1); + void removeAnchor(int node); /* Append linear joint */ void appendLinearJoint(const LJoint::Specs& specs, Cluster* body0, Body body1); void appendLinearJoint(const LJoint::Specs& specs, Body body = Body()); @@ -976,10 +998,10 @@ public: void setLinearVelocity(const btVector3& linVel); /* Set the angular velocity of the center of mass */ void setAngularVelocity(const btVector3& angVel); - /* Get best fit rigid transform */ - btTransform getRigidTransform(); - /* Transform to given pose */ - void transformTo(const btTransform& trs); + /* Get best fit rigid transform */ + btTransform getRigidTransform(); + /* Transform to given pose */ + void transformTo(const btTransform& trs); /* Transform */ void transform(const btTransform& trs); /* Translate */ @@ -1068,11 +1090,11 @@ public: /* defaultCollisionHandlers */ void defaultCollisionHandler(const btCollisionObjectWrapper* pcoWrap); void defaultCollisionHandler(btSoftBody* psb); - void setSelfCollision(bool 
useSelfCollision); - bool useSelfCollision(); - void updateDeactivation(btScalar timeStep); - void setZeroVelocity(); - bool wantsSleeping(); + void setSelfCollision(bool useSelfCollision); + bool useSelfCollision(); + void updateDeactivation(btScalar timeStep); + void setZeroVelocity(); + bool wantsSleeping(); // // Functionality to deal with new accelerated solvers. @@ -1151,8 +1173,8 @@ public: void rebuildNodeTree(); btVector3 evaluateCom() const; bool checkDeformableContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const; - bool checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, Face& f, btVector3& contact_point, btVector3& bary, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const; - bool checkContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti) const; + bool checkDeformableFaceContact(const btCollisionObjectWrapper* colObjWrap, Face& f, btVector3& contact_point, btVector3& bary, btScalar margin, btSoftBody::sCti& cti, bool predict = false) const; + bool checkContact(const btCollisionObjectWrapper* colObjWrap, const btVector3& x, btScalar margin, btSoftBody::sCti& cti) const; void updateNormals(); void updateBounds(); void updatePose(); @@ -1166,14 +1188,16 @@ public: void solveClusters(btScalar sor); void applyClusters(bool drift); void dampClusters(); - void setSpringStiffness(btScalar k); - void initializeDmInverse(); - void updateDeformation(); - void advanceDeformation(); + void setSpringStiffness(btScalar k); + void setGravityFactor(btScalar gravFactor); + void setCacheBarycenter(bool cacheBarycenter); + void initializeDmInverse(); + void updateDeformation(); + void advanceDeformation(); void applyForces(); - void setMaxStress(btScalar maxStress); - void interpolateRenderMesh(); - void setCollisionQuadrature(int N); + void setMaxStress(btScalar maxStress); + void 
interpolateRenderMesh(); + void setCollisionQuadrature(int N); static void PSolve_Anchors(btSoftBody* psb, btScalar kst, btScalar ti); static void PSolve_RContacts(btSoftBody* psb, btScalar kst, btScalar ti); static void PSolve_SContacts(btSoftBody* psb, btScalar, btScalar ti); @@ -1182,14 +1206,15 @@ public: static psolver_t getSolver(ePSolver::_ solver); static vsolver_t getSolver(eVSolver::_ solver); void geometricCollisionHandler(btSoftBody* psb); -#define SAFE_EPSILON SIMD_EPSILON*100.0 +#define SAFE_EPSILON SIMD_EPSILON * 100.0 void updateNode(btDbvtNode* node, bool use_velocity, bool margin) { if (node->isleaf()) { btSoftBody::Node* n = (btSoftBody::Node*)(node->data); - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol; - btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol; + btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision if (use_velocity) { btVector3 points[2] = {n->m_x, n->m_x + m_sst.sdt * n->m_v}; @@ -1207,38 +1232,40 @@ public: { updateNode(node->childs[0], use_velocity, margin); updateNode(node->childs[1], use_velocity, margin); - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol; + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol; Merge(node->childs[0]->volume, node->childs[1]->volume, vol); node->volume = vol; } } - - void updateNodeTree(bool use_velocity, bool margin) + + void updateNodeTree(bool use_velocity, bool margin) { if (m_ndbvt.m_root) updateNode(m_ndbvt.m_root, use_velocity, margin); } - template <class DBVTNODE> // btDbvtNode or btDbvntNode + template <class DBVTNODE> // btDbvtNode or btDbvntNode void updateFace(DBVTNODE* node, bool use_velocity, bool margin) { if (node->isleaf()) { btSoftBody::Face* f = (btSoftBody::Face*)(node->data); - btScalar pad = margin ? 
m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol; + btScalar pad = margin ? m_sst.radmrg : SAFE_EPSILON; // use user defined margin or margin for floating point precision + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol; if (use_velocity) { btVector3 points[6] = {f->m_n[0]->m_x, f->m_n[0]->m_x + m_sst.sdt * f->m_n[0]->m_v, - f->m_n[1]->m_x, f->m_n[1]->m_x + m_sst.sdt * f->m_n[1]->m_v, - f->m_n[2]->m_x, f->m_n[2]->m_x + m_sst.sdt * f->m_n[2]->m_v}; + f->m_n[1]->m_x, f->m_n[1]->m_x + m_sst.sdt * f->m_n[1]->m_v, + f->m_n[2]->m_x, f->m_n[2]->m_x + m_sst.sdt * f->m_n[2]->m_v}; vol = btDbvtVolume::FromPoints(points, 6); } else { btVector3 points[3] = {f->m_n[0]->m_x, - f->m_n[1]->m_x, - f->m_n[2]->m_x}; + f->m_n[1]->m_x, + f->m_n[2]->m_x}; vol = btDbvtVolume::FromPoints(points, 3); } vol.Expand(btVector3(pad, pad, pad)); @@ -1249,7 +1276,8 @@ public: { updateFace(node->childs[0], use_velocity, margin); updateFace(node->childs[1], use_velocity, margin); - ATTRIBUTE_ALIGNED16(btDbvtVolume) vol; + ATTRIBUTE_ALIGNED16(btDbvtVolume) + vol; Merge(node->childs[0]->volume, node->childs[1]->volume, vol); node->volume = vol; } @@ -1271,7 +1299,7 @@ public: return (a * coord.x() + b * coord.y() + c * coord.z()); } - void applyRepulsionForce(btScalar timeStep, bool applySpringForce) + void applyRepulsionForce(btScalar timeStep, bool applySpringForce) { btAlignedObjectArray<int> indices; { @@ -1297,58 +1325,60 @@ public: const btVector3& n = c.m_normal; btVector3 l = node->m_x - BaryEval(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, w); btScalar d = c.m_margin - n.dot(l); - d = btMax(btScalar(0),d); - + d = btMax(btScalar(0), d); + const btVector3& va = node->m_v; btVector3 vb = BaryEval(face->m_n[0]->m_v, face->m_n[1]->m_v, face->m_n[2]->m_v, w); btVector3 vr = va - vb; - const btScalar vn = btDot(vr, n); // dn < 0 <==> opposing + const btScalar vn = btDot(vr, n); // dn < 0 <==> opposing if (vn 
> OVERLAP_REDUCTION_FACTOR * d / timeStep) continue; - btVector3 vt = vr - vn*n; + btVector3 vt = vr - vn * n; btScalar I = 0; - btScalar mass = node->m_im == 0 ? 0 : btScalar(1)/node->m_im; + btScalar mass = node->m_im == 0 ? 0 : btScalar(1) / node->m_im; if (applySpringForce) I = -btMin(m_repulsionStiffness * timeStep * d, mass * (OVERLAP_REDUCTION_FACTOR * d / timeStep - vn)); if (vn < 0) I += 0.5 * mass * vn; - btScalar face_penetration = 0, node_penetration = node->m_penetration; + int face_penetration = 0, node_penetration = node->m_constrained; for (int i = 0; i < 3; ++i) - face_penetration = btMax(face_penetration, face->m_n[i]->m_penetration); - btScalar I_tilde = .5 *I /(1.0+w.length2()); - -// double the impulse if node or face is constrained. - if (face_penetration > 0 || node_penetration > 0) - I_tilde *= 2.0; - if (face_penetration <= node_penetration) + face_penetration |= face->m_n[i]->m_constrained; + btScalar I_tilde = 2.0 * I / (1.0 + w.length2()); + + // double the impulse if node or face is constrained. 
+ if (face_penetration > 0 || node_penetration > 0) + { + I_tilde *= 2.0; + } + if (face_penetration <= 0) { for (int j = 0; j < 3; ++j) - face->m_n[j]->m_v += w[j]*n*I_tilde*node->m_im; + face->m_n[j]->m_v += w[j] * n * I_tilde * node->m_im; } - if (face_penetration >= node_penetration) + if (node_penetration <= 0) { - node->m_v -= I_tilde*node->m_im*n; + node->m_v -= I_tilde * node->m_im * n; } - + // apply frictional impulse btScalar vt_norm = vt.safeNorm(); if (vt_norm > SIMD_EPSILON) { btScalar delta_vn = -2 * I * node->m_im; btScalar mu = c.m_friction; - btScalar vt_new = btMax(btScalar(1) - mu * delta_vn / (vt_norm + SIMD_EPSILON), btScalar(0))*vt_norm; - I = 0.5 * mass * (vt_norm-vt_new); + btScalar vt_new = btMax(btScalar(1) - mu * delta_vn / (vt_norm + SIMD_EPSILON), btScalar(0)) * vt_norm; + I = 0.5 * mass * (vt_norm - vt_new); vt.safeNormalize(); - I_tilde = .5 *I /(1.0+w.length2()); -// double the impulse if node or face is constrained. -// if (face_penetration > 0 || node_penetration > 0) -// I_tilde *= 2.0; - if (face_penetration <= node_penetration) + I_tilde = 2.0 * I / (1.0 + w.length2()); + // double the impulse if node or face is constrained. 
+ if (face_penetration > 0 || node_penetration > 0) + I_tilde *= 2.0; + if (face_penetration <= 0) { for (int j = 0; j < 3; ++j) face->m_n[j]->m_v += w[j] * vt * I_tilde * (face->m_n[j])->m_im; } - if (face_penetration >= node_penetration) + if (node_penetration <= 0) { node->m_v -= I_tilde * node->m_im * vt; } @@ -1356,7 +1386,7 @@ public: } } virtual int calculateSerializeBufferSize() const; - + ///fills the dataBuffer and returns the struct name (and 0 on failure) virtual const char* serialize(void* dataBuffer, class btSerializer* serializer) const; }; diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp index c1a87c7d57..f63e48f9a5 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp +++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.cpp @@ -727,7 +727,7 @@ btSoftBody* btSoftBodyHelpers::CreatePatch(btSoftBodyWorldInfo& worldInfo, const int resy, int fixeds, bool gendiags, - btScalar perturbation) + btScalar perturbation) { #define IDX(_x_, _y_) ((_y_)*rx + (_x_)) /* Create nodes */ @@ -747,12 +747,12 @@ btSoftBody* btSoftBodyHelpers::CreatePatch(btSoftBodyWorldInfo& worldInfo, const for (int ix = 0; ix < rx; ++ix) { const btScalar tx = ix / (btScalar)(rx - 1); - btScalar pert = perturbation * btScalar(rand())/RAND_MAX; - btVector3 temp1 = py1; - temp1.setY(py1.getY() + pert); - btVector3 temp = py0; - pert = perturbation * btScalar(rand())/RAND_MAX; - temp.setY(py0.getY() + pert); + btScalar pert = perturbation * btScalar(rand()) / RAND_MAX; + btVector3 temp1 = py1; + temp1.setY(py1.getY() + pert); + btVector3 temp = py0; + pert = perturbation * btScalar(rand()) / RAND_MAX; + temp.setY(py0.getY() + pert); x[IDX(ix, iy)] = lerp(temp, temp1, tx); m[IDX(ix, iy)] = 1; } @@ -1233,9 +1233,9 @@ if(face&&face[0]) } } } - psb->initializeDmInverse(); - psb->m_tetraScratches.resize(psb->m_tetras.size()); - psb->m_tetraScratchesTn.resize(psb->m_tetras.size()); + 
psb->initializeDmInverse(); + psb->m_tetraScratches.resize(psb->m_tetras.size()); + psb->m_tetraScratchesTn.resize(psb->m_tetras.size()); printf("Nodes: %u\r\n", psb->m_nodes.size()); printf("Links: %u\r\n", psb->m_links.size()); printf("Faces: %u\r\n", psb->m_faces.size()); @@ -1245,61 +1245,62 @@ if(face&&face[0]) btSoftBody* btSoftBodyHelpers::CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file) { - std::ifstream fs; - fs.open(vtk_file); - btAssert(fs); - - typedef btAlignedObjectArray<int> Index; - std::string line; - btAlignedObjectArray<btVector3> X; - btVector3 position; - btAlignedObjectArray<Index> indices; - bool reading_points = false; - bool reading_tets = false; - size_t n_points = 0; - size_t n_tets = 0; - size_t x_count = 0; - size_t indices_count = 0; - while (std::getline(fs, line)) - { - std::stringstream ss(line); - if (line.size() == (size_t)(0)) - { - } - else if (line.substr(0, 6) == "POINTS") - { - reading_points = true; - reading_tets = false; - ss.ignore(128, ' '); // ignore "POINTS" - ss >> n_points; - X.resize(n_points); - } - else if (line.substr(0, 5) == "CELLS") - { - reading_points = false; - reading_tets = true; - ss.ignore(128, ' '); // ignore "CELLS" - ss >> n_tets; - indices.resize(n_tets); - } - else if (line.substr(0, 10) == "CELL_TYPES") - { - reading_points = false; - reading_tets = false; - } - else if (reading_points) - { - btScalar p; - ss >> p; - position.setX(p); - ss >> p; - position.setY(p); - ss >> p; - position.setZ(p); - X[x_count++] = position; - } - else if (reading_tets) - { + std::ifstream fs; + fs.open(vtk_file); + btAssert(fs); + + typedef btAlignedObjectArray<int> Index; + std::string line; + btAlignedObjectArray<btVector3> X; + btVector3 position; + btAlignedObjectArray<Index> indices; + bool reading_points = false; + bool reading_tets = false; + size_t n_points = 0; + size_t n_tets = 0; + size_t x_count = 0; + size_t indices_count = 0; + while (std::getline(fs, line)) + { + 
std::stringstream ss(line); + if (line.size() == (size_t)(0)) + { + } + else if (line.substr(0, 6) == "POINTS") + { + reading_points = true; + reading_tets = false; + ss.ignore(128, ' '); // ignore "POINTS" + ss >> n_points; + X.resize(n_points); + } + else if (line.substr(0, 5) == "CELLS") + { + reading_points = false; + reading_tets = true; + ss.ignore(128, ' '); // ignore "CELLS" + ss >> n_tets; + indices.resize(n_tets); + } + else if (line.substr(0, 10) == "CELL_TYPES") + { + reading_points = false; + reading_tets = false; + } + else if (reading_points) + { + btScalar p; + ss >> p; + position.setX(p); + ss >> p; + position.setY(p); + ss >> p; + position.setZ(p); + //printf("v %f %f %f\n", position.getX(), position.getY(), position.getZ()); + X[x_count++] = position; + } + else if (reading_tets) + { int d; ss >> d; if (d != 4) @@ -1308,317 +1309,355 @@ btSoftBody* btSoftBodyHelpers::CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, fs.close(); return 0; } - ss.ignore(128, ' '); // ignore "4" - Index tet; - tet.resize(4); - for (size_t i = 0; i < 4; i++) - { - ss >> tet[i]; - printf("%d ", tet[i]); - } - printf("\n"); - indices[indices_count++] = tet; - } - } - btSoftBody* psb = new btSoftBody(&worldInfo, n_points, &X[0], 0); - - for (int i = 0; i < n_tets; ++i) - { - const Index& ni = indices[i]; - psb->appendTetra(ni[0], ni[1], ni[2], ni[3]); - { - psb->appendLink(ni[0], ni[1], 0, true); - psb->appendLink(ni[1], ni[2], 0, true); - psb->appendLink(ni[2], ni[0], 0, true); - psb->appendLink(ni[0], ni[3], 0, true); - psb->appendLink(ni[1], ni[3], 0, true); - psb->appendLink(ni[2], ni[3], 0, true); - } - } - - - generateBoundaryFaces(psb); - psb->initializeDmInverse(); - psb->m_tetraScratches.resize(psb->m_tetras.size()); - psb->m_tetraScratchesTn.resize(psb->m_tetras.size()); - printf("Nodes: %u\r\n", psb->m_nodes.size()); - printf("Links: %u\r\n", psb->m_links.size()); - printf("Faces: %u\r\n", psb->m_faces.size()); - printf("Tetras: %u\r\n", 
psb->m_tetras.size()); - - fs.close(); - return psb; + ss.ignore(128, ' '); // ignore "4" + Index tet; + tet.resize(4); + for (size_t i = 0; i < 4; i++) + { + ss >> tet[i]; + //printf("%d ", tet[i]); + } + //printf("\n"); + indices[indices_count++] = tet; + } + } + btSoftBody* psb = new btSoftBody(&worldInfo, n_points, &X[0], 0); + + for (int i = 0; i < n_tets; ++i) + { + const Index& ni = indices[i]; + psb->appendTetra(ni[0], ni[1], ni[2], ni[3]); + { + psb->appendLink(ni[0], ni[1], 0, true); + psb->appendLink(ni[1], ni[2], 0, true); + psb->appendLink(ni[2], ni[0], 0, true); + psb->appendLink(ni[0], ni[3], 0, true); + psb->appendLink(ni[1], ni[3], 0, true); + psb->appendLink(ni[2], ni[3], 0, true); + } + } + + generateBoundaryFaces(psb); + psb->initializeDmInverse(); + psb->m_tetraScratches.resize(psb->m_tetras.size()); + psb->m_tetraScratchesTn.resize(psb->m_tetras.size()); + printf("Nodes: %u\r\n", psb->m_nodes.size()); + printf("Links: %u\r\n", psb->m_links.size()); + printf("Faces: %u\r\n", psb->m_faces.size()); + printf("Tetras: %u\r\n", psb->m_tetras.size()); + + fs.close(); + return psb; } void btSoftBodyHelpers::generateBoundaryFaces(btSoftBody* psb) { - int counter = 0; - for (int i = 0; i < psb->m_nodes.size(); ++i) - { - psb->m_nodes[i].index = counter++; - } - typedef btAlignedObjectArray<int> Index; - btAlignedObjectArray<Index> indices; - indices.resize(psb->m_tetras.size()); - for (int i = 0; i < indices.size(); ++i) - { - Index index; - index.push_back(psb->m_tetras[i].m_n[0]->index); - index.push_back(psb->m_tetras[i].m_n[1]->index); - index.push_back(psb->m_tetras[i].m_n[2]->index); - index.push_back(psb->m_tetras[i].m_n[3]->index); - indices[i] = index; - } - - std::map<std::vector<int>, std::vector<int> > dict; - for (int i = 0; i < indices.size(); ++i) - { - for (int j = 0; j < 4; ++j) - { - std::vector<int> f; - if (j == 0) - { - f.push_back(indices[i][1]); - f.push_back(indices[i][0]); - f.push_back(indices[i][2]); - } - if (j == 1) - { - 
f.push_back(indices[i][3]); - f.push_back(indices[i][0]); - f.push_back(indices[i][1]); - } - if (j == 2) - { - f.push_back(indices[i][3]); - f.push_back(indices[i][1]); - f.push_back(indices[i][2]); - } - if (j == 3) - { - f.push_back(indices[i][2]); - f.push_back(indices[i][0]); - f.push_back(indices[i][3]); - } - std::vector<int> f_sorted = f; - std::sort(f_sorted.begin(), f_sorted.end()); - if (dict.find(f_sorted) != dict.end()) - { - dict.erase(f_sorted); - } - else - { - dict.insert(std::make_pair(f_sorted, f)); - } - } - } - - for (std::map<std::vector<int>, std::vector<int> >::iterator it = dict.begin(); it != dict.end(); ++it) - { - std::vector<int> f = it->second; - psb->appendFace(f[0], f[1], f[2]); - } + int counter = 0; + for (int i = 0; i < psb->m_nodes.size(); ++i) + { + psb->m_nodes[i].index = counter++; + } + typedef btAlignedObjectArray<int> Index; + btAlignedObjectArray<Index> indices; + indices.resize(psb->m_tetras.size()); + for (int i = 0; i < indices.size(); ++i) + { + Index index; + index.push_back(psb->m_tetras[i].m_n[0]->index); + index.push_back(psb->m_tetras[i].m_n[1]->index); + index.push_back(psb->m_tetras[i].m_n[2]->index); + index.push_back(psb->m_tetras[i].m_n[3]->index); + indices[i] = index; + } + + std::map<std::vector<int>, std::vector<int> > dict; + for (int i = 0; i < indices.size(); ++i) + { + for (int j = 0; j < 4; ++j) + { + std::vector<int> f; + if (j == 0) + { + f.push_back(indices[i][1]); + f.push_back(indices[i][0]); + f.push_back(indices[i][2]); + } + if (j == 1) + { + f.push_back(indices[i][3]); + f.push_back(indices[i][0]); + f.push_back(indices[i][1]); + } + if (j == 2) + { + f.push_back(indices[i][3]); + f.push_back(indices[i][1]); + f.push_back(indices[i][2]); + } + if (j == 3) + { + f.push_back(indices[i][2]); + f.push_back(indices[i][0]); + f.push_back(indices[i][3]); + } + std::vector<int> f_sorted = f; + std::sort(f_sorted.begin(), f_sorted.end()); + if (dict.find(f_sorted) != dict.end()) + { + 
dict.erase(f_sorted); + } + else + { + dict.insert(std::make_pair(f_sorted, f)); + } + } + } + + for (std::map<std::vector<int>, std::vector<int> >::iterator it = dict.begin(); it != dict.end(); ++it) + { + std::vector<int> f = it->second; + psb->appendFace(f[0], f[1], f[2]); + //printf("f %d %d %d\n", f[0] + 1, f[1] + 1, f[2] + 1); + } } +//Write the surface mesh to an obj file. void btSoftBodyHelpers::writeObj(const char* filename, const btSoftBody* psb) { - std::ofstream fs; - fs.open(filename); - btAssert(fs); - for (int i = 0; i < psb->m_nodes.size(); ++i) - { - fs << "v"; - for (int d = 0; d < 3; d++) - { - fs << " " << psb->m_nodes[i].m_x[d]; - } - fs << "\n"; - } - - for (int i = 0; i < psb->m_faces.size(); ++i) - { - fs << "f"; - for (int n = 0; n < 3; n++) - { - fs << " " << psb->m_faces[i].m_n[n]->index + 1; - } - fs << "\n"; - } - fs.close(); + std::ofstream fs; + fs.open(filename); + btAssert(fs); + + if (psb->m_tetras.size() > 0) + { + // For tetrahedron mesh, we need to re-index the surface mesh for it to be in obj file/ + std::map<int, int> dict; + for (int i = 0; i < psb->m_faces.size(); i++) + { + for (int d = 0; d < 3; d++) + { + int index = psb->m_faces[i].m_n[d]->index; + if (dict.find(index) == dict.end()) + { + int dict_size = dict.size(); + dict[index] = dict_size; + fs << "v"; + for (int k = 0; k < 3; k++) + { + fs << " " << psb->m_nodes[index].m_x[k]; + } + fs << "\n"; + } + } + } + // Write surface mesh. 
+ for (int i = 0; i < psb->m_faces.size(); ++i) + { + fs << "f"; + for (int n = 0; n < 3; n++) + { + fs << " " << dict[psb->m_faces[i].m_n[n]->index] + 1; + } + fs << "\n"; + } + } + else + { + // For trimesh, directly write out all the nodes and faces.xs + for (int i = 0; i < psb->m_nodes.size(); ++i) + { + fs << "v"; + for (int d = 0; d < 3; d++) + { + fs << " " << psb->m_nodes[i].m_x[d]; + } + fs << "\n"; + } + + for (int i = 0; i < psb->m_faces.size(); ++i) + { + fs << "f"; + for (int n = 0; n < 3; n++) + { + fs << " " << psb->m_faces[i].m_n[n]->index + 1; + } + fs << "\n"; + } + } + fs.close(); } void btSoftBodyHelpers::duplicateFaces(const char* filename, const btSoftBody* psb) { - std::ifstream fs_read; - fs_read.open(filename); - std::string line; - btVector3 pos; - btAlignedObjectArray<btAlignedObjectArray<int> > additional_faces; - while (std::getline(fs_read, line)) - { - std::stringstream ss(line); - if (line[0] == 'v') - { - } - else if (line[0] == 'f') - { - ss.ignore(); - int id0, id1, id2; - ss >> id0; - ss >> id1; - ss >> id2; - btAlignedObjectArray<int> new_face; - new_face.push_back(id1); - new_face.push_back(id0); - new_face.push_back(id2); - additional_faces.push_back(new_face); - } - } - fs_read.close(); - - std::ofstream fs_write; - fs_write.open(filename, std::ios_base::app); - for (int i = 0; i < additional_faces.size(); ++i) - { - fs_write << "f"; - for (int n = 0; n < 3; n++) - { - fs_write << " " << additional_faces[i][n]; - } - fs_write << "\n"; - } - fs_write.close(); + std::ifstream fs_read; + fs_read.open(filename); + std::string line; + btVector3 pos; + btAlignedObjectArray<btAlignedObjectArray<int> > additional_faces; + while (std::getline(fs_read, line)) + { + std::stringstream ss(line); + if (line[0] == 'v') + { + } + else if (line[0] == 'f') + { + ss.ignore(); + int id0, id1, id2; + ss >> id0; + ss >> id1; + ss >> id2; + btAlignedObjectArray<int> new_face; + new_face.push_back(id1); + new_face.push_back(id0); + 
new_face.push_back(id2); + additional_faces.push_back(new_face); + } + } + fs_read.close(); + + std::ofstream fs_write; + fs_write.open(filename, std::ios_base::app); + for (int i = 0; i < additional_faces.size(); ++i) + { + fs_write << "f"; + for (int n = 0; n < 3; n++) + { + fs_write << " " << additional_faces[i][n]; + } + fs_write << "\n"; + } + fs_write.close(); } // Given a simplex with vertices a,b,c,d, find the barycentric weights of p in this simplex void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary) { - btVector3 vap = p - a; - btVector3 vbp = p - b; - - btVector3 vab = b - a; - btVector3 vac = c - a; - btVector3 vad = d - a; - - btVector3 vbc = c - b; - btVector3 vbd = d - b; - btScalar va6 = (vbp.cross(vbd)).dot(vbc); - btScalar vb6 = (vap.cross(vac)).dot(vad); - btScalar vc6 = (vap.cross(vad)).dot(vab); - btScalar vd6 = (vap.cross(vab)).dot(vac); - btScalar v6 = btScalar(1) / (vab.cross(vac).dot(vad)); - bary = btVector4(va6*v6, vb6*v6, vc6*v6, vd6*v6); + btVector3 vap = p - a; + btVector3 vbp = p - b; + + btVector3 vab = b - a; + btVector3 vac = c - a; + btVector3 vad = d - a; + + btVector3 vbc = c - b; + btVector3 vbd = d - b; + btScalar va6 = (vbp.cross(vbd)).dot(vbc); + btScalar vb6 = (vap.cross(vac)).dot(vad); + btScalar vc6 = (vap.cross(vad)).dot(vab); + btScalar vd6 = (vap.cross(vab)).dot(vac); + btScalar v6 = btScalar(1) / (vab.cross(vac).dot(vad)); + bary = btVector4(va6 * v6, vb6 * v6, vc6 * v6, vd6 * v6); } // Given a simplex with vertices a,b,c, find the barycentric weights of p in this simplex. bary[3] = 0. 
void btSoftBodyHelpers::getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary) { - btVector3 v0 = b - a, v1 = c - a, v2 = p - a; - btScalar d00 = btDot(v0, v0); - btScalar d01 = btDot(v0, v1); - btScalar d11 = btDot(v1, v1); - btScalar d20 = btDot(v2, v0); - btScalar d21 = btDot(v2, v1); - btScalar invDenom = 1.0 / (d00 * d11 - d01 * d01); - bary[1] = (d11 * d20 - d01 * d21) * invDenom; - bary[2] = (d00 * d21 - d01 * d20) * invDenom; - bary[0] = 1.0 - bary[1] - bary[2]; - bary[3] = 0; + btVector3 v0 = b - a, v1 = c - a, v2 = p - a; + btScalar d00 = btDot(v0, v0); + btScalar d01 = btDot(v0, v1); + btScalar d11 = btDot(v1, v1); + btScalar d20 = btDot(v2, v0); + btScalar d21 = btDot(v2, v1); + btScalar invDenom = 1.0 / (d00 * d11 - d01 * d01); + bary[1] = (d11 * d20 - d01 * d21) * invDenom; + bary[2] = (d00 * d21 - d01 * d20) * invDenom; + bary[0] = 1.0 - bary[1] - bary[2]; + bary[3] = 0; } // Iterate through all render nodes to find the simulation tetrahedron that contains the render node and record the barycentric weights // If the node is not inside any tetrahedron, assign it to the tetrahedron in which the node has the least negative barycentric weight void btSoftBodyHelpers::interpolateBarycentricWeights(btSoftBody* psb) { - psb->m_z.resize(0); - psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size()); - psb->m_renderNodesParents.resize(psb->m_renderNodes.size()); - for (int i = 0; i < psb->m_renderNodes.size(); ++i) - { - const btVector3& p = psb->m_renderNodes[i].m_x; - btVector4 bary; - btVector4 optimal_bary; - btScalar min_bary_weight = -1e3; - btAlignedObjectArray<const btSoftBody::Node*> optimal_parents; - for (int j = 0; j < psb->m_tetras.size(); ++j) - { - const btSoftBody::Tetra& t = psb->m_tetras[j]; - getBarycentricWeights(t.m_n[0]->m_x, t.m_n[1]->m_x, t.m_n[2]->m_x, t.m_n[3]->m_x, p, bary); - btScalar new_min_bary_weight = bary[0]; - for (int k = 1; k < 4; ++k) - { - 
new_min_bary_weight = btMin(new_min_bary_weight, bary[k]); - } - if (new_min_bary_weight > min_bary_weight) - { - btAlignedObjectArray<const btSoftBody::Node*> parents; - parents.push_back(t.m_n[0]); - parents.push_back(t.m_n[1]); - parents.push_back(t.m_n[2]); - parents.push_back(t.m_n[3]); - optimal_parents = parents; - optimal_bary = bary; - min_bary_weight = new_min_bary_weight; - // stop searching if p is inside the tetrahedron at hand - if (bary[0]>=0. && bary[1]>=0. && bary[2]>=0. && bary[3]>=0.) - { - break; - } - } - } - psb->m_renderNodesInterpolationWeights[i] = optimal_bary; - psb->m_renderNodesParents[i] = optimal_parents; - } + psb->m_z.resize(0); + psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size()); + psb->m_renderNodesParents.resize(psb->m_renderNodes.size()); + for (int i = 0; i < psb->m_renderNodes.size(); ++i) + { + const btVector3& p = psb->m_renderNodes[i].m_x; + btVector4 bary; + btVector4 optimal_bary; + btScalar min_bary_weight = -1e3; + btAlignedObjectArray<const btSoftBody::Node*> optimal_parents; + for (int j = 0; j < psb->m_tetras.size(); ++j) + { + const btSoftBody::Tetra& t = psb->m_tetras[j]; + getBarycentricWeights(t.m_n[0]->m_x, t.m_n[1]->m_x, t.m_n[2]->m_x, t.m_n[3]->m_x, p, bary); + btScalar new_min_bary_weight = bary[0]; + for (int k = 1; k < 4; ++k) + { + new_min_bary_weight = btMin(new_min_bary_weight, bary[k]); + } + if (new_min_bary_weight > min_bary_weight) + { + btAlignedObjectArray<const btSoftBody::Node*> parents; + parents.push_back(t.m_n[0]); + parents.push_back(t.m_n[1]); + parents.push_back(t.m_n[2]); + parents.push_back(t.m_n[3]); + optimal_parents = parents; + optimal_bary = bary; + min_bary_weight = new_min_bary_weight; + // stop searching if p is inside the tetrahedron at hand + if (bary[0] >= 0. && bary[1] >= 0. && bary[2] >= 0. && bary[3] >= 0.) 
+ { + break; + } + } + } + psb->m_renderNodesInterpolationWeights[i] = optimal_bary; + psb->m_renderNodesParents[i] = optimal_parents; + } } - // Iterate through all render nodes to find the simulation triangle that's closest to the node in the barycentric sense. void btSoftBodyHelpers::extrapolateBarycentricWeights(btSoftBody* psb) { - psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size()); - psb->m_renderNodesParents.resize(psb->m_renderNodes.size()); - psb->m_z.resize(psb->m_renderNodes.size()); - for (int i = 0; i < psb->m_renderNodes.size(); ++i) - { - const btVector3& p = psb->m_renderNodes[i].m_x; - btVector4 bary; - btVector4 optimal_bary; - btScalar min_bary_weight = -SIMD_INFINITY; - btAlignedObjectArray<const btSoftBody::Node*> optimal_parents; - btScalar dist = 0, optimal_dist = 0; - for (int j = 0; j < psb->m_faces.size(); ++j) - { - const btSoftBody::Face& f = psb->m_faces[j]; - btVector3 n = btCross(f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x); - btVector3 unit_n = n.normalized(); - dist = (p-f.m_n[0]->m_x).dot(unit_n); - btVector3 proj_p = p - dist*unit_n; - getBarycentricWeights(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, proj_p, bary); - btScalar new_min_bary_weight = bary[0]; - for (int k = 1; k < 3; ++k) - { - new_min_bary_weight = btMin(new_min_bary_weight, bary[k]); - } - - // p is out of the current best triangle, we found a traingle that's better - bool better_than_closest_outisde = (new_min_bary_weight > min_bary_weight && min_bary_weight<0.); - // p is inside of the current best triangle, we found a triangle that's better - bool better_than_best_inside = (new_min_bary_weight>=0 && min_bary_weight>=0 && btFabs(dist)<btFabs(optimal_dist)); - - if (better_than_closest_outisde || better_than_best_inside) - { - btAlignedObjectArray<const btSoftBody::Node*> parents; - parents.push_back(f.m_n[0]); - parents.push_back(f.m_n[1]); - parents.push_back(f.m_n[2]); - optimal_parents = parents; - optimal_bary = bary; - 
optimal_dist = dist; - min_bary_weight = new_min_bary_weight; - } - } - psb->m_renderNodesInterpolationWeights[i] = optimal_bary; - psb->m_renderNodesParents[i] = optimal_parents; - psb->m_z[i] = optimal_dist; - } + psb->m_renderNodesInterpolationWeights.resize(psb->m_renderNodes.size()); + psb->m_renderNodesParents.resize(psb->m_renderNodes.size()); + psb->m_z.resize(psb->m_renderNodes.size()); + for (int i = 0; i < psb->m_renderNodes.size(); ++i) + { + const btVector3& p = psb->m_renderNodes[i].m_x; + btVector4 bary; + btVector4 optimal_bary; + btScalar min_bary_weight = -SIMD_INFINITY; + btAlignedObjectArray<const btSoftBody::Node*> optimal_parents; + btScalar dist = 0, optimal_dist = 0; + for (int j = 0; j < psb->m_faces.size(); ++j) + { + const btSoftBody::Face& f = psb->m_faces[j]; + btVector3 n = btCross(f.m_n[1]->m_x - f.m_n[0]->m_x, f.m_n[2]->m_x - f.m_n[0]->m_x); + btVector3 unit_n = n.normalized(); + dist = (p - f.m_n[0]->m_x).dot(unit_n); + btVector3 proj_p = p - dist * unit_n; + getBarycentricWeights(f.m_n[0]->m_x, f.m_n[1]->m_x, f.m_n[2]->m_x, proj_p, bary); + btScalar new_min_bary_weight = bary[0]; + for (int k = 1; k < 3; ++k) + { + new_min_bary_weight = btMin(new_min_bary_weight, bary[k]); + } + + // p is out of the current best triangle, we found a traingle that's better + bool better_than_closest_outisde = (new_min_bary_weight > min_bary_weight && min_bary_weight < 0.); + // p is inside of the current best triangle, we found a triangle that's better + bool better_than_best_inside = (new_min_bary_weight >= 0 && min_bary_weight >= 0 && btFabs(dist) < btFabs(optimal_dist)); + + if (better_than_closest_outisde || better_than_best_inside) + { + btAlignedObjectArray<const btSoftBody::Node*> parents; + parents.push_back(f.m_n[0]); + parents.push_back(f.m_n[1]); + parents.push_back(f.m_n[2]); + optimal_parents = parents; + optimal_bary = bary; + optimal_dist = dist; + min_bary_weight = new_min_bary_weight; + } + } + 
psb->m_renderNodesInterpolationWeights[i] = optimal_bary; + psb->m_renderNodesParents[i] = optimal_parents; + psb->m_z[i] = optimal_dist; + } } diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h index abe1870890..237d29761d 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h +++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyHelpers.h @@ -93,7 +93,7 @@ struct btSoftBodyHelpers int resy, int fixeds, bool gendiags, - btScalar perturbation = 0.); + btScalar perturbation = 0.); /* Create a patch with UV Texture Coordinates */ static btSoftBody* CreatePatchUV(btSoftBodyWorldInfo& worldInfo, const btVector3& corner00, @@ -142,21 +142,21 @@ struct btSoftBodyHelpers bool bfacelinks, bool btetralinks, bool bfacesfromtetras); - static btSoftBody* CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file); + static btSoftBody* CreateFromVtkFile(btSoftBodyWorldInfo& worldInfo, const char* vtk_file); - static void writeObj(const char* file, const btSoftBody* psb); - - static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary); - - static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary); - - static void interpolateBarycentricWeights(btSoftBody* psb); - - static void extrapolateBarycentricWeights(btSoftBody* psb); - - static void generateBoundaryFaces(btSoftBody* psb); - - static void duplicateFaces(const char* filename, const btSoftBody* psb); + static void writeObj(const char* file, const btSoftBody* psb); + + static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& d, const btVector3& p, btVector4& bary); + + static void getBarycentricWeights(const btVector3& a, const btVector3& b, const btVector3& c, const btVector3& p, btVector4& bary); + + static void 
interpolateBarycentricWeights(btSoftBody* psb); + + static void extrapolateBarycentricWeights(btSoftBody* psb); + + static void generateBoundaryFaces(btSoftBody* psb); + + static void duplicateFaces(const char* filename, const btSoftBody* psb); /// Sort the list of links to move link calculations that are dependent upon earlier /// ones as far as possible away from the calculation of those values /// This tends to make adjacent loop iterations not dependent upon one another, diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h index b9ebc95b6b..c17bbb5cd4 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h +++ b/thirdparty/bullet/BulletSoftBody/btSoftBodyInternals.h @@ -32,86 +32,85 @@ subject to the following restrictions: // Given a multibody link, a contact point and a contact direction, fill in the jacobian data needed to calculate the velocity change given an impulse in the contact direction static SIMD_FORCE_INLINE void findJacobian(const btMultiBodyLinkCollider* multibodyLinkCol, - btMultiBodyJacobianData& jacobianData, - const btVector3& contact_point, - const btVector3& dir) -{ - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - jacobianData.m_jacobians.resize(ndof); - jacobianData.m_deltaVelocitiesUnitImpulse.resize(ndof); - btScalar* jac = &jacobianData.m_jacobians[0]; - - multibodyLinkCol->m_multiBody->fillContactJacobianMultiDof(multibodyLinkCol->m_link, contact_point, dir, jac, jacobianData.scratch_r, jacobianData.scratch_v, jacobianData.scratch_m); - multibodyLinkCol->m_multiBody->calcAccelerationDeltasMultiDof(&jacobianData.m_jacobians[0], &jacobianData.m_deltaVelocitiesUnitImpulse[0], jacobianData.scratch_r, jacobianData.scratch_v); + btMultiBodyJacobianData& jacobianData, + const btVector3& contact_point, + const btVector3& dir) +{ + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + jacobianData.m_jacobians.resize(ndof); + 
jacobianData.m_deltaVelocitiesUnitImpulse.resize(ndof); + btScalar* jac = &jacobianData.m_jacobians[0]; + + multibodyLinkCol->m_multiBody->fillContactJacobianMultiDof(multibodyLinkCol->m_link, contact_point, dir, jac, jacobianData.scratch_r, jacobianData.scratch_v, jacobianData.scratch_m); + multibodyLinkCol->m_multiBody->calcAccelerationDeltasMultiDof(&jacobianData.m_jacobians[0], &jacobianData.m_deltaVelocitiesUnitImpulse[0], jacobianData.scratch_r, jacobianData.scratch_v); } static SIMD_FORCE_INLINE btVector3 generateUnitOrthogonalVector(const btVector3& u) { - btScalar ux = u.getX(); - btScalar uy = u.getY(); - btScalar uz = u.getZ(); - btScalar ax = std::abs(ux); - btScalar ay = std::abs(uy); - btScalar az = std::abs(uz); - btVector3 v; - if (ax <= ay && ax <= az) - v = btVector3(0, -uz, uy); - else if (ay <= ax && ay <= az) - v = btVector3(-uz, 0, ux); - else - v = btVector3(-uy, ux, 0); - v.normalize(); - return v; + btScalar ux = u.getX(); + btScalar uy = u.getY(); + btScalar uz = u.getZ(); + btScalar ax = std::abs(ux); + btScalar ay = std::abs(uy); + btScalar az = std::abs(uz); + btVector3 v; + if (ax <= ay && ax <= az) + v = btVector3(0, -uz, uy); + else if (ay <= ax && ay <= az) + v = btVector3(-uz, 0, ux); + else + v = btVector3(-uy, ux, 0); + v.normalize(); + return v; } static SIMD_FORCE_INLINE bool proximityTest(const btVector3& x1, const btVector3& x2, const btVector3& x3, const btVector3& x4, const btVector3& normal, const btScalar& mrg, btVector3& bary) { - btVector3 x43 = x4-x3; - if (std::abs(x43.dot(normal)) > mrg) - return false; - btVector3 x13 = x1-x3; - btVector3 x23 = x2-x3; - btScalar a11 = x13.length2(); - btScalar a22 = x23.length2(); - btScalar a12 = x13.dot(x23); - btScalar b1 = x13.dot(x43); - btScalar b2 = x23.dot(x43); - btScalar det = a11*a22 - a12*a12; - if (det < SIMD_EPSILON) - return false; - btScalar w1 = (b1*a22-b2*a12)/det; - btScalar w2 = (b2*a11-b1*a12)/det; - btScalar w3 = 1-w1-w2; - btScalar delta = mrg / 
std::sqrt(0.5*std::abs(x13.cross(x23).safeNorm())); - bary = btVector3(w1,w2,w3); - for (int i = 0; i < 3; ++i) - { - if (bary[i] < -delta || bary[i] > 1+delta) - return false; - } - return true; + btVector3 x43 = x4 - x3; + if (std::abs(x43.dot(normal)) > mrg) + return false; + btVector3 x13 = x1 - x3; + btVector3 x23 = x2 - x3; + btScalar a11 = x13.length2(); + btScalar a22 = x23.length2(); + btScalar a12 = x13.dot(x23); + btScalar b1 = x13.dot(x43); + btScalar b2 = x23.dot(x43); + btScalar det = a11 * a22 - a12 * a12; + if (det < SIMD_EPSILON) + return false; + btScalar w1 = (b1 * a22 - b2 * a12) / det; + btScalar w2 = (b2 * a11 - b1 * a12) / det; + btScalar w3 = 1 - w1 - w2; + btScalar delta = mrg / std::sqrt(0.5 * std::abs(x13.cross(x23).safeNorm())); + bary = btVector3(w1, w2, w3); + for (int i = 0; i < 3; ++i) + { + if (bary[i] < -delta || bary[i] > 1 + delta) + return false; + } + return true; } static const int KDOP_COUNT = 13; -static btVector3 dop[KDOP_COUNT]={btVector3(1,0,0), - btVector3(0,1,0), - btVector3(0,0,1), - btVector3(1,1,0), - btVector3(1,0,1), - btVector3(0,1,1), - btVector3(1,-1,0), - btVector3(1,0,-1), - btVector3(0,1,-1), - btVector3(1,1,1), - btVector3(1,-1,1), - btVector3(1,1,-1), - btVector3(1,-1,-1) -}; +static btVector3 dop[KDOP_COUNT] = {btVector3(1, 0, 0), + btVector3(0, 1, 0), + btVector3(0, 0, 1), + btVector3(1, 1, 0), + btVector3(1, 0, 1), + btVector3(0, 1, 1), + btVector3(1, -1, 0), + btVector3(1, 0, -1), + btVector3(0, 1, -1), + btVector3(1, 1, 1), + btVector3(1, -1, 1), + btVector3(1, 1, -1), + btVector3(1, -1, -1)}; static inline int getSign(const btVector3& n, const btVector3& x) { btScalar d = n.dot(x); - if (d>SIMD_EPSILON) + if (d > SIMD_EPSILON) return 1; - if (d<-SIMD_EPSILON) + if (d < -SIMD_EPSILON) return -1; return 0; } @@ -119,13 +118,12 @@ static inline int getSign(const btVector3& n, const btVector3& x) static SIMD_FORCE_INLINE bool hasSeparatingPlane(const btSoftBody::Face* face, const btSoftBody::Node* node, 
const btScalar& dt) { btVector3 hex[6] = {face->m_n[0]->m_x - node->m_x, - face->m_n[1]->m_x - node->m_x, - face->m_n[2]->m_x - node->m_x, - face->m_n[0]->m_x + dt*face->m_n[0]->m_v - node->m_x, - face->m_n[1]->m_x + dt*face->m_n[1]->m_v - node->m_x, - face->m_n[2]->m_x + dt*face->m_n[2]->m_v - node->m_x - }; - btVector3 segment = dt*node->m_v; + face->m_n[1]->m_x - node->m_x, + face->m_n[2]->m_x - node->m_x, + face->m_n[0]->m_x + dt * face->m_n[0]->m_v - node->m_x, + face->m_n[1]->m_x + dt * face->m_n[1]->m_v - node->m_x, + face->m_n[2]->m_x + dt * face->m_n[2]->m_v - node->m_x}; + btVector3 segment = dt * node->m_v; for (int i = 0; i < KDOP_COUNT; ++i) { int s = getSign(dop[i], segment); @@ -143,488 +141,494 @@ static SIMD_FORCE_INLINE bool hasSeparatingPlane(const btSoftBody::Face* face, c static SIMD_FORCE_INLINE bool nearZero(const btScalar& a) { - return (a>-SAFE_EPSILON && a<SAFE_EPSILON); + return (a > -SAFE_EPSILON && a < SAFE_EPSILON); } static SIMD_FORCE_INLINE bool sameSign(const btScalar& a, const btScalar& b) { - return (nearZero(a) || nearZero(b) || (a>SAFE_EPSILON && b>SAFE_EPSILON) || (a<-SAFE_EPSILON && b<-SAFE_EPSILON)); + return (nearZero(a) || nearZero(b) || (a > SAFE_EPSILON && b > SAFE_EPSILON) || (a < -SAFE_EPSILON && b < -SAFE_EPSILON)); } static SIMD_FORCE_INLINE bool diffSign(const btScalar& a, const btScalar& b) { - return !sameSign(a, b); -} -inline btScalar evaluateBezier2(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &t, const btScalar &s) -{ - btScalar s2 = s*s; - btScalar t2 = t*t; - - return p0*s2+p1*btScalar(2.0)*s*t+p2*t2; -} -inline btScalar evaluateBezier(const btScalar &p0, const btScalar &p1, const btScalar &p2, const btScalar &p3, const btScalar &t, const btScalar &s) -{ - btScalar s2 = s*s; - btScalar s3 = s2*s; - btScalar t2 = t*t; - btScalar t3 = t2*t; - - return p0*s3+p1*btScalar(3.0)*s2*t+p2*btScalar(3.0)*s*t2+p3*t3; -} -static SIMD_FORCE_INLINE bool getSigns(bool type_c, const btScalar& k0, 
const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, const btScalar& t1, btScalar <0, btScalar <1) -{ - if (sameSign(t0, t1)) { - lt0 = t0; - lt1 = t0; - return true; - } - - if (type_c || diffSign(k0, k3)) { - btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1); - if (t0<-0) - ft = -ft; - - if (sameSign(ft, k0)) { - lt0 = t1; - lt1 = t1; - } - else { - lt0 = t0; - lt1 = t0; - } - return true; - } - - if (!type_c) { - btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1); - if (t0<-0) - ft = -ft; - - if (diffSign(ft, k0)) { - lt0 = t0; - lt1 = t1; - return true; - } - - btScalar fk = evaluateBezier2(k1-k0, k2-k1, k3-k2, t0, -t1); - - if (sameSign(fk, k1-k0)) - lt0 = lt1 = t1; - else - lt0 = lt1 = t0; - - return true; - } - return false; + return !sameSign(a, b); +} +inline btScalar evaluateBezier2(const btScalar& p0, const btScalar& p1, const btScalar& p2, const btScalar& t, const btScalar& s) +{ + btScalar s2 = s * s; + btScalar t2 = t * t; + + return p0 * s2 + p1 * btScalar(2.0) * s * t + p2 * t2; +} +inline btScalar evaluateBezier(const btScalar& p0, const btScalar& p1, const btScalar& p2, const btScalar& p3, const btScalar& t, const btScalar& s) +{ + btScalar s2 = s * s; + btScalar s3 = s2 * s; + btScalar t2 = t * t; + btScalar t3 = t2 * t; + + return p0 * s3 + p1 * btScalar(3.0) * s2 * t + p2 * btScalar(3.0) * s * t2 + p3 * t3; +} +static SIMD_FORCE_INLINE bool getSigns(bool type_c, const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, const btScalar& t1, btScalar& lt0, btScalar& lt1) +{ + if (sameSign(t0, t1)) + { + lt0 = t0; + lt1 = t0; + return true; + } + + if (type_c || diffSign(k0, k3)) + { + btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1); + if (t0 < -0) + ft = -ft; + + if (sameSign(ft, k0)) + { + lt0 = t1; + lt1 = t1; + } + else + { + lt0 = t0; + lt1 = t0; + } + return true; + } + + if (!type_c) + { + btScalar ft = evaluateBezier(k0, k1, k2, k3, t0, -t1); + if (t0 < -0) + 
ft = -ft; + + if (diffSign(ft, k0)) + { + lt0 = t0; + lt1 = t1; + return true; + } + + btScalar fk = evaluateBezier2(k1 - k0, k2 - k1, k3 - k2, t0, -t1); + + if (sameSign(fk, k1 - k0)) + lt0 = lt1 = t1; + else + lt0 = lt1 = t0; + + return true; + } + return false; } static SIMD_FORCE_INLINE void getBernsteinCoeff(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, btScalar& k0, btScalar& k1, btScalar& k2, btScalar& k3) { - const btVector3& n0 = face->m_n0; - const btVector3& n1 = face->m_n1; - btVector3 n_hat = n0 + n1 - face->m_vn; - btVector3 p0ma0 = node->m_x - face->m_n[0]->m_x; - btVector3 p1ma1 = node->m_q - face->m_n[0]->m_q; - k0 = (p0ma0).dot(n0) * 3.0; - k1 = (p0ma0).dot(n_hat) + (p1ma1).dot(n0); - k2 = (p1ma1).dot(n_hat) + (p0ma0).dot(n1); - k3 = (p1ma1).dot(n1) * 3.0; + const btVector3& n0 = face->m_n0; + const btVector3& n1 = face->m_n1; + btVector3 n_hat = n0 + n1 - face->m_vn; + btVector3 p0ma0 = node->m_x - face->m_n[0]->m_x; + btVector3 p1ma1 = node->m_q - face->m_n[0]->m_q; + k0 = (p0ma0).dot(n0) * 3.0; + k1 = (p0ma0).dot(n_hat) + (p1ma1).dot(n0); + k2 = (p1ma1).dot(n_hat) + (p0ma0).dot(n1); + k3 = (p1ma1).dot(n1) * 3.0; } static SIMD_FORCE_INLINE void polyDecomposition(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, btScalar& u0, btScalar& u1, btScalar& v0, btScalar& v1) { - btScalar denom = 4.0 * (j1-j2) * (j1-j0) + (j2-j0) * (j2-j0); - u0 = (2.0*(j1-j2)*(3.0*k1-2.0*k0-k3) - (j0-j2)*(3.0*k2-2.0*k3-k0)) / denom; - u1 = (2.0*(j1-j0)*(3.0*k2-2.0*k3-k0) - (j2-j0)*(3.0*k1-2.0*k0-k3)) / denom; - v0 = k0-u0*j0; - v1 = k3-u1*j2; + btScalar denom = 4.0 * (j1 - j2) * (j1 - j0) + (j2 - j0) * (j2 - j0); + u0 = (2.0 * (j1 - j2) * (3.0 * k1 - 2.0 * k0 - k3) - (j0 - j2) * (3.0 * k2 - 2.0 * k3 - k0)) / denom; + u1 = (2.0 * (j1 - j0) * (3.0 * k2 - 2.0 * k3 - k0) - (j2 - j0) * (3.0 * k1 - 2.0 * k0 - k3)) / denom; + v0 = k0 - u0 * j0; + v1 = k3 - 
u1 * j2; } static SIMD_FORCE_INLINE bool rootFindingLemma(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3) { - btScalar u0, u1, v0, v1; - btScalar j0 = 3.0*(k1-k0); - btScalar j1 = 3.0*(k2-k1); - btScalar j2 = 3.0*(k3-k2); - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (sameSign(v0, v1)) - { - btScalar Ypa = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*v0*(1.0-v0) + j2*v0*v0; // Y'(v0) - if (sameSign(Ypa, j0)) - { - return (diffSign(k0,v1)); - } - } - return diffSign(k0,v0); -} - -static SIMD_FORCE_INLINE void getJs(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Node* a, const btSoftBody::Node* b, const btSoftBody::Node* c, const btSoftBody::Node* p, const btScalar& dt, btScalar& j0, btScalar& j1, btScalar& j2) -{ - const btVector3& a0 = a->m_x; - const btVector3& b0 = b->m_x; - const btVector3& c0 = c->m_x; - const btVector3& va = a->m_v; - const btVector3& vb = b->m_v; - const btVector3& vc = c->m_v; - const btVector3 a1 = a0 + dt*va; - const btVector3 b1 = b0 + dt*vb; - const btVector3 c1 = c0 + dt*vc; - btVector3 n0 = (b0-a0).cross(c0-a0); - btVector3 n1 = (b1-a1).cross(c1-a1); - btVector3 n_hat = n0+n1 - dt*dt*(vb-va).cross(vc-va); - const btVector3& p0 = p->m_x; - const btVector3& vp = p->m_v; - btVector3 p1 = p0 + dt*vp; - btVector3 m0 = (b0-p0).cross(c0-p0); - btVector3 m1 = (b1-p1).cross(c1-p1); - btVector3 m_hat = m0+m1 - dt*dt*(vb-vp).cross(vc-vp); - btScalar l0 = m0.dot(n0); - btScalar l1 = 0.25 * (m0.dot(n_hat) + m_hat.dot(n0)); - btScalar l2 = btScalar(1)/btScalar(6)*(m0.dot(n1) + m_hat.dot(n_hat) + m1.dot(n0)); - btScalar l3 = 0.25 * (m_hat.dot(n1) + m1.dot(n_hat)); - btScalar l4 = m1.dot(n1); - - btScalar k1p = 0.25 * k0 + 0.75 * k1; - btScalar k2p = 0.5 * k1 + 0.5 * k2; - btScalar k3p = 0.75 * k2 + 0.25 * k3; - - btScalar s0 = (l1 * k0 - l0 * k1p)*4.0; - btScalar s1 = (l2 * k0 - l0 * k2p)*2.0; - btScalar s2 = (l3 * k0 - l0 * k3p)*btScalar(4)/btScalar(3); - btScalar s3 = 
l4 * k0 - l0 * k3; - - j0 = (s1*k0 - s0*k1) * 3.0; - j1 = (s2*k0 - s0*k2) * 1.5; - j2 = (s3*k0 - s0*k3); + btScalar u0, u1, v0, v1; + btScalar j0 = 3.0 * (k1 - k0); + btScalar j1 = 3.0 * (k2 - k1); + btScalar j2 = 3.0 * (k3 - k2); + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (sameSign(v0, v1)) + { + btScalar Ypa = j0 * (1.0 - v0) * (1.0 - v0) + 2.0 * j1 * v0 * (1.0 - v0) + j2 * v0 * v0; // Y'(v0) + if (sameSign(Ypa, j0)) + { + return (diffSign(k0, v1)); + } + } + return diffSign(k0, v0); +} + +static SIMD_FORCE_INLINE void getJs(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Node* a, const btSoftBody::Node* b, const btSoftBody::Node* c, const btSoftBody::Node* p, const btScalar& dt, btScalar& j0, btScalar& j1, btScalar& j2) +{ + const btVector3& a0 = a->m_x; + const btVector3& b0 = b->m_x; + const btVector3& c0 = c->m_x; + const btVector3& va = a->m_v; + const btVector3& vb = b->m_v; + const btVector3& vc = c->m_v; + const btVector3 a1 = a0 + dt * va; + const btVector3 b1 = b0 + dt * vb; + const btVector3 c1 = c0 + dt * vc; + btVector3 n0 = (b0 - a0).cross(c0 - a0); + btVector3 n1 = (b1 - a1).cross(c1 - a1); + btVector3 n_hat = n0 + n1 - dt * dt * (vb - va).cross(vc - va); + const btVector3& p0 = p->m_x; + const btVector3& vp = p->m_v; + btVector3 p1 = p0 + dt * vp; + btVector3 m0 = (b0 - p0).cross(c0 - p0); + btVector3 m1 = (b1 - p1).cross(c1 - p1); + btVector3 m_hat = m0 + m1 - dt * dt * (vb - vp).cross(vc - vp); + btScalar l0 = m0.dot(n0); + btScalar l1 = 0.25 * (m0.dot(n_hat) + m_hat.dot(n0)); + btScalar l2 = btScalar(1) / btScalar(6) * (m0.dot(n1) + m_hat.dot(n_hat) + m1.dot(n0)); + btScalar l3 = 0.25 * (m_hat.dot(n1) + m1.dot(n_hat)); + btScalar l4 = m1.dot(n1); + + btScalar k1p = 0.25 * k0 + 0.75 * k1; + btScalar k2p = 0.5 * k1 + 0.5 * k2; + btScalar k3p = 0.75 * k2 + 0.25 * k3; + + btScalar s0 = (l1 * k0 - l0 * k1p) * 4.0; + btScalar s1 = (l2 * k0 - l0 * k2p) * 2.0; + btScalar s2 = 
(l3 * k0 - l0 * k3p) * btScalar(4) / btScalar(3); + btScalar s3 = l4 * k0 - l0 * k3; + + j0 = (s1 * k0 - s0 * k1) * 3.0; + j1 = (s2 * k0 - s0 * k2) * 1.5; + j2 = (s3 * k0 - s0 * k3); } static SIMD_FORCE_INLINE bool signDetermination1Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1) { - btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0) - btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(v0) + btScalar Yu0 = k0 * (1.0 - u0) * (1.0 - u0) * (1.0 - u0) + 3.0 * k1 * u0 * (1.0 - u0) * (1.0 - u0) + 3.0 * k2 * u0 * u0 * (1.0 - u0) + k3 * u0 * u0 * u0; // Y(u0) + btScalar Yv0 = k0 * (1.0 - v0) * (1.0 - v0) * (1.0 - v0) + 3.0 * k1 * v0 * (1.0 - v0) * (1.0 - v0) + 3.0 * k2 * v0 * v0 * (1.0 - v0) + k3 * v0 * v0 * v0; // Y(v0) - btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0; - btScalar L = sameSign(sign_Ytp, k0) ? u1 : u0; - sign_Ytp = (v0 > v1) ? Yv0 : -Yv0; - btScalar K = (sameSign(sign_Ytp,k0)) ? v1 : v0; - return diffSign(L,K); + btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0; + btScalar L = sameSign(sign_Ytp, k0) ? u1 : u0; + sign_Ytp = (v0 > v1) ? Yv0 : -Yv0; + btScalar K = (sameSign(sign_Ytp, k0)) ? v1 : v0; + return diffSign(L, K); } static SIMD_FORCE_INLINE bool signDetermination2Internal(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& j0, const btScalar& j1, const btScalar& j2, const btScalar& u0, const btScalar& u1, const btScalar& v0, const btScalar& v1) { - btScalar Yu0 = k0*(1.0-u0)*(1.0-u0)*(1.0-u0) + 3.0*k1*u0*(1.0-u0)*(1.0-u0) + 3.0*k2*u0*u0*(1.0-u0) + k3*u0*u0*u0; // Y(u0) - btScalar sign_Ytp = (u0 > u1) ? 
Yu0 : -Yu0, L1, L2; - if (diffSign(sign_Ytp,k0)) - { - L1 = u0; - L2 = u1; - } - else - { - btScalar Yp_u0 = j0*(1.0-u0)*(1.0-u0) + 2.0*j1*(1.0-u0)*u0 + j2*u0*u0; - if (sameSign(Yp_u0,j0)) - { - L1 = u1; - L2 = u1; - } - else - { - L1 = u0; - L2 = u0; - } - } - btScalar Yv0 = k0*(1.0-v0)*(1.0-v0)*(1.0-v0) + 3.0*k1*v0*(1.0-v0)*(1.0-v0) + 3.0*k2*v0*v0*(1.0-v0) + k3*v0*v0*v0; // Y(uv0) - sign_Ytp = (v0 > v1) ? Yv0 : -Yv0; - btScalar K1, K2; - if (diffSign(sign_Ytp,k0)) - { - K1 = v0; - K2 = v1; - } - else - { - btScalar Yp_v0 = j0*(1.0-v0)*(1.0-v0) + 2.0*j1*(1.0-v0)*v0 + j2*v0*v0; - if (sameSign(Yp_v0,j0)) - { - K1 = v1; - K2 = v1; - } - else - { - K1 = v0; - K2 = v0; - } - } - return (diffSign(K1, L1) || diffSign(L2, K2)); + btScalar Yu0 = k0 * (1.0 - u0) * (1.0 - u0) * (1.0 - u0) + 3.0 * k1 * u0 * (1.0 - u0) * (1.0 - u0) + 3.0 * k2 * u0 * u0 * (1.0 - u0) + k3 * u0 * u0 * u0; // Y(u0) + btScalar sign_Ytp = (u0 > u1) ? Yu0 : -Yu0, L1, L2; + if (diffSign(sign_Ytp, k0)) + { + L1 = u0; + L2 = u1; + } + else + { + btScalar Yp_u0 = j0 * (1.0 - u0) * (1.0 - u0) + 2.0 * j1 * (1.0 - u0) * u0 + j2 * u0 * u0; + if (sameSign(Yp_u0, j0)) + { + L1 = u1; + L2 = u1; + } + else + { + L1 = u0; + L2 = u0; + } + } + btScalar Yv0 = k0 * (1.0 - v0) * (1.0 - v0) * (1.0 - v0) + 3.0 * k1 * v0 * (1.0 - v0) * (1.0 - v0) + 3.0 * k2 * v0 * v0 * (1.0 - v0) + k3 * v0 * v0 * v0; // Y(uv0) + sign_Ytp = (v0 > v1) ? 
Yv0 : -Yv0; + btScalar K1, K2; + if (diffSign(sign_Ytp, k0)) + { + K1 = v0; + K2 = v1; + } + else + { + btScalar Yp_v0 = j0 * (1.0 - v0) * (1.0 - v0) + 2.0 * j1 * (1.0 - v0) * v0 + j2 * v0 * v0; + if (sameSign(Yp_v0, j0)) + { + K1 = v1; + K2 = v1; + } + else + { + K1 = v0; + K2 = v0; + } + } + return (diffSign(K1, L1) || diffSign(L2, K2)); } static SIMD_FORCE_INLINE bool signDetermination1(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt) { - btScalar j0, j1, j2, u0, u1, v0, v1; - // p1 - getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) - { - btScalar lt0, lt1; - getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1)) - return false; - } - // p2 - getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) - { - btScalar lt0, lt1; - getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1)) - return false; - } - // p3 - getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) - { - btScalar lt0, lt1; - getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination1Internal(k0,k1,k2,k3,u0,u1,v0,v1)) - return false; - } - return true; + btScalar j0, j1, j2, u0, u1, v0, v1; + // p1 + getJs(k0, k1, k2, k3, face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + 
getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1)) + return false; + } + // p2 + getJs(k0, k1, k2, k3, face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1)) + return false; + } + // p3 + getJs(k0, k1, k2, k3, face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + getSigns(true, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination1Internal(k0, k1, k2, k3, u0, u1, v0, v1)) + return false; + } + return true; } static SIMD_FORCE_INLINE bool signDetermination2(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt) { - btScalar j0, j1, j2, u0, u1, v0, v1; - // p1 - getJs(k0,k1,k2,k3,face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) - { - btScalar lt0, lt1; - bool bt0 = true, bt1=true; - getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - bt0 = false; - if (lt1 < -SAFE_EPSILON) - bt1 = false; - if (!bt0 && !bt1) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1)) - return false; - } - // p2 - getJs(k0,k1,k2,k3,face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) 
- { - btScalar lt0, lt1; - bool bt0=true, bt1=true; - getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - bt0 = false; - if (lt1 < -SAFE_EPSILON) - bt1 = false; - if (!bt0 && !bt1) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1)) - return false; - } - // p3 - getJs(k0,k1,k2,k3,face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2); - if (nearZero(j0+j2-j1*2.0)) - { - btScalar lt0, lt1; - bool bt0=true, bt1=true; - getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); - if (lt0 < -SAFE_EPSILON) - bt0 = false; - if (lt1 < -SAFE_EPSILON) - bt1 = false; - if (!bt0 && !bt1) - return false; - } - else - { - polyDecomposition(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1); - if (!signDetermination2Internal(k0,k1,k2,k3,j0,j1,j2,u0,u1,v0,v1)) - return false; - } - return true; + btScalar j0, j1, j2, u0, u1, v0, v1; + // p1 + getJs(k0, k1, k2, k3, face->m_n[0], face->m_n[1], face->m_n[2], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + bool bt0 = true, bt1 = true; + getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + bt0 = false; + if (lt1 < -SAFE_EPSILON) + bt1 = false; + if (!bt0 && !bt1) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1)) + return false; + } + // p2 + getJs(k0, k1, k2, k3, face->m_n[1], face->m_n[2], face->m_n[0], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + bool bt0 = true, bt1 = true; + getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + bt0 = false; + if (lt1 < -SAFE_EPSILON) + bt1 = false; + if (!bt0 && !bt1) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, 
v0, v1)) + return false; + } + // p3 + getJs(k0, k1, k2, k3, face->m_n[2], face->m_n[0], face->m_n[1], node, dt, j0, j1, j2); + if (nearZero(j0 + j2 - j1 * 2.0)) + { + btScalar lt0, lt1; + bool bt0 = true, bt1 = true; + getSigns(false, k0, k1, k2, k3, j0, j2, lt0, lt1); + if (lt0 < -SAFE_EPSILON) + bt0 = false; + if (lt1 < -SAFE_EPSILON) + bt1 = false; + if (!bt0 && !bt1) + return false; + } + else + { + polyDecomposition(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1); + if (!signDetermination2Internal(k0, k1, k2, k3, j0, j1, j2, u0, u1, v0, v1)) + return false; + } + return true; } static SIMD_FORCE_INLINE bool coplanarAndInsideTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt) { - // Coplanar test - if (diffSign(k1-k0, k3-k2)) - { - // Case b: - if (sameSign(k0, k3) && !rootFindingLemma(k0,k1,k2,k3)) - return false; - // inside test - return signDetermination2(k0, k1, k2, k3, face, node, dt); - } - else - { - // Case c: - if (sameSign(k0, k3)) - return false; - // inside test - return signDetermination1(k0, k1, k2, k3, face, node, dt); - } - return false; + // Coplanar test + if (diffSign(k1 - k0, k3 - k2)) + { + // Case b: + if (sameSign(k0, k3) && !rootFindingLemma(k0, k1, k2, k3)) + return false; + // inside test + return signDetermination2(k0, k1, k2, k3, face, node, dt); + } + else + { + // Case c: + if (sameSign(k0, k3)) + return false; + // inside test + return signDetermination1(k0, k1, k2, k3, face, node, dt); + } + return false; } static SIMD_FORCE_INLINE bool conservativeCulling(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg) { - if (k0 > mrg && k1 > mrg && k2 > mrg && k3 > mrg) - return true; - if (k0 < -mrg && k1 < -mrg && k2 < -mrg && k3 < -mrg) - return true; - return false; + if (k0 > mrg && k1 > mrg && k2 > mrg && k3 > mrg) + return true; + if (k0 < -mrg && k1 < -mrg && k2 < -mrg && k3 
< -mrg) + return true; + return false; } static SIMD_FORCE_INLINE bool bernsteinVFTest(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& mrg, const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt) { - if (conservativeCulling(k0, k1, k2, k3, mrg)) - return false; - return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt); + if (conservativeCulling(k0, k1, k2, k3, mrg)) + return false; + return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt); } static SIMD_FORCE_INLINE void deCasteljau(const btScalar& k0, const btScalar& k1, const btScalar& k2, const btScalar& k3, const btScalar& t0, btScalar& k10, btScalar& k20, btScalar& k30, btScalar& k21, btScalar& k12) { - k10 = k0*(1.0-t0) + k1*t0; - btScalar k11 = k1*(1.0-t0) + k2*t0; - k12 = k2*(1.0-t0) + k3*t0; - k20 = k10*(1.0-t0) + k11*t0; - k21 = k11*(1.0-t0) + k12*t0; - k30 = k20*(1.0-t0) + k21*t0; + k10 = k0 * (1.0 - t0) + k1 * t0; + btScalar k11 = k1 * (1.0 - t0) + k2 * t0; + k12 = k2 * (1.0 - t0) + k3 * t0; + k20 = k10 * (1.0 - t0) + k11 * t0; + k21 = k11 * (1.0 - t0) + k12 * t0; + k30 = k20 * (1.0 - t0) + k21 * t0; } static SIMD_FORCE_INLINE bool bernsteinVFTest(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg) { - btScalar k0, k1, k2, k3; - getBernsteinCoeff(face, node, dt, k0, k1, k2, k3); - if (conservativeCulling(k0, k1, k2, k3, mrg)) - return false; - return true; - if (diffSign(k2-2.0*k1+k0, k3-2.0*k2+k1)) - { - btScalar k10, k20, k30, k21, k12; - btScalar t0 = (k2-2.0*k1+k0)/(k0-3.0*k1+3.0*k2-k3); - deCasteljau(k0, k1, k2, k3, t0, k10, k20, k30, k21, k12); - return bernsteinVFTest(k0, k10, k20, k30, mrg, face, node, dt) || bernsteinVFTest(k30, k21, k12, k3, mrg, face, node, dt); - } - return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt); + btScalar k0, k1, k2, k3; + getBernsteinCoeff(face, node, dt, k0, k1, k2, k3); + if (conservativeCulling(k0, k1, k2, k3, mrg)) + return 
false; + return true; + if (diffSign(k2 - 2.0 * k1 + k0, k3 - 2.0 * k2 + k1)) + { + btScalar k10, k20, k30, k21, k12; + btScalar t0 = (k2 - 2.0 * k1 + k0) / (k0 - 3.0 * k1 + 3.0 * k2 - k3); + deCasteljau(k0, k1, k2, k3, t0, k10, k20, k30, k21, k12); + return bernsteinVFTest(k0, k10, k20, k30, mrg, face, node, dt) || bernsteinVFTest(k30, k21, k12, k3, mrg, face, node, dt); + } + return coplanarAndInsideTest(k0, k1, k2, k3, face, node, dt); } static SIMD_FORCE_INLINE bool continuousCollisionDetection(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary) { - if (hasSeparatingPlane(face, node, dt)) - return false; - btVector3 x21 = face->m_n[1]->m_x - face->m_n[0]->m_x; - btVector3 x31 = face->m_n[2]->m_x - face->m_n[0]->m_x; - btVector3 x41 = node->m_x - face->m_n[0]->m_x; - btVector3 v21 = face->m_n[1]->m_v - face->m_n[0]->m_v; - btVector3 v31 = face->m_n[2]->m_v - face->m_n[0]->m_v; - btVector3 v41 = node->m_v - face->m_n[0]->m_v; - btVector3 a = x21.cross(x31); - btVector3 b = x21.cross(v31) + v21.cross(x31); - btVector3 c = v21.cross(v31); - btVector3 d = x41; - btVector3 e = v41; - btScalar a0 = a.dot(d); - btScalar a1 = a.dot(e) + b.dot(d); - btScalar a2 = c.dot(d) + b.dot(e); - btScalar a3 = c.dot(e); - btScalar eps = SAFE_EPSILON; - int num_roots = 0; - btScalar roots[3]; - if (std::abs(a3) < eps) - { - // cubic term is zero - if (std::abs(a2) < eps) - { - if (std::abs(a1) < eps) - { - if (std::abs(a0) < eps) - { - num_roots = 2; - roots[0] = 0; - roots[1] = dt; - } - } - else - { - num_roots = 1; - roots[0] = -a0/a1; - } - } - else - { - num_roots = SolveP2(roots, a1/a2, a0/a2); - } - } - else - { - num_roots = SolveP3(roots, a2/a3, a1/a3, a0/a3); - } -// std::sort(roots, roots+num_roots); - if (num_roots > 1) - { - if (roots[0] > roots[1]) - btSwap(roots[0], roots[1]); - } - if (num_roots > 2) - { - if (roots[0] > roots[2]) - btSwap(roots[0], roots[2]); - if (roots[1] > roots[2]) - 
btSwap(roots[1], roots[2]); - } - for (int r = 0; r < num_roots; ++r) - { - double root = roots[r]; - if (root <= 0) - continue; - if (root > dt + SIMD_EPSILON) - return false; - btVector3 x1 = face->m_n[0]->m_x + root * face->m_n[0]->m_v; - btVector3 x2 = face->m_n[1]->m_x + root * face->m_n[1]->m_v; - btVector3 x3 = face->m_n[2]->m_x + root * face->m_n[2]->m_v; - btVector3 x4 = node->m_x + root * node->m_v; - btVector3 normal = (x2-x1).cross(x3-x1); - normal.safeNormalize(); - if (proximityTest(x1, x2, x3, x4, normal, mrg, bary)) - return true; - } - return false; + if (hasSeparatingPlane(face, node, dt)) + return false; + btVector3 x21 = face->m_n[1]->m_x - face->m_n[0]->m_x; + btVector3 x31 = face->m_n[2]->m_x - face->m_n[0]->m_x; + btVector3 x41 = node->m_x - face->m_n[0]->m_x; + btVector3 v21 = face->m_n[1]->m_v - face->m_n[0]->m_v; + btVector3 v31 = face->m_n[2]->m_v - face->m_n[0]->m_v; + btVector3 v41 = node->m_v - face->m_n[0]->m_v; + btVector3 a = x21.cross(x31); + btVector3 b = x21.cross(v31) + v21.cross(x31); + btVector3 c = v21.cross(v31); + btVector3 d = x41; + btVector3 e = v41; + btScalar a0 = a.dot(d); + btScalar a1 = a.dot(e) + b.dot(d); + btScalar a2 = c.dot(d) + b.dot(e); + btScalar a3 = c.dot(e); + btScalar eps = SAFE_EPSILON; + int num_roots = 0; + btScalar roots[3]; + if (std::abs(a3) < eps) + { + // cubic term is zero + if (std::abs(a2) < eps) + { + if (std::abs(a1) < eps) + { + if (std::abs(a0) < eps) + { + num_roots = 2; + roots[0] = 0; + roots[1] = dt; + } + } + else + { + num_roots = 1; + roots[0] = -a0 / a1; + } + } + else + { + num_roots = SolveP2(roots, a1 / a2, a0 / a2); + } + } + else + { + num_roots = SolveP3(roots, a2 / a3, a1 / a3, a0 / a3); + } + // std::sort(roots, roots+num_roots); + if (num_roots > 1) + { + if (roots[0] > roots[1]) + btSwap(roots[0], roots[1]); + } + if (num_roots > 2) + { + if (roots[0] > roots[2]) + btSwap(roots[0], roots[2]); + if (roots[1] > roots[2]) + btSwap(roots[1], roots[2]); + } + for (int r = 0; r 
< num_roots; ++r) + { + double root = roots[r]; + if (root <= 0) + continue; + if (root > dt + SIMD_EPSILON) + return false; + btVector3 x1 = face->m_n[0]->m_x + root * face->m_n[0]->m_v; + btVector3 x2 = face->m_n[1]->m_x + root * face->m_n[1]->m_v; + btVector3 x3 = face->m_n[2]->m_x + root * face->m_n[2]->m_v; + btVector3 x4 = node->m_x + root * node->m_v; + btVector3 normal = (x2 - x1).cross(x3 - x1); + normal.safeNormalize(); + if (proximityTest(x1, x2, x3, x4, normal, mrg, bary)) + return true; + } + return false; } static SIMD_FORCE_INLINE bool bernsteinCCD(const btSoftBody::Face* face, const btSoftBody::Node* node, const btScalar& dt, const btScalar& mrg, btVector3& bary) { - if (!bernsteinVFTest(face, node, dt, mrg)) - return false; - if (!continuousCollisionDetection(face, node, dt, 1e-6, bary)) - return false; - return true; + if (!bernsteinVFTest(face, node, dt, mrg)) + return false; + if (!continuousCollisionDetection(face, node, dt, 1e-6, bary)) + return false; + return true; } // @@ -902,62 +906,61 @@ static inline btMatrix3x3 Diagonal(btScalar x) static inline btMatrix3x3 Diagonal(const btVector3& v) { - btMatrix3x3 m; - m[0] = btVector3(v.getX(), 0, 0); - m[1] = btVector3(0, v.getY(), 0); - m[2] = btVector3(0, 0, v.getZ()); - return (m); -} - -static inline btScalar Dot(const btScalar* a,const btScalar* b, int ndof) -{ - btScalar result = 0; - for (int i = 0; i < ndof; ++i) - result += a[i] * b[i]; - return result; -} - -static inline btMatrix3x3 OuterProduct(const btScalar* v1,const btScalar* v2,const btScalar* v3, - const btScalar* u1, const btScalar* u2, const btScalar* u3, int ndof) -{ - btMatrix3x3 m; - btScalar a11 = Dot(v1,u1,ndof); - btScalar a12 = Dot(v1,u2,ndof); - btScalar a13 = Dot(v1,u3,ndof); - - btScalar a21 = Dot(v2,u1,ndof); - btScalar a22 = Dot(v2,u2,ndof); - btScalar a23 = Dot(v2,u3,ndof); - - btScalar a31 = Dot(v3,u1,ndof); - btScalar a32 = Dot(v3,u2,ndof); - btScalar a33 = Dot(v3,u3,ndof); - m[0] = btVector3(a11, a12, a13); - 
m[1] = btVector3(a21, a22, a23); - m[2] = btVector3(a31, a32, a33); - return (m); -} - -static inline btMatrix3x3 OuterProduct(const btVector3& v1,const btVector3& v2) -{ - btMatrix3x3 m; - btScalar a11 = v1[0] * v2[0]; - btScalar a12 = v1[0] * v2[1]; - btScalar a13 = v1[0] * v2[2]; - - btScalar a21 = v1[1] * v2[0]; - btScalar a22 = v1[1] * v2[1]; - btScalar a23 = v1[1] * v2[2]; - - btScalar a31 = v1[2] * v2[0]; - btScalar a32 = v1[2] * v2[1]; - btScalar a33 = v1[2] * v2[2]; - m[0] = btVector3(a11, a12, a13); - m[1] = btVector3(a21, a22, a23); - m[2] = btVector3(a31, a32, a33); - return (m); + btMatrix3x3 m; + m[0] = btVector3(v.getX(), 0, 0); + m[1] = btVector3(0, v.getY(), 0); + m[2] = btVector3(0, 0, v.getZ()); + return (m); +} + +static inline btScalar Dot(const btScalar* a, const btScalar* b, int ndof) +{ + btScalar result = 0; + for (int i = 0; i < ndof; ++i) + result += a[i] * b[i]; + return result; } +static inline btMatrix3x3 OuterProduct(const btScalar* v1, const btScalar* v2, const btScalar* v3, + const btScalar* u1, const btScalar* u2, const btScalar* u3, int ndof) +{ + btMatrix3x3 m; + btScalar a11 = Dot(v1, u1, ndof); + btScalar a12 = Dot(v1, u2, ndof); + btScalar a13 = Dot(v1, u3, ndof); + + btScalar a21 = Dot(v2, u1, ndof); + btScalar a22 = Dot(v2, u2, ndof); + btScalar a23 = Dot(v2, u3, ndof); + + btScalar a31 = Dot(v3, u1, ndof); + btScalar a32 = Dot(v3, u2, ndof); + btScalar a33 = Dot(v3, u3, ndof); + m[0] = btVector3(a11, a12, a13); + m[1] = btVector3(a21, a22, a23); + m[2] = btVector3(a31, a32, a33); + return (m); +} + +static inline btMatrix3x3 OuterProduct(const btVector3& v1, const btVector3& v2) +{ + btMatrix3x3 m; + btScalar a11 = v1[0] * v2[0]; + btScalar a12 = v1[0] * v2[1]; + btScalar a13 = v1[0] * v2[2]; + + btScalar a21 = v1[1] * v2[0]; + btScalar a22 = v1[1] * v2[1]; + btScalar a23 = v1[1] * v2[2]; + + btScalar a31 = v1[2] * v2[0]; + btScalar a32 = v1[2] * v2[1]; + btScalar a33 = v1[2] * v2[2]; + m[0] = btVector3(a11, a12, a13); + 
m[1] = btVector3(a21, a22, a23); + m[2] = btVector3(a31, a32, a33); + return (m); +} // static inline btMatrix3x3 Add(const btMatrix3x3& a, @@ -1008,6 +1011,20 @@ static inline btMatrix3x3 ImpulseMatrix(btScalar dt, } // +static inline btMatrix3x3 ImpulseMatrix(btScalar dt, + const btMatrix3x3& effective_mass_inv, + btScalar imb, + const btMatrix3x3& iwi, + const btVector3& r) +{ + return (Diagonal(1 / dt) * Add(effective_mass_inv, MassMatrix(imb, iwi, r)).inverse()); + // btMatrix3x3 iimb = MassMatrix(imb, iwi, r); + // if (iimb.determinant() == 0) + // return effective_mass_inv.inverse(); + // return effective_mass_inv.inverse() * Add(effective_mass_inv.inverse(), iimb.inverse()).inverse() * iimb.inverse(); +} + +// static inline btMatrix3x3 ImpulseMatrix(btScalar ima, const btMatrix3x3& iia, const btVector3& ra, btScalar imb, const btMatrix3x3& iib, const btVector3& rb) { @@ -1091,73 +1108,70 @@ static inline void ProjectOrigin(const btVector3& a, // static inline bool rayIntersectsTriangle(const btVector3& origin, const btVector3& dir, const btVector3& v0, const btVector3& v1, const btVector3& v2, btScalar& t) { - btScalar a, f, u, v; - - btVector3 e1 = v1 - v0; - btVector3 e2 = v2 - v0; - btVector3 h = dir.cross(e2); - a = e1.dot(h); - - if (a > -0.00001 && a < 0.00001) - return (false); - - f = btScalar(1) / a; - btVector3 s = origin - v0; - u = f * s.dot(h); - - if (u < 0.0 || u > 1.0) - return (false); - - btVector3 q = s.cross(e1); - v = f * dir.dot(q); - if (v < 0.0 || u + v > 1.0) - return (false); - // at this stage we can compute t to find out where - // the intersection point is on the line - t = f * e2.dot(q); - if (t > 0) // ray intersection - return (true); - else // this means that there is a line intersection - // but not a ray intersection - return (false); + btScalar a, f, u, v; + + btVector3 e1 = v1 - v0; + btVector3 e2 = v2 - v0; + btVector3 h = dir.cross(e2); + a = e1.dot(h); + + if (a > -0.00001 && a < 0.00001) + return (false); + + f = 
btScalar(1) / a; + btVector3 s = origin - v0; + u = f * s.dot(h); + + if (u < 0.0 || u > 1.0) + return (false); + + btVector3 q = s.cross(e1); + v = f * dir.dot(q); + if (v < 0.0 || u + v > 1.0) + return (false); + // at this stage we can compute t to find out where + // the intersection point is on the line + t = f * e2.dot(q); + if (t > 0) // ray intersection + return (true); + else // this means that there is a line intersection + // but not a ray intersection + return (false); } static inline bool lineIntersectsTriangle(const btVector3& rayStart, const btVector3& rayEnd, const btVector3& p1, const btVector3& p2, const btVector3& p3, btVector3& sect, btVector3& normal) { - btVector3 dir = rayEnd - rayStart; - btScalar dir_norm = dir.norm(); - if (dir_norm < SIMD_EPSILON) - return false; - dir.normalize(); - - btScalar t; - - bool ret = rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t); - - if (ret) - { - if (t <= dir_norm) - { - sect = rayStart + dir * t; - } - else - { - ret = false; - } - } - - if (ret) - { - btVector3 n = (p3-p1).cross(p2-p1); - n.safeNormalize(); - if (n.dot(dir) < 0) - normal = n; - else - normal = -n; - } - return ret; -} + btVector3 dir = rayEnd - rayStart; + btScalar dir_norm = dir.norm(); + if (dir_norm < SIMD_EPSILON) + return false; + dir.normalize(); + btScalar t; + bool ret = rayIntersectsTriangle(rayStart, dir, p1, p2, p3, t); + + if (ret) + { + if (t <= dir_norm) + { + sect = rayStart + dir * t; + } + else + { + ret = false; + } + } + if (ret) + { + btVector3 n = (p3 - p1).cross(p2 - p1); + n.safeNormalize(); + if (n.dot(dir) < 0) + normal = n; + else + normal = -n; + } + return ret; +} // template <typename T> @@ -1586,57 +1600,57 @@ struct btSoftColliders psa->m_cdbvt.collideTT(psa->m_cdbvt.m_root, psb->m_cdbvt.m_root, *this); } }; - // - // CollideSDF_RS - // - struct CollideSDF_RS : btDbvt::ICollide - { - void Process(const btDbvtNode* leaf) - { - btSoftBody::Node* node = (btSoftBody::Node*)leaf->data; - DoNode(*node); - } - 
void DoNode(btSoftBody::Node& n) const - { - const btScalar m = n.m_im > 0 ? dynmargin : stamargin; - btSoftBody::RContact c; - - if ((!n.m_battach) && - psb->checkContact(m_colObj1Wrap, n.m_x, m, c.m_cti)) - { - const btScalar ima = n.m_im; - const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f; - const btScalar ms = ima + imb; - if (ms > 0) - { - const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); - static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); - const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; - const btVector3 ra = n.m_x - wtr.getOrigin(); - const btVector3 va = m_rigidBody ? m_rigidBody->getVelocityInLocalPoint(ra) * psb->m_sst.sdt : btVector3(0, 0, 0); - const btVector3 vb = n.m_x - n.m_q; - const btVector3 vr = vb - va; - const btScalar dn = btDot(vr, c.m_cti.m_normal); - const btVector3 fv = vr - c.m_cti.m_normal * dn; - const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); - c.m_node = &n; - c.m_c0 = ImpulseMatrix(psb->m_sst.sdt, ima, imb, iwi, ra); - c.m_c1 = ra; - c.m_c2 = ima * psb->m_sst.sdt; - c.m_c3 = fv.length2() < (dn * fc * dn * fc) ? 0 : 1 - fc; - c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; - psb->m_rcontacts.push_back(c); - if (m_rigidBody) - m_rigidBody->activate(); - } - } - } - btSoftBody* psb; - const btCollisionObjectWrapper* m_colObj1Wrap; - btRigidBody* m_rigidBody; - btScalar dynmargin; - btScalar stamargin; - }; + // + // CollideSDF_RS + // + struct CollideSDF_RS : btDbvt::ICollide + { + void Process(const btDbvtNode* leaf) + { + btSoftBody::Node* node = (btSoftBody::Node*)leaf->data; + DoNode(*node); + } + void DoNode(btSoftBody::Node& n) const + { + const btScalar m = n.m_im > 0 ? 
dynmargin : stamargin; + btSoftBody::RContact c; + + if ((!n.m_battach) && + psb->checkContact(m_colObj1Wrap, n.m_x, m, c.m_cti)) + { + const btScalar ima = n.m_im; + const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f; + const btScalar ms = ima + imb; + if (ms > 0) + { + const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); + static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); + const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; + const btVector3 ra = n.m_x - wtr.getOrigin(); + const btVector3 va = m_rigidBody ? m_rigidBody->getVelocityInLocalPoint(ra) * psb->m_sst.sdt : btVector3(0, 0, 0); + const btVector3 vb = n.m_x - n.m_q; + const btVector3 vr = vb - va; + const btScalar dn = btDot(vr, c.m_cti.m_normal); + const btVector3 fv = vr - c.m_cti.m_normal * dn; + const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); + c.m_node = &n; + c.m_c0 = ImpulseMatrix(psb->m_sst.sdt, ima, imb, iwi, ra); + c.m_c1 = ra; + c.m_c2 = ima * psb->m_sst.sdt; + c.m_c3 = fv.length2() < (dn * fc * dn * fc) ? 0 : 1 - fc; + c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; + psb->m_rcontacts.push_back(c); + if (m_rigidBody) + m_rigidBody->activate(); + } + } + } + btSoftBody* psb; + const btCollisionObjectWrapper* m_colObj1Wrap; + btRigidBody* m_rigidBody; + btScalar dynmargin; + btScalar stamargin; + }; // // CollideSDF_RD @@ -1654,72 +1668,74 @@ struct btSoftColliders btSoftBody::DeformableNodeRigidContact c; if (!n.m_battach) - { + { // check for collision at x_{n+1}^* if (psb->checkDeformableContact(m_colObj1Wrap, n.m_q, m, c.m_cti, /*predict = */ true)) - { - const btScalar ima = n.m_im; - // todo: collision between multibody and fixed deformable node will be missed. - const btScalar imb = m_rigidBody ? 
m_rigidBody->getInvMass() : 0.f; - const btScalar ms = ima + imb; - if (ms > 0) - { - // resolve contact at x_n - psb->checkDeformableContact(m_colObj1Wrap, n.m_x, m, c.m_cti, /*predict = */ false); - btSoftBody::sCti& cti = c.m_cti; - c.m_node = &n; - const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); - c.m_c2 = ima; - c.m_c3 = fc; - c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; - - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); - static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); - const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; - const btVector3 ra = n.m_x - wtr.getOrigin(); - - c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); - c.m_c1 = ra; - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - btVector3 normal = cti.m_normal; - btVector3 t1 = generateUnitOrthogonalVector(normal); - btVector3 t2 = btCross(normal, t1); - btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; - findJacobian(multibodyLinkCol, jacobianData_normal, c.m_node->m_x, normal); - findJacobian(multibodyLinkCol, jacobianData_t1, c.m_node->m_x, t1); - findJacobian(multibodyLinkCol, jacobianData_t2, c.m_node->m_x, t2); - - btScalar* J_n = &jacobianData_normal.m_jacobians[0]; - btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; - btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; - - btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t2 = 
&jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - - btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), - t1.getX(), t1.getY(), t1.getZ(), - t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - btMatrix3x3 local_impulse_matrix = (Diagonal(n.m_im) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); - c.m_c0 = rot.transpose() * local_impulse_matrix * rot; - c.jacobianData_normal = jacobianData_normal; - c.jacobianData_t1 = jacobianData_t1; - c.jacobianData_t2 = jacobianData_t2; - c.t1 = t1; - c.t2 = t2; - } - } - psb->m_nodeRigidContacts.push_back(c); - } - } + { + const btScalar ima = n.m_im; + // todo: collision between multibody and fixed deformable node will be missed. + const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f; + const btScalar ms = ima + imb; + if (ms > 0) + { + // resolve contact at x_n + psb->checkDeformableContact(m_colObj1Wrap, n.m_x, m, c.m_cti, /*predict = */ false); + btSoftBody::sCti& cti = c.m_cti; + c.m_node = &n; + const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); + c.m_c2 = ima; + c.m_c3 = fc; + c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; + c.m_c5 = n.m_effectiveMass_inv; + + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); + static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); + const btMatrix3x3& iwi = m_rigidBody ? 
m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; + const btVector3 ra = n.m_x - wtr.getOrigin(); + + c.m_c0 = ImpulseMatrix(1, n.m_effectiveMass_inv, imb, iwi, ra); + // c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); + c.m_c1 = ra; + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + btVector3 normal = cti.m_normal; + btVector3 t1 = generateUnitOrthogonalVector(normal); + btVector3 t2 = btCross(normal, t1); + btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; + findJacobian(multibodyLinkCol, jacobianData_normal, c.m_node->m_x, normal); + findJacobian(multibodyLinkCol, jacobianData_t1, c.m_node->m_x, t1); + findJacobian(multibodyLinkCol, jacobianData_t2, c.m_node->m_x, t2); + + btScalar* J_n = &jacobianData_normal.m_jacobians[0]; + btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; + btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; + + btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + + btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), + t1.getX(), t1.getY(), t1.getZ(), + t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + btMatrix3x3 local_impulse_matrix = (n.m_effectiveMass_inv + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); + c.m_c0 = rot.transpose() * local_impulse_matrix * rot; + c.jacobianData_normal = jacobianData_normal; + c.jacobianData_t1 = jacobianData_t1; + c.jacobianData_t2 = jacobianData_t2; + c.t1 = t1; + c.t2 = t2; + } + } + psb->m_nodeRigidContacts.push_back(c); + } + } } } btSoftBody* psb; @@ -1728,112 +1744,111 @@ struct btSoftColliders btScalar dynmargin; 
btScalar stamargin; }; - - // - // CollideSDF_RDF - // - struct CollideSDF_RDF : btDbvt::ICollide - { - void Process(const btDbvtNode* leaf) - { - btSoftBody::Face* face = (btSoftBody::Face*)leaf->data; - DoNode(*face); - } - void DoNode(btSoftBody::Face& f) const - { - btSoftBody::Node* n0 = f.m_n[0]; - btSoftBody::Node* n1 = f.m_n[1]; - btSoftBody::Node* n2 = f.m_n[2]; - const btScalar m = (n0->m_im > 0 && n1->m_im > 0 && n2->m_im > 0 )? dynmargin : stamargin; - btSoftBody::DeformableFaceRigidContact c; - btVector3 contact_point; - btVector3 bary; - if (psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, true)) - { - f.m_pcontact[3] = 1; - btScalar ima = n0->m_im + n1->m_im + n2->m_im; - const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f; - // todo: collision between multibody and fixed deformable face will be missed. - const btScalar ms = ima + imb; - if (ms > 0) - { - // resolve contact at x_n -// psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false); - btSoftBody::sCti& cti = c.m_cti; - c.m_contactPoint = contact_point; - c.m_bary = bary; - // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices - c.m_weights = btScalar(2)/(btScalar(1) + bary.length2()) * bary; - c.m_face = &f; + + // + // CollideSDF_RDF + // + struct CollideSDF_RDF : btDbvt::ICollide + { + void Process(const btDbvtNode* leaf) + { + btSoftBody::Face* face = (btSoftBody::Face*)leaf->data; + DoNode(*face); + } + void DoNode(btSoftBody::Face& f) const + { + btSoftBody::Node* n0 = f.m_n[0]; + btSoftBody::Node* n1 = f.m_n[1]; + btSoftBody::Node* n2 = f.m_n[2]; + const btScalar m = (n0->m_im > 0 && n1->m_im > 0 && n2->m_im > 0) ? 
dynmargin : stamargin; + btSoftBody::DeformableFaceRigidContact c; + btVector3 contact_point; + btVector3 bary; + if (psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, true)) + { + btScalar ima = n0->m_im + n1->m_im + n2->m_im; + const btScalar imb = m_rigidBody ? m_rigidBody->getInvMass() : 0.f; + // todo: collision between multibody and fixed deformable face will be missed. + const btScalar ms = ima + imb; + if (ms > 0) + { + // resolve contact at x_n + // psb->checkDeformableFaceContact(m_colObj1Wrap, f, contact_point, bary, m, c.m_cti, /*predict = */ false); + btSoftBody::sCti& cti = c.m_cti; + c.m_contactPoint = contact_point; + c.m_bary = bary; + // todo xuchenhan@: this is assuming mass of all vertices are the same. Need to modify if mass are different for distinct vertices + c.m_weights = btScalar(2) / (btScalar(1) + bary.length2()) * bary; + c.m_face = &f; // friction is handled by the nodes to prevent sticking -// const btScalar fc = 0; - const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); - - // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf - ima = bary.getX()*c.m_weights.getX() * n0->m_im + bary.getY()*c.m_weights.getY() * n1->m_im + bary.getZ()*c.m_weights.getZ() * n2->m_im; - c.m_c2 = ima; - c.m_c3 = fc; - c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; - if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) - { - const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); - static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); - const btMatrix3x3& iwi = m_rigidBody ? 
m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; - const btVector3 ra = contact_point - wtr.getOrigin(); - - // we do not scale the impulse matrix by dt - c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); - c.m_c1 = ra; - } - else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) - { - btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); - if (multibodyLinkCol) - { - btVector3 normal = cti.m_normal; - btVector3 t1 = generateUnitOrthogonalVector(normal); - btVector3 t2 = btCross(normal, t1); - btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; - findJacobian(multibodyLinkCol, jacobianData_normal, contact_point, normal); - findJacobian(multibodyLinkCol, jacobianData_t1, contact_point, t1); - findJacobian(multibodyLinkCol, jacobianData_t2, contact_point, t2); - - btScalar* J_n = &jacobianData_normal.m_jacobians[0]; - btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; - btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; - - btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; - btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; - - btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), - t1.getX(), t1.getY(), t1.getZ(), - t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame - const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; - btMatrix3x3 local_impulse_matrix = (Diagonal(ima) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); - c.m_c0 = rot.transpose() * local_impulse_matrix * rot; - c.jacobianData_normal = jacobianData_normal; - c.jacobianData_t1 = jacobianData_t1; - c.jacobianData_t2 = jacobianData_t2; - c.t1 = t1; - c.t2 = t2; - } - } - psb->m_faceRigidContacts.push_back(c); - } - } - else - { - f.m_pcontact[3] = 0; - } - } - btSoftBody* psb; - const btCollisionObjectWrapper* m_colObj1Wrap; - 
btRigidBody* m_rigidBody; - btScalar dynmargin; - btScalar stamargin; - }; - + // const btScalar fc = 0; + const btScalar fc = psb->m_cfg.kDF * m_colObj1Wrap->getCollisionObject()->getFriction(); + + // the effective inverse mass of the face as in https://graphics.stanford.edu/papers/cloth-sig02/cloth.pdf + ima = bary.getX() * c.m_weights.getX() * n0->m_im + bary.getY() * c.m_weights.getY() * n1->m_im + bary.getZ() * c.m_weights.getZ() * n2->m_im; + c.m_c2 = ima; + c.m_c3 = fc; + c.m_c4 = m_colObj1Wrap->getCollisionObject()->isStaticOrKinematicObject() ? psb->m_cfg.kKHR : psb->m_cfg.kCHR; + c.m_c5 = Diagonal(ima); + if (cti.m_colObj->getInternalType() == btCollisionObject::CO_RIGID_BODY) + { + const btTransform& wtr = m_rigidBody ? m_rigidBody->getWorldTransform() : m_colObj1Wrap->getCollisionObject()->getWorldTransform(); + static const btMatrix3x3 iwiStatic(0, 0, 0, 0, 0, 0, 0, 0, 0); + const btMatrix3x3& iwi = m_rigidBody ? m_rigidBody->getInvInertiaTensorWorld() : iwiStatic; + const btVector3 ra = contact_point - wtr.getOrigin(); + + // we do not scale the impulse matrix by dt + c.m_c0 = ImpulseMatrix(1, ima, imb, iwi, ra); + c.m_c1 = ra; + } + else if (cti.m_colObj->getInternalType() == btCollisionObject::CO_FEATHERSTONE_LINK) + { + btMultiBodyLinkCollider* multibodyLinkCol = (btMultiBodyLinkCollider*)btMultiBodyLinkCollider::upcast(cti.m_colObj); + if (multibodyLinkCol) + { + btVector3 normal = cti.m_normal; + btVector3 t1 = generateUnitOrthogonalVector(normal); + btVector3 t2 = btCross(normal, t1); + btMultiBodyJacobianData jacobianData_normal, jacobianData_t1, jacobianData_t2; + findJacobian(multibodyLinkCol, jacobianData_normal, contact_point, normal); + findJacobian(multibodyLinkCol, jacobianData_t1, contact_point, t1); + findJacobian(multibodyLinkCol, jacobianData_t2, contact_point, t2); + + btScalar* J_n = &jacobianData_normal.m_jacobians[0]; + btScalar* J_t1 = &jacobianData_t1.m_jacobians[0]; + btScalar* J_t2 = &jacobianData_t2.m_jacobians[0]; + + 
btScalar* u_n = &jacobianData_normal.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t1 = &jacobianData_t1.m_deltaVelocitiesUnitImpulse[0]; + btScalar* u_t2 = &jacobianData_t2.m_deltaVelocitiesUnitImpulse[0]; + + btMatrix3x3 rot(normal.getX(), normal.getY(), normal.getZ(), + t1.getX(), t1.getY(), t1.getZ(), + t2.getX(), t2.getY(), t2.getZ()); // world frame to local frame + const int ndof = multibodyLinkCol->m_multiBody->getNumDofs() + 6; + btMatrix3x3 local_impulse_matrix = (Diagonal(ima) + OuterProduct(J_n, J_t1, J_t2, u_n, u_t1, u_t2, ndof)).inverse(); + c.m_c0 = rot.transpose() * local_impulse_matrix * rot; + c.jacobianData_normal = jacobianData_normal; + c.jacobianData_t1 = jacobianData_t1; + c.jacobianData_t2 = jacobianData_t2; + c.t1 = t1; + c.t2 = t2; + } + } + psb->m_faceRigidContacts.push_back(c); + } + } + // Set caching barycenters to be false after collision detection. + // Only turn on when contact is static. + f.m_pcontact[3] = 0; + } + btSoftBody* psb; + const btCollisionObjectWrapper* m_colObj1Wrap; + btRigidBody* m_rigidBody; + btScalar dynmargin; + btScalar stamargin; + }; + // // CollideVF_SS // @@ -1844,12 +1859,12 @@ struct btSoftColliders { btSoftBody::Node* node = (btSoftBody::Node*)lnode->data; btSoftBody::Face* face = (btSoftBody::Face*)lface->data; - for (int i = 0; i < 3; ++i) - { - if (face->m_n[i] == node) - continue; - } - + for (int i = 0; i < 3; ++i) + { + if (face->m_n[i] == node) + continue; + } + btVector3 o = node->m_x; btVector3 p; btScalar d = SIMD_INFINITY; @@ -1879,7 +1894,7 @@ struct btSoftColliders c.m_node = node; c.m_face = face; c.m_weights = w; - c.m_friction = btMax (psb[0]->m_cfg.kDF, psb[1]->m_cfg.kDF); + c.m_friction = btMax(psb[0]->m_cfg.kDF, psb[1]->m_cfg.kDF); c.m_cfm[0] = ma / ms * psb[0]->m_cfg.kSHR; c.m_cfm[1] = mb / ms * psb[1]->m_cfg.kSHR; psb[0]->m_scontacts.push_back(c); @@ -1889,206 +1904,205 @@ struct btSoftColliders btSoftBody* psb[2]; btScalar mrg; }; - - - // - // CollideVF_DD - // - struct 
CollideVF_DD : btDbvt::ICollide - { - void Process(const btDbvtNode* lnode, - const btDbvtNode* lface) - { - btSoftBody::Node* node = (btSoftBody::Node*)lnode->data; - btSoftBody::Face* face = (btSoftBody::Face*)lface->data; - btVector3 bary; - if (proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary)) - { - const btSoftBody::Node* n[] = {face->m_n[0], face->m_n[1], face->m_n[2]}; - const btVector3 w = bary; - const btScalar ma = node->m_im; - btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w); - if ((n[0]->m_im <= 0) || - (n[1]->m_im <= 0) || - (n[2]->m_im <= 0)) - { - mb = 0; - } - const btScalar ms = ma + mb; - if (ms > 0) - { - btSoftBody::DeformableFaceNodeContact c; - c.m_normal = face->m_normal; - if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) - c.m_normal = -face->m_normal; - c.m_margin = mrg; - c.m_node = node; - c.m_face = face; - c.m_bary = w; - c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; - psb[0]->m_faceNodeContacts.push_back(c); - } - } - } - btSoftBody* psb[2]; - btScalar mrg; - bool useFaceNormal; - }; - - // - // CollideFF_DD - // - struct CollideFF_DD : btDbvt::ICollide - { - void Process(const btDbvntNode* lface1, - const btDbvntNode* lface2) - { - btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data; - btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data; - if (f1 != f2) - { - Repel(f1, f2); - Repel(f2, f1); - } - } - void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2) - { - //#define REPEL_NEIGHBOR 1 + + // + // CollideVF_DD + // + struct CollideVF_DD : btDbvt::ICollide + { + void Process(const btDbvtNode* lnode, + const btDbvtNode* lface) + { + btSoftBody::Node* node = (btSoftBody::Node*)lnode->data; + btSoftBody::Face* face = (btSoftBody::Face*)lface->data; + btVector3 bary; + if (proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary)) + { + const btSoftBody::Node* n[] = 
{face->m_n[0], face->m_n[1], face->m_n[2]}; + const btVector3 w = bary; + const btScalar ma = node->m_im; + btScalar mb = BaryEval(n[0]->m_im, n[1]->m_im, n[2]->m_im, w); + if ((n[0]->m_im <= 0) || + (n[1]->m_im <= 0) || + (n[2]->m_im <= 0)) + { + mb = 0; + } + const btScalar ms = ma + mb; + if (ms > 0) + { + btSoftBody::DeformableFaceNodeContact c; + c.m_normal = face->m_normal; + if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) + c.m_normal = -face->m_normal; + c.m_margin = mrg; + c.m_node = node; + c.m_face = face; + c.m_bary = w; + c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; + psb[0]->m_faceNodeContacts.push_back(c); + } + } + } + btSoftBody* psb[2]; + btScalar mrg; + bool useFaceNormal; + }; + + // + // CollideFF_DD + // + struct CollideFF_DD : btDbvt::ICollide + { + void Process(const btDbvntNode* lface1, + const btDbvntNode* lface2) + { + btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data; + btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data; + if (f1 != f2) + { + Repel(f1, f2); + Repel(f2, f1); + } + } + void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2) + { + //#define REPEL_NEIGHBOR 1 #ifndef REPEL_NEIGHBOR - for (int node_id = 0; node_id < 3; ++node_id) - { - btSoftBody::Node* node = f1->m_n[node_id]; - for (int i = 0; i < 3; ++i) - { - if (f2->m_n[i] == node) - return; - } - } + for (int node_id = 0; node_id < 3; ++node_id) + { + btSoftBody::Node* node = f1->m_n[node_id]; + for (int i = 0; i < 3; ++i) + { + if (f2->m_n[i] == node) + return; + } + } #endif - bool skip = false; - for (int node_id = 0; node_id < 3; ++node_id) - { - btSoftBody::Node* node = f1->m_n[node_id]; + bool skip = false; + for (int node_id = 0; node_id < 3; ++node_id) + { + btSoftBody::Node* node = f1->m_n[node_id]; #ifdef REPEL_NEIGHBOR - for (int i = 0; i < 3; ++i) - { - if (f2->m_n[i] == node) - { - skip = true; - break; - } - } - if (skip) - { - skip = false; - continue; - } + for (int i = 0; i < 3; ++i) + { + if (f2->m_n[i] == node) 
+ { + skip = true; + break; + } + } + if (skip) + { + skip = false; + continue; + } #endif - btSoftBody::Face* face = f2; - btVector3 bary; - if (!proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary)) - continue; - btSoftBody::DeformableFaceNodeContact c; - c.m_normal = face->m_normal; - if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) - c.m_normal = -face->m_normal; - c.m_margin = mrg; - c.m_node = node; - c.m_face = face; - c.m_bary = bary; - c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; - psb[0]->m_faceNodeContacts.push_back(c); - } - } - btSoftBody* psb[2]; - btScalar mrg; - bool useFaceNormal; - }; - - struct CollideCCD : btDbvt::ICollide - { - void Process(const btDbvtNode* lnode, - const btDbvtNode* lface) - { - btSoftBody::Node* node = (btSoftBody::Node*)lnode->data; - btSoftBody::Face* face = (btSoftBody::Face*)lface->data; - btVector3 bary; - if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary)) - { - btSoftBody::DeformableFaceNodeContact c; - c.m_normal = face->m_normal; - if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) - c.m_normal = -face->m_normal; - c.m_node = node; - c.m_face = face; - c.m_bary = bary; - c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; - psb[0]->m_faceNodeContacts.push_back(c); - } - } - void Process(const btDbvntNode* lface1, - const btDbvntNode* lface2) - { - btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data; - btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data; - if (f1 != f2) - { - Repel(f1, f2); - Repel(f2, f1); - } - } - void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2) - { - //#define REPEL_NEIGHBOR 1 + btSoftBody::Face* face = f2; + btVector3 bary; + if (!proximityTest(face->m_n[0]->m_x, face->m_n[1]->m_x, face->m_n[2]->m_x, node->m_x, face->m_normal, mrg, bary)) + continue; + btSoftBody::DeformableFaceNodeContact c; + c.m_normal = face->m_normal; + if (!useFaceNormal && c.m_normal.dot(node->m_x 
- face->m_n[2]->m_x) < 0) + c.m_normal = -face->m_normal; + c.m_margin = mrg; + c.m_node = node; + c.m_face = face; + c.m_bary = bary; + c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; + psb[0]->m_faceNodeContacts.push_back(c); + } + } + btSoftBody* psb[2]; + btScalar mrg; + bool useFaceNormal; + }; + + struct CollideCCD : btDbvt::ICollide + { + void Process(const btDbvtNode* lnode, + const btDbvtNode* lface) + { + btSoftBody::Node* node = (btSoftBody::Node*)lnode->data; + btSoftBody::Face* face = (btSoftBody::Face*)lface->data; + btVector3 bary; + if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary)) + { + btSoftBody::DeformableFaceNodeContact c; + c.m_normal = face->m_normal; + if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) + c.m_normal = -face->m_normal; + c.m_node = node; + c.m_face = face; + c.m_bary = bary; + c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; + psb[0]->m_faceNodeContacts.push_back(c); + } + } + void Process(const btDbvntNode* lface1, + const btDbvntNode* lface2) + { + btSoftBody::Face* f1 = (btSoftBody::Face*)lface1->data; + btSoftBody::Face* f2 = (btSoftBody::Face*)lface2->data; + if (f1 != f2) + { + Repel(f1, f2); + Repel(f2, f1); + } + } + void Repel(btSoftBody::Face* f1, btSoftBody::Face* f2) + { + //#define REPEL_NEIGHBOR 1 #ifndef REPEL_NEIGHBOR - for (int node_id = 0; node_id < 3; ++node_id) - { - btSoftBody::Node* node = f1->m_n[node_id]; - for (int i = 0; i < 3; ++i) - { - if (f2->m_n[i] == node) - return; - } - } + for (int node_id = 0; node_id < 3; ++node_id) + { + btSoftBody::Node* node = f1->m_n[node_id]; + for (int i = 0; i < 3; ++i) + { + if (f2->m_n[i] == node) + return; + } + } #endif - bool skip = false; - for (int node_id = 0; node_id < 3; ++node_id) - { - btSoftBody::Node* node = f1->m_n[node_id]; + bool skip = false; + for (int node_id = 0; node_id < 3; ++node_id) + { + btSoftBody::Node* node = f1->m_n[node_id]; #ifdef REPEL_NEIGHBOR - for (int i = 0; i < 3; ++i) - { - if (f2->m_n[i] == 
node) - { - skip = true; - break; - } - } - if (skip) - { - skip = false; - continue; - } + for (int i = 0; i < 3; ++i) + { + if (f2->m_n[i] == node) + { + skip = true; + break; + } + } + if (skip) + { + skip = false; + continue; + } #endif - btSoftBody::Face* face = f2; - btVector3 bary; + btSoftBody::Face* face = f2; + btVector3 bary; if (bernsteinCCD(face, node, dt, SAFE_EPSILON, bary)) - { - btSoftBody::DeformableFaceNodeContact c; - c.m_normal = face->m_normal; - if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) - c.m_normal = -face->m_normal; - c.m_node = node; - c.m_face = face; - c.m_bary = bary; - c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; - psb[0]->m_faceNodeContacts.push_back(c); - } - } - } - btSoftBody* psb[2]; - btScalar dt, mrg; - bool useFaceNormal; - }; + { + btSoftBody::DeformableFaceNodeContact c; + c.m_normal = face->m_normal; + if (!useFaceNormal && c.m_normal.dot(node->m_x - face->m_n[2]->m_x) < 0) + c.m_normal = -face->m_normal; + c.m_node = node; + c.m_face = face; + c.m_bary = bary; + c.m_friction = psb[0]->m_cfg.kDF * psb[1]->m_cfg.kDF; + psb[0]->m_faceNodeContacts.push_back(c); + } + } + } + btSoftBody* psb[2]; + btScalar dt, mrg; + bool useFaceNormal; + }; }; #endif //_BT_SOFT_BODY_INTERNALS_H diff --git a/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h b/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h index c4ac4141aa..dbb2624eee 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h +++ b/thirdparty/bullet/BulletSoftBody/btSoftBodySolvers.h @@ -36,7 +36,7 @@ public: CL_SIMD_SOLVER, DX_SOLVER, DX_SIMD_SOLVER, - DEFORMABLE_SOLVER + DEFORMABLE_SOLVER }; protected: diff --git a/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp b/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp index 282dbf75f0..329bd19d71 100644 --- a/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp +++ b/thirdparty/bullet/BulletSoftBody/btSoftMultiBodyDynamicsWorld.cpp 
@@ -100,6 +100,11 @@ void btSoftMultiBodyDynamicsWorld::internalSingleStepSimulation(btScalar timeSte ///update soft bodies m_softBodySolver->updateSoftBodies(); + for (int i = 0; i < m_softBodies.size(); i++) + { + btSoftBody* psb = (btSoftBody*)m_softBodies[i]; + psb->interpolateRenderMesh(); + } // End solver-wise simulation step // /////////////////////////////// } diff --git a/thirdparty/bullet/BulletSoftBody/btSparseSDF.h b/thirdparty/bullet/BulletSoftBody/btSparseSDF.h index eb290a1dbd..d611726bcd 100644 --- a/thirdparty/bullet/BulletSoftBody/btSparseSDF.h +++ b/thirdparty/bullet/BulletSoftBody/btSparseSDF.h @@ -22,36 +22,36 @@ subject to the following restrictions: // Fast Hash -#if !defined (get16bits) -#define get16bits(d) ((((unsigned int)(((const unsigned char *)(d))[1])) << 8)\ -+(unsigned int)(((const unsigned char *)(d))[0]) ) +#if !defined(get16bits) +#define get16bits(d) ((((unsigned int)(((const unsigned char*)(d))[1])) << 8) + (unsigned int)(((const unsigned char*)(d))[0])) #endif // // super hash function by Paul Hsieh // -inline unsigned int HsiehHash (const char * data, int len) { - unsigned int hash = len, tmp; - len>>=2; - - /* Main loop */ - for (;len > 0; len--) { - hash += get16bits (data); - tmp = (get16bits (data+2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - data += 2*sizeof (unsigned short); - hash += hash >> 11; - } +inline unsigned int HsiehHash(const char* data, int len) +{ + unsigned int hash = len, tmp; + len >>= 2; + /* Main loop */ + for (; len > 0; len--) + { + hash += get16bits(data); + tmp = (get16bits(data + 2) << 11) ^ hash; + hash = (hash << 16) ^ tmp; + data += 2 * sizeof(unsigned short); + hash += hash >> 11; + } - /* Force "avalanching" of final 127 bits */ - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - hash += hash >> 6; + /* Force "avalanching" of final 127 bits */ + hash ^= hash << 3; + hash += hash >> 5; + hash ^= hash << 4; + hash += hash >> 17; + 
hash ^= hash << 25; + hash += hash >> 6; - return hash; + return hash; } template <const int CELLSIZE> @@ -81,7 +81,7 @@ struct btSparseSdf btAlignedObjectArray<Cell*> cells; btScalar voxelsz; - btScalar m_defaultVoxelsz; + btScalar m_defaultVoxelsz; int puid; int ncells; int m_clampCells; @@ -103,16 +103,16 @@ struct btSparseSdf //if this limit is reached, the SDF is reset (at the cost of some performance during the reset) m_clampCells = clampCells; cells.resize(hashsize, 0); - m_defaultVoxelsz = 0.25; + m_defaultVoxelsz = 0.25; Reset(); } // - - void setDefaultVoxelsz(btScalar sz) - { - m_defaultVoxelsz = sz; - } - + + void setDefaultVoxelsz(btScalar sz) + { + m_defaultVoxelsz = sz; + } + void Reset() { for (int i = 0, ni = cells.size(); i < ni; ++i) @@ -162,7 +162,7 @@ struct btSparseSdf nqueries = 1; nprobes = 1; ++puid; ///@todo: Reset puid's when int range limit is reached */ - /* else setup a priority list... */ + /* else setup a priority list... */ } // int RemoveReferences(btCollisionShape* pcs) @@ -221,7 +221,7 @@ struct btSparseSdf else { // printf("c->hash/c[0][1][2]=%d,%d,%d,%d\n", c->hash, c->c[0], c->c[1],c->c[2]); - //printf("h,ixb,iyb,izb=%d,%d,%d,%d\n", h,ix.b, iy.b, iz.b); + //printf("h,ixb,iyb,izb=%d,%d,%d,%d\n", h,ix.b, iy.b, iz.b); c = c->next; } @@ -363,7 +363,7 @@ struct btSparseSdf myset.p = (void*)shape; const char* ptr = (const char*)&myset; - unsigned int result = HsiehHash(ptr, sizeof(btS) ); + unsigned int result = HsiehHash(ptr, sizeof(btS)); return result; } diff --git a/thirdparty/bullet/BulletSoftBody/poly34.cpp b/thirdparty/bullet/BulletSoftBody/poly34.cpp index 819d0c79f7..ec7549c8e8 100644 --- a/thirdparty/bullet/BulletSoftBody/poly34.cpp +++ b/thirdparty/bullet/BulletSoftBody/poly34.cpp @@ -6,7 +6,7 @@ // #include <math.h> -#include "poly34.h" // solution of cubic and quartic equation +#include "poly34.h" // solution of cubic and quartic equation #define TwoPi 6.28318530717958648 const btScalar eps = SIMD_EPSILON; @@ -15,50 
+15,53 @@ const btScalar eps = SIMD_EPSILON; //============================================================================= static SIMD_FORCE_INLINE btScalar _root3(btScalar x) { - btScalar s = 1.; - while (x < 1.) { - x *= 8.; - s *= 0.5; - } - while (x > 8.) { - x *= 0.125; - s *= 2.; - } - btScalar r = 1.5; - r -= 1. / 3. * (r - x / (r * r)); - r -= 1. / 3. * (r - x / (r * r)); - r -= 1. / 3. * (r - x / (r * r)); - r -= 1. / 3. * (r - x / (r * r)); - r -= 1. / 3. * (r - x / (r * r)); - r -= 1. / 3. * (r - x / (r * r)); - return r * s; + btScalar s = 1.; + while (x < 1.) + { + x *= 8.; + s *= 0.5; + } + while (x > 8.) + { + x *= 0.125; + s *= 2.; + } + btScalar r = 1.5; + r -= 1. / 3. * (r - x / (r * r)); + r -= 1. / 3. * (r - x / (r * r)); + r -= 1. / 3. * (r - x / (r * r)); + r -= 1. / 3. * (r - x / (r * r)); + r -= 1. / 3. * (r - x / (r * r)); + r -= 1. / 3. * (r - x / (r * r)); + return r * s; } btScalar SIMD_FORCE_INLINE root3(btScalar x) { - if (x > 0) - return _root3(x); - else if (x < 0) - return -_root3(-x); - else - return 0.; + if (x > 0) + return _root3(x); + else if (x < 0) + return -_root3(-x); + else + return 0.; } // x - array of size 2 // return 2: 2 real roots x[0], x[1] // return 0: pair of complex roots: x[0]i*x[1] int SolveP2(btScalar* x, btScalar a, btScalar b) -{ // solve equation x^2 + a*x + b = 0 - btScalar D = 0.25 * a * a - b; - if (D >= 0) { - D = sqrt(D); - x[0] = -0.5 * a + D; - x[1] = -0.5 * a - D; - return 2; - } - x[0] = -0.5 * a; - x[1] = sqrt(-D); - return 0; +{ // solve equation x^2 + a*x + b = 0 + btScalar D = 0.25 * a * a - b; + if (D >= 0) + { + D = sqrt(D); + x[0] = -0.5 * a + D; + x[1] = -0.5 * a - D; + return 2; + } + x[0] = -0.5 * a; + x[1] = sqrt(-D); + return 0; } //--------------------------------------------------------------------------- // x - array of size 3 @@ -66,217 +69,228 @@ int SolveP2(btScalar* x, btScalar a, btScalar b) // 2 real roots: x[0], x[1], return 2 // 1 real root : x[0], x[1] i*x[2], return 1 int 
SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c) -{ // solve cubic equation x^3 + a*x^2 + b*x + c = 0 - btScalar a2 = a * a; - btScalar q = (a2 - 3 * b) / 9; - if (q < 0) - q = eps; - btScalar r = (a * (2 * a2 - 9 * b) + 27 * c) / 54; - // equation x^3 + q*x + r = 0 - btScalar r2 = r * r; - btScalar q3 = q * q * q; - btScalar A, B; - if (r2 <= (q3 + eps)) { //<<-- FIXED! - btScalar t = r / sqrt(q3); - if (t < -1) - t = -1; - if (t > 1) - t = 1; - t = acos(t); - a /= 3; - q = -2 * sqrt(q); - x[0] = q * cos(t / 3) - a; - x[1] = q * cos((t + TwoPi) / 3) - a; - x[2] = q * cos((t - TwoPi) / 3) - a; - return (3); - } - else { - //A =-pow(fabs(r)+sqrt(r2-q3),1./3); - A = -root3(fabs(r) + sqrt(r2 - q3)); - if (r < 0) - A = -A; - B = (A == 0 ? 0 : q / A); - - a /= 3; - x[0] = (A + B) - a; - x[1] = -0.5 * (A + B) - a; - x[2] = 0.5 * sqrt(3.) * (A - B); - if (fabs(x[2]) < eps) { - x[2] = x[1]; - return (2); - } - return (1); - } -} // SolveP3(btScalar *x,btScalar a,btScalar b,btScalar c) { +{ // solve cubic equation x^3 + a*x^2 + b*x + c = 0 + btScalar a2 = a * a; + btScalar q = (a2 - 3 * b) / 9; + if (q < 0) + q = eps; + btScalar r = (a * (2 * a2 - 9 * b) + 27 * c) / 54; + // equation x^3 + q*x + r = 0 + btScalar r2 = r * r; + btScalar q3 = q * q * q; + btScalar A, B; + if (r2 <= (q3 + eps)) + { //<<-- FIXED! + btScalar t = r / sqrt(q3); + if (t < -1) + t = -1; + if (t > 1) + t = 1; + t = acos(t); + a /= 3; + q = -2 * sqrt(q); + x[0] = q * cos(t / 3) - a; + x[1] = q * cos((t + TwoPi) / 3) - a; + x[2] = q * cos((t - TwoPi) / 3) - a; + return (3); + } + else + { + //A =-pow(fabs(r)+sqrt(r2-q3),1./3); + A = -root3(fabs(r) + sqrt(r2 - q3)); + if (r < 0) + A = -A; + B = (A == 0 ? 0 : q / A); + + a /= 3; + x[0] = (A + B) - a; + x[1] = -0.5 * (A + B) - a; + x[2] = 0.5 * sqrt(3.) 
* (A - B); + if (fabs(x[2]) < eps) + { + x[2] = x[1]; + return (2); + } + return (1); + } +} // SolveP3(btScalar *x,btScalar a,btScalar b,btScalar c) { //--------------------------------------------------------------------------- // a>=0! -void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b) // returns: a+i*s = sqrt(x+i*y) +void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b) // returns: a+i*s = sqrt(x+i*y) { - btScalar r = sqrt(x * x + y * y); - if (y == 0) { - r = sqrt(r); - if (x >= 0) { - a = r; - b = 0; - } - else { - a = 0; - b = r; - } - } - else { // y != 0 - a = sqrt(0.5 * (x + r)); - b = 0.5 * y / a; - } + btScalar r = sqrt(x * x + y * y); + if (y == 0) + { + r = sqrt(r); + if (x >= 0) + { + a = r; + b = 0; + } + else + { + a = 0; + b = r; + } + } + else + { // y != 0 + a = sqrt(0.5 * (x + r)); + b = 0.5 * y / a; + } } //--------------------------------------------------------------------------- -int SolveP4Bi(btScalar* x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0 +int SolveP4Bi(btScalar* x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 + d = 0 { - btScalar D = b * b - 4 * d; - if (D >= 0) { - btScalar sD = sqrt(D); - btScalar x1 = (-b + sD) / 2; - btScalar x2 = (-b - sD) / 2; // x2 <= x1 - if (x2 >= 0) // 0 <= x2 <= x1, 4 real roots - { - btScalar sx1 = sqrt(x1); - btScalar sx2 = sqrt(x2); - x[0] = -sx1; - x[1] = sx1; - x[2] = -sx2; - x[3] = sx2; - return 4; - } - if (x1 < 0) // x2 <= x1 < 0, two pair of imaginary roots - { - btScalar sx1 = sqrt(-x1); - btScalar sx2 = sqrt(-x2); - x[0] = 0; - x[1] = sx1; - x[2] = 0; - x[3] = sx2; - return 0; - } - // now x2 < 0 <= x1 , two real roots and one pair of imginary root - btScalar sx1 = sqrt(x1); - btScalar sx2 = sqrt(-x2); - x[0] = -sx1; - x[1] = sx1; - x[2] = 0; - x[3] = sx2; - return 2; - } - else { // if( D < 0 ), two pair of compex roots - btScalar sD2 = 0.5 * sqrt(-D); - CSqrt(-0.5 * b, sD2, x[0], x[1]); - CSqrt(-0.5 * b, -sD2, x[2], x[3]); - return 0; - } // 
if( D>=0 ) -} // SolveP4Bi(btScalar *x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 d + btScalar D = b * b - 4 * d; + if (D >= 0) + { + btScalar sD = sqrt(D); + btScalar x1 = (-b + sD) / 2; + btScalar x2 = (-b - sD) / 2; // x2 <= x1 + if (x2 >= 0) // 0 <= x2 <= x1, 4 real roots + { + btScalar sx1 = sqrt(x1); + btScalar sx2 = sqrt(x2); + x[0] = -sx1; + x[1] = sx1; + x[2] = -sx2; + x[3] = sx2; + return 4; + } + if (x1 < 0) // x2 <= x1 < 0, two pair of imaginary roots + { + btScalar sx1 = sqrt(-x1); + btScalar sx2 = sqrt(-x2); + x[0] = 0; + x[1] = sx1; + x[2] = 0; + x[3] = sx2; + return 0; + } + // now x2 < 0 <= x1 , two real roots and one pair of imginary root + btScalar sx1 = sqrt(x1); + btScalar sx2 = sqrt(-x2); + x[0] = -sx1; + x[1] = sx1; + x[2] = 0; + x[3] = sx2; + return 2; + } + else + { // if( D < 0 ), two pair of compex roots + btScalar sD2 = 0.5 * sqrt(-D); + CSqrt(-0.5 * b, sD2, x[0], x[1]); + CSqrt(-0.5 * b, -sD2, x[2], x[3]); + return 0; + } // if( D>=0 ) +} // SolveP4Bi(btScalar *x, btScalar b, btScalar d) // solve equation x^4 + b*x^2 d //--------------------------------------------------------------------------- #define SWAP(a, b) \ -{ \ -t = b; \ -b = a; \ -a = t; \ -} -static void dblSort3(btScalar& a, btScalar& b, btScalar& c) // make: a <= b <= c + { \ + t = b; \ + b = a; \ + a = t; \ + } +static void dblSort3(btScalar& a, btScalar& b, btScalar& c) // make: a <= b <= c { - btScalar t; - if (a > b) - SWAP(a, b); // now a<=b - if (c < b) { - SWAP(b, c); // now a<=b, b<=c - if (a > b) - SWAP(a, b); // now a<=b - } + btScalar t; + if (a > b) + SWAP(a, b); // now a<=b + if (c < b) + { + SWAP(b, c); // now a<=b, b<=c + if (a > b) + SWAP(a, b); // now a<=b + } } //--------------------------------------------------------------------------- -int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d +int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d { 
- //if( c==0 ) return SolveP4Bi(x,b,d); // After that, c!=0 - if (fabs(c) < 1e-14 * (fabs(b) + fabs(d))) - return SolveP4Bi(x, b, d); // After that, c!=0 - - int res3 = SolveP3(x, 2 * b, b * b - 4 * d, -c * c); // solve resolvent - // by Viet theorem: x1*x2*x3=-c*c not equals to 0, so x1!=0, x2!=0, x3!=0 - if (res3 > 1) // 3 real roots, - { - dblSort3(x[0], x[1], x[2]); // sort roots to x[0] <= x[1] <= x[2] - // Note: x[0]*x[1]*x[2]= c*c > 0 - if (x[0] > 0) // all roots are positive - { - btScalar sz1 = sqrt(x[0]); - btScalar sz2 = sqrt(x[1]); - btScalar sz3 = sqrt(x[2]); - // Note: sz1*sz2*sz3= -c (and not equal to 0) - if (c > 0) { - x[0] = (-sz1 - sz2 - sz3) / 2; - x[1] = (-sz1 + sz2 + sz3) / 2; - x[2] = (+sz1 - sz2 + sz3) / 2; - x[3] = (+sz1 + sz2 - sz3) / 2; - return 4; - } - // now: c<0 - x[0] = (-sz1 - sz2 + sz3) / 2; - x[1] = (-sz1 + sz2 - sz3) / 2; - x[2] = (+sz1 - sz2 - sz3) / 2; - x[3] = (+sz1 + sz2 + sz3) / 2; - return 4; - } // if( x[0] > 0) // all roots are positive - // now x[0] <= x[1] < 0, x[2] > 0 - // two pair of comlex roots - btScalar sz1 = sqrt(-x[0]); - btScalar sz2 = sqrt(-x[1]); - btScalar sz3 = sqrt(x[2]); - - if (c > 0) // sign = -1 - { - x[0] = -sz3 / 2; - x[1] = (sz1 - sz2) / 2; // x[0]i*x[1] - x[2] = sz3 / 2; - x[3] = (-sz1 - sz2) / 2; // x[2]i*x[3] - return 0; - } - // now: c<0 , sign = +1 - x[0] = sz3 / 2; - x[1] = (-sz1 + sz2) / 2; - x[2] = -sz3 / 2; - x[3] = (sz1 + sz2) / 2; - return 0; - } // if( res3>1 ) // 3 real roots, - // now resoventa have 1 real and pair of compex roots - // x[0] - real root, and x[0]>0, - // x[1]i*x[2] - complex roots, - // x[0] must be >=0. 
But one times x[0]=~ 1e-17, so: - if (x[0] < 0) - x[0] = 0; - btScalar sz1 = sqrt(x[0]); - btScalar szr, szi; - CSqrt(x[1], x[2], szr, szi); // (szr+i*szi)^2 = x[1]+i*x[2] - if (c > 0) // sign = -1 - { - x[0] = -sz1 / 2 - szr; // 1st real root - x[1] = -sz1 / 2 + szr; // 2nd real root - x[2] = sz1 / 2; - x[3] = szi; - return 2; - } - // now: c<0 , sign = +1 - x[0] = sz1 / 2 - szr; // 1st real root - x[1] = sz1 / 2 + szr; // 2nd real root - x[2] = -sz1 / 2; - x[3] = szi; - return 2; -} // SolveP4De(btScalar *x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d + //if( c==0 ) return SolveP4Bi(x,b,d); // After that, c!=0 + if (fabs(c) < 1e-14 * (fabs(b) + fabs(d))) + return SolveP4Bi(x, b, d); // After that, c!=0 + + int res3 = SolveP3(x, 2 * b, b * b - 4 * d, -c * c); // solve resolvent + // by Viet theorem: x1*x2*x3=-c*c not equals to 0, so x1!=0, x2!=0, x3!=0 + if (res3 > 1) // 3 real roots, + { + dblSort3(x[0], x[1], x[2]); // sort roots to x[0] <= x[1] <= x[2] + // Note: x[0]*x[1]*x[2]= c*c > 0 + if (x[0] > 0) // all roots are positive + { + btScalar sz1 = sqrt(x[0]); + btScalar sz2 = sqrt(x[1]); + btScalar sz3 = sqrt(x[2]); + // Note: sz1*sz2*sz3= -c (and not equal to 0) + if (c > 0) + { + x[0] = (-sz1 - sz2 - sz3) / 2; + x[1] = (-sz1 + sz2 + sz3) / 2; + x[2] = (+sz1 - sz2 + sz3) / 2; + x[3] = (+sz1 + sz2 - sz3) / 2; + return 4; + } + // now: c<0 + x[0] = (-sz1 - sz2 + sz3) / 2; + x[1] = (-sz1 + sz2 - sz3) / 2; + x[2] = (+sz1 - sz2 - sz3) / 2; + x[3] = (+sz1 + sz2 + sz3) / 2; + return 4; + } // if( x[0] > 0) // all roots are positive + // now x[0] <= x[1] < 0, x[2] > 0 + // two pair of comlex roots + btScalar sz1 = sqrt(-x[0]); + btScalar sz2 = sqrt(-x[1]); + btScalar sz3 = sqrt(x[2]); + + if (c > 0) // sign = -1 + { + x[0] = -sz3 / 2; + x[1] = (sz1 - sz2) / 2; // x[0]i*x[1] + x[2] = sz3 / 2; + x[3] = (-sz1 - sz2) / 2; // x[2]i*x[3] + return 0; + } + // now: c<0 , sign = +1 + x[0] = sz3 / 2; + x[1] = (-sz1 + sz2) / 2; + x[2] = -sz3 / 2; 
+ x[3] = (sz1 + sz2) / 2; + return 0; + } // if( res3>1 ) // 3 real roots, + // now resoventa have 1 real and pair of compex roots + // x[0] - real root, and x[0]>0, + // x[1]i*x[2] - complex roots, + // x[0] must be >=0. But one times x[0]=~ 1e-17, so: + if (x[0] < 0) + x[0] = 0; + btScalar sz1 = sqrt(x[0]); + btScalar szr, szi; + CSqrt(x[1], x[2], szr, szi); // (szr+i*szi)^2 = x[1]+i*x[2] + if (c > 0) // sign = -1 + { + x[0] = -sz1 / 2 - szr; // 1st real root + x[1] = -sz1 / 2 + szr; // 2nd real root + x[2] = sz1 / 2; + x[3] = szi; + return 2; + } + // now: c<0 , sign = +1 + x[0] = sz1 / 2 - szr; // 1st real root + x[1] = sz1 / 2 + szr; // 2nd real root + x[2] = -sz1 / 2; + x[3] = szi; + return 2; +} // SolveP4De(btScalar *x, btScalar b, btScalar c, btScalar d) // solve equation x^4 + b*x^2 + c*x + d //----------------------------------------------------------------------------- -btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d +btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d { - btScalar fxs = ((4 * x + 3 * a) * x + 2 * b) * x + c; // f'(x) - if (fxs == 0) - return x; //return 1e99; <<-- FIXED! - btScalar fx = (((x + a) * x + b) * x + c) * x + d; // f(x) - return x - fx / fxs; + btScalar fxs = ((4 * x + 3 * a) * x + 2 * b) * x + c; // f'(x) + if (fxs == 0) + return x; //return 1e99; <<-- FIXED! 
+ btScalar fx = (((x + a) * x + b) * x + c) * x + d; // f(x) + return x - fx / fxs; } //----------------------------------------------------------------------------- // x - array of size 4 @@ -284,136 +298,150 @@ btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d) // o // return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3], // return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3], int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d) -{ // solve equation x^4 + a*x^3 + b*x^2 + c*x + d by Dekart-Euler method - // move to a=0: - btScalar d1 = d + 0.25 * a * (0.25 * b * a - 3. / 64 * a * a * a - c); - btScalar c1 = c + 0.5 * a * (0.25 * a * a - b); - btScalar b1 = b - 0.375 * a * a; - int res = SolveP4De(x, b1, c1, d1); - if (res == 4) { - x[0] -= a / 4; - x[1] -= a / 4; - x[2] -= a / 4; - x[3] -= a / 4; - } - else if (res == 2) { - x[0] -= a / 4; - x[1] -= a / 4; - x[2] -= a / 4; - } - else { - x[0] -= a / 4; - x[2] -= a / 4; - } - // one Newton step for each real root: - if (res > 0) { - x[0] = N4Step(x[0], a, b, c, d); - x[1] = N4Step(x[1], a, b, c, d); - } - if (res > 2) { - x[2] = N4Step(x[2], a, b, c, d); - x[3] = N4Step(x[3], a, b, c, d); - } - return res; +{ // solve equation x^4 + a*x^3 + b*x^2 + c*x + d by Dekart-Euler method + // move to a=0: + btScalar d1 = d + 0.25 * a * (0.25 * b * a - 3. 
/ 64 * a * a * a - c); + btScalar c1 = c + 0.5 * a * (0.25 * a * a - b); + btScalar b1 = b - 0.375 * a * a; + int res = SolveP4De(x, b1, c1, d1); + if (res == 4) + { + x[0] -= a / 4; + x[1] -= a / 4; + x[2] -= a / 4; + x[3] -= a / 4; + } + else if (res == 2) + { + x[0] -= a / 4; + x[1] -= a / 4; + x[2] -= a / 4; + } + else + { + x[0] -= a / 4; + x[2] -= a / 4; + } + // one Newton step for each real root: + if (res > 0) + { + x[0] = N4Step(x[0], a, b, c, d); + x[1] = N4Step(x[1], a, b, c, d); + } + if (res > 2) + { + x[2] = N4Step(x[2], a, b, c, d); + x[3] = N4Step(x[3], a, b, c, d); + } + return res; } //----------------------------------------------------------------------------- #define F5(t) (((((t + a) * t + b) * t + c) * t + d) * t + e) //----------------------------------------------------------------------------- -btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 +btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 { - int cnt; - if (fabs(e) < eps) - return 0; - - btScalar brd = fabs(a); // brd - border of real roots - if (fabs(b) > brd) - brd = fabs(b); - if (fabs(c) > brd) - brd = fabs(c); - if (fabs(d) > brd) - brd = fabs(d); - if (fabs(e) > brd) - brd = fabs(e); - brd++; // brd - border of real roots - - btScalar x0, f0; // less than root - btScalar x1, f1; // greater than root - btScalar x2, f2, f2s; // next values, f(x2), f'(x2) - btScalar dx = 0; - - if (e < 0) { - x0 = 0; - x1 = brd; - f0 = e; - f1 = F5(x1); - x2 = 0.01 * brd; - } // positive root - else { - x0 = -brd; - x1 = 0; - f0 = F5(x0); - f1 = e; - x2 = -0.01 * brd; - } // negative root - - if (fabs(f0) < eps) - return x0; - if (fabs(f1) < eps) - return x1; - - // now x0<x1, f(x0)<0, f(x1)>0 - // Firstly 10 bisections - for (cnt = 0; cnt < 10; cnt++) { - x2 = (x0 + x1) / 2; // next point - //x2 = x0 - f0*(x1 - 
x0) / (f1 - f0); // next point - f2 = F5(x2); // f(x2) - if (fabs(f2) < eps) - return x2; - if (f2 > 0) { - x1 = x2; - f1 = f2; - } - else { - x0 = x2; - f0 = f2; - } - } - - // At each step: - // x0<x1, f(x0)<0, f(x1)>0. - // x2 - next value - // we hope that x0 < x2 < x1, but not necessarily - do { - if (cnt++ > 50) - break; - if (x2 <= x0 || x2 >= x1) - x2 = (x0 + x1) / 2; // now x0 < x2 < x1 - f2 = F5(x2); // f(x2) - if (fabs(f2) < eps) - return x2; - if (f2 > 0) { - x1 = x2; - f1 = f2; - } - else { - x0 = x2; - f0 = f2; - } - f2s = (((5 * x2 + 4 * a) * x2 + 3 * b) * x2 + 2 * c) * x2 + d; // f'(x2) - if (fabs(f2s) < eps) { - x2 = 1e99; - continue; - } - dx = f2 / f2s; - x2 -= dx; - } while (fabs(dx) > eps); - return x2; -} // SolveP5_1(btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 + int cnt; + if (fabs(e) < eps) + return 0; + + btScalar brd = fabs(a); // brd - border of real roots + if (fabs(b) > brd) + brd = fabs(b); + if (fabs(c) > brd) + brd = fabs(c); + if (fabs(d) > brd) + brd = fabs(d); + if (fabs(e) > brd) + brd = fabs(e); + brd++; // brd - border of real roots + + btScalar x0, f0; // less than root + btScalar x1, f1; // greater than root + btScalar x2, f2, f2s; // next values, f(x2), f'(x2) + btScalar dx = 0; + + if (e < 0) + { + x0 = 0; + x1 = brd; + f0 = e; + f1 = F5(x1); + x2 = 0.01 * brd; + } // positive root + else + { + x0 = -brd; + x1 = 0; + f0 = F5(x0); + f1 = e; + x2 = -0.01 * brd; + } // negative root + + if (fabs(f0) < eps) + return x0; + if (fabs(f1) < eps) + return x1; + + // now x0<x1, f(x0)<0, f(x1)>0 + // Firstly 10 bisections + for (cnt = 0; cnt < 10; cnt++) + { + x2 = (x0 + x1) / 2; // next point + //x2 = x0 - f0*(x1 - x0) / (f1 - f0); // next point + f2 = F5(x2); // f(x2) + if (fabs(f2) < eps) + return x2; + if (f2 > 0) + { + x1 = x2; + f1 = f2; + } + else + { + x0 = x2; + f0 = f2; + } + } + + // At each step: + // x0<x1, f(x0)<0, f(x1)>0. 
+ // x2 - next value + // we hope that x0 < x2 < x1, but not necessarily + do + { + if (cnt++ > 50) + break; + if (x2 <= x0 || x2 >= x1) + x2 = (x0 + x1) / 2; // now x0 < x2 < x1 + f2 = F5(x2); // f(x2) + if (fabs(f2) < eps) + return x2; + if (f2 > 0) + { + x1 = x2; + f1 = f2; + } + else + { + x0 = x2; + f0 = f2; + } + f2s = (((5 * x2 + 4 * a) * x2 + 3 * b) * x2 + 2 * c) * x2 + d; // f'(x2) + if (fabs(f2s) < eps) + { + x2 = 1e99; + continue; + } + dx = f2 / f2s; + x2 -= dx; + } while (fabs(dx) > eps); + return x2; +} // SolveP5_1(btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 //----------------------------------------------------------------------------- -int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 +int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 { - btScalar r = x[0] = SolveP5_1(a, b, c, d, e); - btScalar a1 = a + r, b1 = b + r * a1, c1 = c + r * b1, d1 = d + r * c1; - return 1 + SolveP4(x + 1, a1, b1, c1, d1); -} // SolveP5(btScalar *x,btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 + btScalar r = x[0] = SolveP5_1(a, b, c, d, e); + btScalar a1 = a + r, b1 = b + r * a1, c1 = c + r * b1, d1 = d + r * c1; + return 1 + SolveP4(x + 1, a1, b1, c1, d1); +} // SolveP5(btScalar *x,btScalar a,btScalar b,btScalar c,btScalar d,btScalar e) // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 //----------------------------------------------------------------------------- diff --git a/thirdparty/bullet/BulletSoftBody/poly34.h b/thirdparty/bullet/BulletSoftBody/poly34.h index 32ad5d7da5..35a52c5fec 100644 --- a/thirdparty/bullet/BulletSoftBody/poly34.h +++ b/thirdparty/bullet/BulletSoftBody/poly34.h @@ -8,31 +8,31 @@ // x - array of size 
2 // return 2: 2 real roots x[0], x[1] // return 0: pair of complex roots: x[0]i*x[1] -int SolveP2(btScalar* x, btScalar a, btScalar b); // solve equation x^2 + a*x + b = 0 +int SolveP2(btScalar* x, btScalar a, btScalar b); // solve equation x^2 + a*x + b = 0 // x - array of size 3 // return 3: 3 real roots x[0], x[1], x[2] // return 1: 1 real root x[0] and pair of complex roots: x[1]i*x[2] -int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c); // solve cubic equation x^3 + a*x^2 + b*x + c = 0 +int SolveP3(btScalar* x, btScalar a, btScalar b, btScalar c); // solve cubic equation x^3 + a*x^2 + b*x + c = 0 // x - array of size 4 // return 4: 4 real roots x[0], x[1], x[2], x[3], possible multiple roots // return 2: 2 real roots x[0], x[1] and complex x[2]i*x[3], // return 0: two pair of complex roots: x[0]i*x[1], x[2]i*x[3], -int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d); // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by Dekart-Euler method +int SolveP4(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d); // solve equation x^4 + a*x^3 + b*x^2 + c*x + d = 0 by Dekart-Euler method // x - array of size 5 // return 5: 5 real roots x[0], x[1], x[2], x[3], x[4], possible multiple roots // return 3: 3 real roots x[0], x[1], x[2] and complex x[3]i*x[4], // return 1: 1 real root x[0] and two pair of complex roots: x[1]i*x[2], x[3]i*x[4], -int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 +int SolveP5(btScalar* x, btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // solve equation x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 //----------------------------------------------------------------------------- // And some additional functions for internal use. 
// Your may remove this definitions from here -int SolveP4Bi(btScalar* x, btScalar b, btScalar d); // solve equation x^4 + b*x^2 + d = 0 -int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d); // solve equation x^4 + b*x^2 + c*x + d = 0 -void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b); // returns as a+i*s, sqrt(x+i*y) -btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d); // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d -btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 +int SolveP4Bi(btScalar* x, btScalar b, btScalar d); // solve equation x^4 + b*x^2 + d = 0 +int SolveP4De(btScalar* x, btScalar b, btScalar c, btScalar d); // solve equation x^4 + b*x^2 + c*x + d = 0 +void CSqrt(btScalar x, btScalar y, btScalar& a, btScalar& b); // returns as a+i*s, sqrt(x+i*y) +btScalar N4Step(btScalar x, btScalar a, btScalar b, btScalar c, btScalar d); // one Newton step for x^4 + a*x^3 + b*x^2 + c*x + d +btScalar SolveP5_1(btScalar a, btScalar b, btScalar c, btScalar d, btScalar e); // return real root of x^5 + a*x^4 + b*x^3 + c*x^2 + d*x + e = 0 #endif diff --git a/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp b/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp index 39b302b600..be8f8aa6d0 100644 --- a/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp +++ b/thirdparty/bullet/LinearMath/btAlignedAllocator.cpp @@ -138,7 +138,7 @@ struct btDebugPtrMagic }; }; -void *btAlignedAllocInternal(size_t size, int alignment, int line, char *filename) +void *btAlignedAllocInternal(size_t size, int alignment, int line, const char *filename) { if (size == 0) { @@ -195,7 +195,7 @@ void *btAlignedAllocInternal(size_t size, int alignment, int line, char *filenam return (ret); } -void btAlignedFreeInternal(void *ptr, int line, char *filename) +void btAlignedFreeInternal(void *ptr, int line, const char *filename) { void *real; diff --git 
a/thirdparty/bullet/LinearMath/btAlignedAllocator.h b/thirdparty/bullet/LinearMath/btAlignedAllocator.h index ce4d3585f1..971f62bfb0 100644 --- a/thirdparty/bullet/LinearMath/btAlignedAllocator.h +++ b/thirdparty/bullet/LinearMath/btAlignedAllocator.h @@ -35,9 +35,9 @@ int btDumpMemoryLeaks(); #define btAlignedFree(ptr) \ btAlignedFreeInternal(ptr, __LINE__, __FILE__) -void* btAlignedAllocInternal(size_t size, int alignment, int line, char* filename); +void* btAlignedAllocInternal(size_t size, int alignment, int line, const char* filename); -void btAlignedFreeInternal(void* ptr, int line, char* filename); +void btAlignedFreeInternal(void* ptr, int line, const char* filename); #else void* btAlignedAllocInternal(size_t size, int alignment); diff --git a/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp b/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp index 8bbfdc5f25..12125fd2de 100644 --- a/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp +++ b/thirdparty/bullet/LinearMath/btConvexHullComputer.cpp @@ -105,7 +105,7 @@ public: Point64 cross(const Point32& b) const { - return Point64(y * b.z - z * b.y, z * b.x - x * b.z, x * b.y - y * b.x); + return Point64(((int64_t)y) * b.z - ((int64_t)z) * b.y, ((int64_t)z) * b.x - ((int64_t)x) * b.z, ((int64_t)x) * b.y - ((int64_t)y) * b.x); } Point64 cross(const Point64& b) const @@ -115,7 +115,7 @@ public: int64_t dot(const Point32& b) const { - return x * b.x + y * b.y + z * b.z; + return ((int64_t)x) * b.x + ((int64_t)y) * b.y + ((int64_t)z) * b.z; } int64_t dot(const Point64& b) const @@ -2673,6 +2673,7 @@ btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, in } vertices.resize(0); + original_vertex_index.resize(0); edges.resize(0); faces.resize(0); @@ -2683,6 +2684,7 @@ btScalar btConvexHullComputer::compute(const void* coords, bool doubleCoords, in { btConvexHullInternal::Vertex* v = oldVertices[copied]; vertices.push_back(hull.getCoordinates(v)); + 
original_vertex_index.push_back(v->point.index); btConvexHullInternal::Edge* firstEdge = v->edges; if (firstEdge) { diff --git a/thirdparty/bullet/LinearMath/btConvexHullComputer.h b/thirdparty/bullet/LinearMath/btConvexHullComputer.h index cba684f2dc..18b26eea9a 100644 --- a/thirdparty/bullet/LinearMath/btConvexHullComputer.h +++ b/thirdparty/bullet/LinearMath/btConvexHullComputer.h @@ -66,6 +66,9 @@ public: // Vertices of the output hull btAlignedObjectArray<btVector3> vertices; + // The original vertex index in the input coords array + btAlignedObjectArray<int> original_vertex_index; + // Edges of the output hull btAlignedObjectArray<Edge> edges; diff --git a/thirdparty/bullet/LinearMath/btReducedVector.h b/thirdparty/bullet/LinearMath/btReducedVector.h index 83b5e581e5..313a4271f0 100644 --- a/thirdparty/bullet/LinearMath/btReducedVector.h +++ b/thirdparty/bullet/LinearMath/btReducedVector.h @@ -267,7 +267,7 @@ public: std::sort(tuples.begin(), tuples.end()); btAlignedObjectArray<int> new_indices; btAlignedObjectArray<btVector3> new_vecs; - for (int i = 0; i < tuples.size(); ++i) + for (size_t i = 0; i < tuples.size(); ++i) { new_indices.push_back(tuples[i].b); new_vecs.push_back(m_vecs[tuples[i].a]); diff --git a/thirdparty/bullet/LinearMath/btScalar.h b/thirdparty/bullet/LinearMath/btScalar.h index 86d94e8974..36b90cc944 100644 --- a/thirdparty/bullet/LinearMath/btScalar.h +++ b/thirdparty/bullet/LinearMath/btScalar.h @@ -25,7 +25,7 @@ subject to the following restrictions: #include <float.h> /* SVN $Revision$ on $Date$ from http://bullet.googlecode.com*/ -#define BT_BULLET_VERSION 289 +#define BT_BULLET_VERSION 307 inline int btGetVersion() { diff --git a/thirdparty/bullet/LinearMath/btSerializer.h b/thirdparty/bullet/LinearMath/btSerializer.h index 2ee712047f..9abcf031d0 100644 --- a/thirdparty/bullet/LinearMath/btSerializer.h +++ b/thirdparty/bullet/LinearMath/btSerializer.h @@ -479,9 +479,9 @@ public: buffer[8] = 'V'; } - buffer[9] = '2'; - buffer[10] = 
'8'; - buffer[11] = '9'; + buffer[9] = '3'; + buffer[10] = '0'; + buffer[11] = '7'; } virtual void startSerialization() diff --git a/thirdparty/mbedtls/include/mbedtls/bignum.h b/thirdparty/mbedtls/include/mbedtls/bignum.h index 4bb9fa3d43..4d04b336e7 100644 --- a/thirdparty/mbedtls/include/mbedtls/bignum.h +++ b/thirdparty/mbedtls/include/mbedtls/bignum.h @@ -88,12 +88,12 @@ * Maximum window size used for modular exponentiation. Default: 6 * Minimum value: 1. Maximum value: 6. * - * Result is an array of ( 2 << MBEDTLS_MPI_WINDOW_SIZE ) MPIs used + * Result is an array of ( 2 ** MBEDTLS_MPI_WINDOW_SIZE ) MPIs used * for the sliding window calculation. (So 64 by default) * * Reduction in size, reduces speed. */ -#define MBEDTLS_MPI_WINDOW_SIZE 6 /**< Maximum windows size used. */ +#define MBEDTLS_MPI_WINDOW_SIZE 6 /**< Maximum window size used. */ #endif /* !MBEDTLS_MPI_WINDOW_SIZE */ #if !defined(MBEDTLS_MPI_MAX_SIZE) diff --git a/thirdparty/mbedtls/include/mbedtls/ccm.h b/thirdparty/mbedtls/include/mbedtls/ccm.h index 3dcdc91894..d50c6ec993 100644 --- a/thirdparty/mbedtls/include/mbedtls/ccm.h +++ b/thirdparty/mbedtls/include/mbedtls/ccm.h @@ -175,7 +175,7 @@ void mbedtls_ccm_free( mbedtls_ccm_context *ctx ); * than zero, \p output must be a writable buffer of at least * that length. * \param tag The buffer holding the authentication field. This must be a - * readable buffer of at least \p tag_len Bytes. + * writable buffer of at least \p tag_len Bytes. * \param tag_len The length of the authentication field to generate in Bytes: * 4, 6, 8, 10, 12, 14 or 16. * @@ -220,7 +220,7 @@ int mbedtls_ccm_encrypt_and_tag( mbedtls_ccm_context *ctx, size_t length, * than zero, \p output must be a writable buffer of at least * that length. * \param tag The buffer holding the authentication field. This must be a - * readable buffer of at least \p tag_len Bytes. + * writable buffer of at least \p tag_len Bytes. 
* \param tag_len The length of the authentication field to generate in Bytes: * 0, 4, 6, 8, 10, 12, 14 or 16. * diff --git a/thirdparty/mbedtls/include/mbedtls/config.h b/thirdparty/mbedtls/include/mbedtls/config.h index 217998a5eb..e17bc7e306 100644 --- a/thirdparty/mbedtls/include/mbedtls/config.h +++ b/thirdparty/mbedtls/include/mbedtls/config.h @@ -3128,7 +3128,7 @@ */ /* MPI / BIGNUM options */ -//#define MBEDTLS_MPI_WINDOW_SIZE 6 /**< Maximum windows size used. */ +//#define MBEDTLS_MPI_WINDOW_SIZE 6 /**< Maximum window size used. */ //#define MBEDTLS_MPI_MAX_SIZE 1024 /**< Maximum number of bytes for usable MPIs. */ /* CTR_DRBG options */ diff --git a/thirdparty/mbedtls/include/mbedtls/ctr_drbg.h b/thirdparty/mbedtls/include/mbedtls/ctr_drbg.h index 7e5f2e5769..278fbbbb7a 100644 --- a/thirdparty/mbedtls/include/mbedtls/ctr_drbg.h +++ b/thirdparty/mbedtls/include/mbedtls/ctr_drbg.h @@ -224,6 +224,11 @@ mbedtls_ctr_drbg_context; * and prepares it for mbedtls_ctr_drbg_seed() * or mbedtls_ctr_drbg_free(). * + * \note The reseed interval is + * #MBEDTLS_CTR_DRBG_RESEED_INTERVAL by default. + * You can override it by calling + * mbedtls_ctr_drbg_set_reseed_interval(). + * * \param ctx The CTR_DRBG context to initialize. */ void mbedtls_ctr_drbg_init( mbedtls_ctr_drbg_context *ctx ); @@ -305,7 +310,8 @@ int mbedtls_ctr_drbg_seed( mbedtls_ctr_drbg_context *ctx, size_t len ); /** - * \brief This function clears CTR_CRBG context data. + * \brief This function resets CTR_DRBG context to the state immediately + * after initial call of mbedtls_ctr_drbg_init(). * * \param ctx The CTR_DRBG context to clear. 
*/ diff --git a/thirdparty/mbedtls/include/mbedtls/gcm.h b/thirdparty/mbedtls/include/mbedtls/gcm.h index 4e4434ed4d..1201fbd4f1 100644 --- a/thirdparty/mbedtls/include/mbedtls/gcm.h +++ b/thirdparty/mbedtls/include/mbedtls/gcm.h @@ -182,7 +182,7 @@ int mbedtls_gcm_setkey( mbedtls_gcm_context *ctx, * than zero, this must be a writable buffer of at least that * size in Bytes. * \param tag_len The length of the tag to generate. - * \param tag The buffer for holding the tag. This must be a readable + * \param tag The buffer for holding the tag. This must be a writable * buffer of at least \p tag_len Bytes. * * \return \c 0 if the encryption or decryption was performed @@ -310,7 +310,7 @@ int mbedtls_gcm_update( mbedtls_gcm_context *ctx, * tag. The tag can have a maximum length of 16 Bytes. * * \param ctx The GCM context. This must be initialized. - * \param tag The buffer for holding the tag. This must be a readable + * \param tag The buffer for holding the tag. This must be a writable * buffer of at least \p tag_len Bytes. * \param tag_len The length of the tag to generate. This must be at least * four. diff --git a/thirdparty/mbedtls/include/mbedtls/hmac_drbg.h b/thirdparty/mbedtls/include/mbedtls/hmac_drbg.h index 6883678204..970c033c15 100644 --- a/thirdparty/mbedtls/include/mbedtls/hmac_drbg.h +++ b/thirdparty/mbedtls/include/mbedtls/hmac_drbg.h @@ -138,6 +138,10 @@ typedef struct mbedtls_hmac_drbg_context * This function makes the context ready for mbedtls_hmac_drbg_seed(), * mbedtls_hmac_drbg_seed_buf() or mbedtls_hmac_drbg_free(). * + * \note The reseed interval is #MBEDTLS_HMAC_DRBG_RESEED_INTERVAL + * by default. Override this value by calling + * mbedtls_hmac_drbg_set_reseed_interval(). + * * \param ctx HMAC_DRBG context to be initialized. 
*/ void mbedtls_hmac_drbg_init( mbedtls_hmac_drbg_context *ctx ); @@ -361,7 +365,8 @@ int mbedtls_hmac_drbg_random_with_add( void *p_rng, int mbedtls_hmac_drbg_random( void *p_rng, unsigned char *output, size_t out_len ); /** - * \brief Free an HMAC_DRBG context + * \brief This function resets HMAC_DRBG context to the state immediately + * after initial call of mbedtls_hmac_drbg_init(). * * \param ctx The HMAC_DRBG context to free. */ diff --git a/thirdparty/mbedtls/include/mbedtls/sha512.h b/thirdparty/mbedtls/include/mbedtls/sha512.h index 9ff78ecf41..5e5a15e000 100644 --- a/thirdparty/mbedtls/include/mbedtls/sha512.h +++ b/thirdparty/mbedtls/include/mbedtls/sha512.h @@ -152,8 +152,7 @@ int mbedtls_sha512_update_ret( mbedtls_sha512_context *ctx, /** * \brief This function finishes the SHA-512 operation, and writes - * the result to the output buffer. This function is for - * internal use only. + * the result to the output buffer. * * \param ctx The SHA-512 context. This must be initialized * and have a hash operation started. @@ -169,6 +168,7 @@ int mbedtls_sha512_finish_ret( mbedtls_sha512_context *ctx, /** * \brief This function processes a single data block within * the ongoing SHA-512 computation. + * This function is for internal use only. * * \param ctx The SHA-512 context. This must be initialized. * \param data The buffer holding one block of data. This diff --git a/thirdparty/mbedtls/include/mbedtls/ssl.h b/thirdparty/mbedtls/include/mbedtls/ssl.h index d3ee3c4e6f..fe33ac8d57 100644 --- a/thirdparty/mbedtls/include/mbedtls/ssl.h +++ b/thirdparty/mbedtls/include/mbedtls/ssl.h @@ -1409,7 +1409,7 @@ void mbedtls_ssl_conf_dbg( mbedtls_ssl_config *conf, * \note For DTLS, you need to provide either a non-NULL * f_recv_timeout callback, or a f_recv that doesn't block. 
* - * \note See the documentations of \c mbedtls_ssl_sent_t, + * \note See the documentations of \c mbedtls_ssl_send_t, * \c mbedtls_ssl_recv_t and \c mbedtls_ssl_recv_timeout_t for * the conventions those callbacks must follow. * diff --git a/thirdparty/mbedtls/include/mbedtls/version.h b/thirdparty/mbedtls/include/mbedtls/version.h index d09b45002d..5f0a8f114c 100644 --- a/thirdparty/mbedtls/include/mbedtls/version.h +++ b/thirdparty/mbedtls/include/mbedtls/version.h @@ -65,16 +65,16 @@ */ #define MBEDTLS_VERSION_MAJOR 2 #define MBEDTLS_VERSION_MINOR 16 -#define MBEDTLS_VERSION_PATCH 8 +#define MBEDTLS_VERSION_PATCH 9 /** * The single version number has the following structure: * MMNNPP00 * Major version | Minor version | Patch version */ -#define MBEDTLS_VERSION_NUMBER 0x02100800 -#define MBEDTLS_VERSION_STRING "2.16.8" -#define MBEDTLS_VERSION_STRING_FULL "mbed TLS 2.16.8" +#define MBEDTLS_VERSION_NUMBER 0x02100900 +#define MBEDTLS_VERSION_STRING "2.16.9" +#define MBEDTLS_VERSION_STRING_FULL "mbed TLS 2.16.9" #if defined(MBEDTLS_VERSION_C) diff --git a/thirdparty/mbedtls/library/aes.c b/thirdparty/mbedtls/library/aes.c index 9b337505fd..da0e5b6bdc 100644 --- a/thirdparty/mbedtls/library/aes.c +++ b/thirdparty/mbedtls/library/aes.c @@ -760,6 +760,7 @@ exit: return( ret ); } +#endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */ #if defined(MBEDTLS_CIPHER_MODE_XTS) static int mbedtls_aes_xts_decode_keys( const unsigned char *key, @@ -838,8 +839,6 @@ int mbedtls_aes_xts_setkey_dec( mbedtls_aes_xts_context *ctx, } #endif /* MBEDTLS_CIPHER_MODE_XTS */ -#endif /* !MBEDTLS_AES_SETKEY_DEC_ALT */ - #define AES_FROUND(X0,X1,X2,X3,Y0,Y1,Y2,Y3) \ do \ { \ @@ -897,63 +896,56 @@ int mbedtls_internal_aes_encrypt( mbedtls_aes_context *ctx, unsigned char output[16] ) { int i; - uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; - - RK = ctx->rk; + uint32_t *RK = ctx->rk; + struct + { + uint32_t X[4]; + uint32_t Y[4]; + } t; - GET_UINT32_LE( X0, input, 0 ); X0 ^= *RK++; - GET_UINT32_LE( X1, 
input, 4 ); X1 ^= *RK++; - GET_UINT32_LE( X2, input, 8 ); X2 ^= *RK++; - GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++; + GET_UINT32_LE( t.X[0], input, 0 ); t.X[0] ^= *RK++; + GET_UINT32_LE( t.X[1], input, 4 ); t.X[1] ^= *RK++; + GET_UINT32_LE( t.X[2], input, 8 ); t.X[2] ^= *RK++; + GET_UINT32_LE( t.X[3], input, 12 ); t.X[3] ^= *RK++; for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- ) { - AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - AES_FROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); + AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + AES_FROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] ); } - AES_FROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - - X0 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y0 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); - - X1 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y1 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); - - X2 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y2 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); - - X3 = *RK++ ^ \ - ( (uint32_t) FSb[ ( Y3 ) & 0xFF ] ) ^ - ( (uint32_t) FSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) FSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) FSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); - - PUT_UINT32_LE( X0, output, 0 ); - PUT_UINT32_LE( X1, output, 4 ); - PUT_UINT32_LE( X2, output, 8 ); - PUT_UINT32_LE( X3, output, 12 ); - - mbedtls_platform_zeroize( &X0, sizeof( X0 ) ); - mbedtls_platform_zeroize( &X1, sizeof( X1 ) ); - mbedtls_platform_zeroize( &X2, sizeof( X2 ) ); - mbedtls_platform_zeroize( &X3, sizeof( X3 ) ); - - mbedtls_platform_zeroize( &Y0, sizeof( Y0 ) ); - mbedtls_platform_zeroize( &Y1, sizeof( Y1 ) ); - mbedtls_platform_zeroize( 
&Y2, sizeof( Y2 ) ); - mbedtls_platform_zeroize( &Y3, sizeof( Y3 ) ); - - mbedtls_platform_zeroize( &RK, sizeof( RK ) ); + AES_FROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + + t.X[0] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[0] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 ); + + t.X[1] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[1] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 ); + + t.X[2] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[2] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[3] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 ); + + t.X[3] = *RK++ ^ \ + ( (uint32_t) FSb[ ( t.Y[3] ) & 0xFF ] ) ^ + ( (uint32_t) FSb[ ( t.Y[0] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) FSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) FSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 ); + + PUT_UINT32_LE( t.X[0], output, 0 ); + PUT_UINT32_LE( t.X[1], output, 4 ); + PUT_UINT32_LE( t.X[2], output, 8 ); + PUT_UINT32_LE( t.X[3], output, 12 ); + + mbedtls_platform_zeroize( &t, sizeof( t ) ); return( 0 ); } @@ -977,63 +969,56 @@ int mbedtls_internal_aes_decrypt( mbedtls_aes_context *ctx, unsigned char output[16] ) { int i; - uint32_t *RK, X0, X1, X2, X3, Y0, Y1, Y2, Y3; - - RK = ctx->rk; + uint32_t *RK = ctx->rk; + struct + { + uint32_t X[4]; + uint32_t Y[4]; + } t; - GET_UINT32_LE( X0, input, 0 ); X0 ^= *RK++; - GET_UINT32_LE( X1, input, 4 ); X1 ^= *RK++; - GET_UINT32_LE( X2, input, 8 ); X2 ^= *RK++; - GET_UINT32_LE( X3, input, 12 ); X3 ^= *RK++; + GET_UINT32_LE( t.X[0], input, 0 ); t.X[0] ^= *RK++; + GET_UINT32_LE( t.X[1], input, 4 ); t.X[1] ^= *RK++; + GET_UINT32_LE( t.X[2], input, 8 ); t.X[2] ^= *RK++; + GET_UINT32_LE( t.X[3], input, 12 ); 
t.X[3] ^= *RK++; for( i = ( ctx->nr >> 1 ) - 1; i > 0; i-- ) { - AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - AES_RROUND( X0, X1, X2, X3, Y0, Y1, Y2, Y3 ); + AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + AES_RROUND( t.X[0], t.X[1], t.X[2], t.X[3], t.Y[0], t.Y[1], t.Y[2], t.Y[3] ); } - AES_RROUND( Y0, Y1, Y2, Y3, X0, X1, X2, X3 ); - - X0 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y0 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y3 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y2 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y1 >> 24 ) & 0xFF ] << 24 ); - - X1 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y1 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y0 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y3 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y2 >> 24 ) & 0xFF ] << 24 ); - - X2 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y2 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y1 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y0 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y3 >> 24 ) & 0xFF ] << 24 ); - - X3 = *RK++ ^ \ - ( (uint32_t) RSb[ ( Y3 ) & 0xFF ] ) ^ - ( (uint32_t) RSb[ ( Y2 >> 8 ) & 0xFF ] << 8 ) ^ - ( (uint32_t) RSb[ ( Y1 >> 16 ) & 0xFF ] << 16 ) ^ - ( (uint32_t) RSb[ ( Y0 >> 24 ) & 0xFF ] << 24 ); - - PUT_UINT32_LE( X0, output, 0 ); - PUT_UINT32_LE( X1, output, 4 ); - PUT_UINT32_LE( X2, output, 8 ); - PUT_UINT32_LE( X3, output, 12 ); - - mbedtls_platform_zeroize( &X0, sizeof( X0 ) ); - mbedtls_platform_zeroize( &X1, sizeof( X1 ) ); - mbedtls_platform_zeroize( &X2, sizeof( X2 ) ); - mbedtls_platform_zeroize( &X3, sizeof( X3 ) ); - - mbedtls_platform_zeroize( &Y0, sizeof( Y0 ) ); - mbedtls_platform_zeroize( &Y1, sizeof( Y1 ) ); - mbedtls_platform_zeroize( &Y2, sizeof( Y2 ) ); - mbedtls_platform_zeroize( &Y3, sizeof( Y3 ) ); - - mbedtls_platform_zeroize( &RK, sizeof( RK ) ); + AES_RROUND( t.Y[0], t.Y[1], t.Y[2], t.Y[3], t.X[0], t.X[1], t.X[2], t.X[3] ); + + t.X[0] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[0] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 8 ) & 0xFF ] << 8 
) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 24 ) & 0xFF ] << 24 ); + + t.X[1] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[1] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 24 ) & 0xFF ] << 24 ); + + t.X[2] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[2] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[3] >> 24 ) & 0xFF ] << 24 ); + + t.X[3] = *RK++ ^ \ + ( (uint32_t) RSb[ ( t.Y[3] ) & 0xFF ] ) ^ + ( (uint32_t) RSb[ ( t.Y[2] >> 8 ) & 0xFF ] << 8 ) ^ + ( (uint32_t) RSb[ ( t.Y[1] >> 16 ) & 0xFF ] << 16 ) ^ + ( (uint32_t) RSb[ ( t.Y[0] >> 24 ) & 0xFF ] << 24 ); + + PUT_UINT32_LE( t.X[0], output, 0 ); + PUT_UINT32_LE( t.X[1], output, 4 ); + PUT_UINT32_LE( t.X[2], output, 8 ); + PUT_UINT32_LE( t.X[3], output, 12 ); + + mbedtls_platform_zeroize( &t, sizeof( t ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/bignum.c b/thirdparty/mbedtls/library/bignum.c index dfe976d648..2feb727d89 100644 --- a/thirdparty/mbedtls/library/bignum.c +++ b/thirdparty/mbedtls/library/bignum.c @@ -1364,7 +1364,10 @@ int mbedtls_mpi_sub_abs( mbedtls_mpi *X, const mbedtls_mpi *A, const mbedtls_mpi /* If we ran out of space for the carry, it means that the result * is negative. 
*/ if( n == X->n ) - return( MBEDTLS_ERR_MPI_NEGATIVE_VALUE ); + { + ret = MBEDTLS_ERR_MPI_NEGATIVE_VALUE; + goto cleanup; + } --X->p[n]; } @@ -2044,7 +2047,7 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, size_t i, j, nblimbs; size_t bufsize, nbits; mbedtls_mpi_uint ei, mm, state; - mbedtls_mpi RR, T, W[ 2 << MBEDTLS_MPI_WINDOW_SIZE ], Apos; + mbedtls_mpi RR, T, W[ 1 << MBEDTLS_MPI_WINDOW_SIZE ], Apos; int neg; MPI_VALIDATE_RET( X != NULL ); @@ -2058,6 +2061,10 @@ int mbedtls_mpi_exp_mod( mbedtls_mpi *X, const mbedtls_mpi *A, if( mbedtls_mpi_cmp_int( E, 0 ) < 0 ) return( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + if( mbedtls_mpi_bitlen( E ) > MBEDTLS_MPI_MAX_BITS || + mbedtls_mpi_bitlen( N ) > MBEDTLS_MPI_MAX_BITS ) + return ( MBEDTLS_ERR_MPI_BAD_INPUT_DATA ); + /* * Init temps and window size */ @@ -2334,7 +2341,7 @@ int mbedtls_mpi_fill_random( mbedtls_mpi *X, size_t size, MBEDTLS_MPI_CHK( mbedtls_mpi_lset( X, 0 ) ); Xp = (unsigned char*) X->p; - f_rng( p_rng, Xp + overhead, size ); + MBEDTLS_MPI_CHK( f_rng( p_rng, Xp + overhead, size ) ); mpi_bigendian_to_host( X->p, limbs ); diff --git a/thirdparty/mbedtls/library/cipher_wrap.c b/thirdparty/mbedtls/library/cipher_wrap.c index 1dcac21be1..5973ca6ba2 100644 --- a/thirdparty/mbedtls/library/cipher_wrap.c +++ b/thirdparty/mbedtls/library/cipher_wrap.c @@ -779,7 +779,7 @@ static const mbedtls_cipher_info_t camellia_128_ecb_info = { MBEDTLS_MODE_ECB, 128, "CAMELLIA-128-ECB", - 16, + 0, 0, 16, &camellia_info @@ -790,7 +790,7 @@ static const mbedtls_cipher_info_t camellia_192_ecb_info = { MBEDTLS_MODE_ECB, 192, "CAMELLIA-192-ECB", - 16, + 0, 0, 16, &camellia_info @@ -801,7 +801,7 @@ static const mbedtls_cipher_info_t camellia_256_ecb_info = { MBEDTLS_MODE_ECB, 256, "CAMELLIA-256-ECB", - 16, + 0, 0, 16, &camellia_info @@ -1155,7 +1155,7 @@ static const mbedtls_cipher_info_t aria_128_ecb_info = { MBEDTLS_MODE_ECB, 128, "ARIA-128-ECB", - 16, + 0, 0, 16, &aria_info @@ -1166,7 +1166,7 @@ static const 
mbedtls_cipher_info_t aria_192_ecb_info = { MBEDTLS_MODE_ECB, 192, "ARIA-192-ECB", - 16, + 0, 0, 16, &aria_info @@ -1177,7 +1177,7 @@ static const mbedtls_cipher_info_t aria_256_ecb_info = { MBEDTLS_MODE_ECB, 256, "ARIA-256-ECB", - 16, + 0, 0, 16, &aria_info @@ -1579,7 +1579,7 @@ static const mbedtls_cipher_info_t des_ecb_info = { MBEDTLS_MODE_ECB, MBEDTLS_KEY_LENGTH_DES, "DES-ECB", - 8, + 0, 0, 8, &des_info @@ -1630,7 +1630,7 @@ static const mbedtls_cipher_info_t des_ede_ecb_info = { MBEDTLS_MODE_ECB, MBEDTLS_KEY_LENGTH_DES_EDE, "DES-EDE-ECB", - 8, + 0, 0, 8, &des_ede_info @@ -1681,7 +1681,7 @@ static const mbedtls_cipher_info_t des_ede3_ecb_info = { MBEDTLS_MODE_ECB, MBEDTLS_KEY_LENGTH_DES_EDE3, "DES-EDE3-ECB", - 8, + 0, 0, 8, &des_ede3_info @@ -1796,7 +1796,7 @@ static const mbedtls_cipher_info_t blowfish_ecb_info = { MBEDTLS_MODE_ECB, 128, "BLOWFISH-ECB", - 8, + 0, MBEDTLS_CIPHER_VARIABLE_KEY_LEN, 8, &blowfish_info diff --git a/thirdparty/mbedtls/library/cmac.c b/thirdparty/mbedtls/library/cmac.c index 1a1200b52b..409f67958e 100644 --- a/thirdparty/mbedtls/library/cmac.c +++ b/thirdparty/mbedtls/library/cmac.c @@ -450,7 +450,7 @@ exit: */ int mbedtls_aes_cmac_prf_128( const unsigned char *key, size_t key_length, const unsigned char *input, size_t in_len, - unsigned char *output ) + unsigned char output[16] ) { int ret; const mbedtls_cipher_info_t *cipher_info; diff --git a/thirdparty/mbedtls/library/ctr_drbg.c b/thirdparty/mbedtls/library/ctr_drbg.c index b98df29a9b..e92008bbe8 100644 --- a/thirdparty/mbedtls/library/ctr_drbg.c +++ b/thirdparty/mbedtls/library/ctr_drbg.c @@ -82,11 +82,17 @@ void mbedtls_ctr_drbg_init( mbedtls_ctr_drbg_context *ctx ) { memset( ctx, 0, sizeof( mbedtls_ctr_drbg_context ) ); + ctx->reseed_interval = MBEDTLS_CTR_DRBG_RESEED_INTERVAL; + #if defined(MBEDTLS_THREADING_C) mbedtls_mutex_init( &ctx->mutex ); #endif } +/* + * This function resets CTR_DRBG context to the state immediately + * after initial call of mbedtls_ctr_drbg_init(). 
+ */ void mbedtls_ctr_drbg_free( mbedtls_ctr_drbg_context *ctx ) { if( ctx == NULL ) @@ -97,6 +103,10 @@ void mbedtls_ctr_drbg_free( mbedtls_ctr_drbg_context *ctx ) #endif mbedtls_aes_free( &ctx->aes_ctx ); mbedtls_platform_zeroize( ctx, sizeof( mbedtls_ctr_drbg_context ) ); + ctx->reseed_interval = MBEDTLS_CTR_DRBG_RESEED_INTERVAL; +#if defined(MBEDTLS_THREADING_C) + mbedtls_mutex_init( &ctx->mutex ); +#endif } void mbedtls_ctr_drbg_set_prediction_resistance( mbedtls_ctr_drbg_context *ctx, int resistance ) @@ -419,7 +429,6 @@ int mbedtls_ctr_drbg_seed( mbedtls_ctr_drbg_context *ctx, if( ctx->entropy_len == 0 ) ctx->entropy_len = MBEDTLS_CTR_DRBG_ENTROPY_LEN; - ctx->reseed_interval = MBEDTLS_CTR_DRBG_RESEED_INTERVAL; /* * Initialize with an empty key diff --git a/thirdparty/mbedtls/library/ecp_curves.c b/thirdparty/mbedtls/library/ecp_curves.c index cc4c5b71c0..b04596b561 100644 --- a/thirdparty/mbedtls/library/ecp_curves.c +++ b/thirdparty/mbedtls/library/ecp_curves.c @@ -1044,7 +1044,7 @@ static inline void sub32( uint32_t *dst, uint32_t src, signed char *carry ) STORE32; i++; \ cur = c > 0 ? 
c : 0; STORE32; \ cur = 0; while( ++i < MAX32 ) { STORE32; } \ - if( c < 0 ) fix_negative( N, c, &C, bits ); + if( c < 0 ) MBEDTLS_MPI_CHK( fix_negative( N, c, &C, bits ) ); /* * If the result is negative, we get it in the form diff --git a/thirdparty/mbedtls/library/entropy_poll.c b/thirdparty/mbedtls/library/entropy_poll.c index 26b7e4e2b9..2095a7dd34 100644 --- a/thirdparty/mbedtls/library/entropy_poll.c +++ b/thirdparty/mbedtls/library/entropy_poll.c @@ -44,7 +44,7 @@ * ********** */ -#if defined(__linux__) +#if defined(__linux__) && !defined(_GNU_SOURCE) /* Ensure that syscall() is available even when compiling with -std=c99 */ #define _GNU_SOURCE #endif diff --git a/thirdparty/mbedtls/library/error.c b/thirdparty/mbedtls/library/error.c index eb52052b51..b83b8d1f1b 100644 --- a/thirdparty/mbedtls/library/error.c +++ b/thirdparty/mbedtls/library/error.c @@ -51,20 +51,19 @@ #endif #if defined(MBEDTLS_ERROR_C) || defined(MBEDTLS_ERROR_STRERROR_DUMMY) + #include "mbedtls/error.h" -#include <string.h> -#endif + +#if defined(MBEDTLS_ERROR_C) #if defined(MBEDTLS_PLATFORM_C) #include "mbedtls/platform.h" #else #define mbedtls_snprintf snprintf -#define mbedtls_time_t time_t #endif -#if defined(MBEDTLS_ERROR_C) - #include <stdio.h> +#include <string.h> #if defined(MBEDTLS_AES_C) #include "mbedtls/aes.h" @@ -929,8 +928,6 @@ void mbedtls_strerror( int ret, char *buf, size_t buflen ) #else /* MBEDTLS_ERROR_C */ -#if defined(MBEDTLS_ERROR_STRERROR_DUMMY) - /* * Provide an non-function in case MBEDTLS_ERROR_C is not defined */ @@ -942,6 +939,6 @@ void mbedtls_strerror( int ret, char *buf, size_t buflen ) buf[0] = '\0'; } -#endif /* MBEDTLS_ERROR_STRERROR_DUMMY */ - #endif /* MBEDTLS_ERROR_C */ + +#endif /* MBEDTLS_ERROR_C || MBEDTLS_ERROR_STRERROR_DUMMY */ diff --git a/thirdparty/mbedtls/library/hmac_drbg.c b/thirdparty/mbedtls/library/hmac_drbg.c index 9fbfc30660..10cbd462ba 100644 --- a/thirdparty/mbedtls/library/hmac_drbg.c +++ b/thirdparty/mbedtls/library/hmac_drbg.c 
@@ -83,6 +83,8 @@ void mbedtls_hmac_drbg_init( mbedtls_hmac_drbg_context *ctx ) { memset( ctx, 0, sizeof( mbedtls_hmac_drbg_context ) ); + ctx->reseed_interval = MBEDTLS_HMAC_DRBG_RESEED_INTERVAL; + #if defined(MBEDTLS_THREADING_C) mbedtls_mutex_init( &ctx->mutex ); #endif @@ -296,8 +298,6 @@ int mbedtls_hmac_drbg_seed( mbedtls_hmac_drbg_context *ctx, ctx->f_entropy = f_entropy; ctx->p_entropy = p_entropy; - ctx->reseed_interval = MBEDTLS_HMAC_DRBG_RESEED_INTERVAL; - if( ctx->entropy_len == 0 ) { /* @@ -442,7 +442,8 @@ int mbedtls_hmac_drbg_random( void *p_rng, unsigned char *output, size_t out_len } /* - * Free an HMAC_DRBG context + * This function resets HMAC_DRBG context to the state immediately + * after initial call of mbedtls_hmac_drbg_init(). */ void mbedtls_hmac_drbg_free( mbedtls_hmac_drbg_context *ctx ) { @@ -454,6 +455,10 @@ void mbedtls_hmac_drbg_free( mbedtls_hmac_drbg_context *ctx ) #endif mbedtls_md_free( &ctx->md_ctx ); mbedtls_platform_zeroize( ctx, sizeof( mbedtls_hmac_drbg_context ) ); + ctx->reseed_interval = MBEDTLS_HMAC_DRBG_RESEED_INTERVAL; +#if defined(MBEDTLS_THREADING_C) + mbedtls_mutex_init( &ctx->mutex ); +#endif } #if defined(MBEDTLS_FS_IO) diff --git a/thirdparty/mbedtls/library/md2.c b/thirdparty/mbedtls/library/md2.c index cbdaaabdc7..fdcb630a1f 100644 --- a/thirdparty/mbedtls/library/md2.c +++ b/thirdparty/mbedtls/library/md2.c @@ -177,6 +177,9 @@ int mbedtls_internal_md2_process( mbedtls_md2_context *ctx ) t = ctx->cksum[i]; } + /* Zeroise variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &t, sizeof( t ) ); + return( 0 ); } diff --git a/thirdparty/mbedtls/library/md4.c b/thirdparty/mbedtls/library/md4.c index cb16dce54a..95e893e654 100644 --- a/thirdparty/mbedtls/library/md4.c +++ b/thirdparty/mbedtls/library/md4.c @@ -143,31 +143,34 @@ void mbedtls_md4_starts( mbedtls_md4_context *ctx ) int mbedtls_internal_md4_process( mbedtls_md4_context *ctx, const unsigned char data[64] ) { - uint32_t X[16], A, B, C, D; - - GET_UINT32_LE( X[ 0], data, 0 ); - GET_UINT32_LE( X[ 1], data, 4 ); - GET_UINT32_LE( X[ 2], data, 8 ); - GET_UINT32_LE( X[ 3], data, 12 ); - GET_UINT32_LE( X[ 4], data, 16 ); - GET_UINT32_LE( X[ 5], data, 20 ); - GET_UINT32_LE( X[ 6], data, 24 ); - GET_UINT32_LE( X[ 7], data, 28 ); - GET_UINT32_LE( X[ 8], data, 32 ); - GET_UINT32_LE( X[ 9], data, 36 ); - GET_UINT32_LE( X[10], data, 40 ); - GET_UINT32_LE( X[11], data, 44 ); - GET_UINT32_LE( X[12], data, 48 ); - GET_UINT32_LE( X[13], data, 52 ); - GET_UINT32_LE( X[14], data, 56 ); - GET_UINT32_LE( X[15], data, 60 ); + struct + { + uint32_t X[16], A, B, C, D; + } local; + + GET_UINT32_LE( local.X[ 0], data, 0 ); + GET_UINT32_LE( local.X[ 1], data, 4 ); + GET_UINT32_LE( local.X[ 2], data, 8 ); + GET_UINT32_LE( local.X[ 3], data, 12 ); + GET_UINT32_LE( local.X[ 4], data, 16 ); + GET_UINT32_LE( local.X[ 5], data, 20 ); + GET_UINT32_LE( local.X[ 6], data, 24 ); + GET_UINT32_LE( local.X[ 7], data, 28 ); + GET_UINT32_LE( local.X[ 8], data, 32 ); + GET_UINT32_LE( local.X[ 9], data, 36 ); + GET_UINT32_LE( local.X[10], data, 40 ); + GET_UINT32_LE( local.X[11], data, 44 ); + GET_UINT32_LE( local.X[12], data, 48 ); + GET_UINT32_LE( local.X[13], data, 52 ); + GET_UINT32_LE( local.X[14], data, 56 ); + GET_UINT32_LE( local.X[15], data, 60 ); #define S(x,n) (((x) << (n)) | (((x) & 0xFFFFFFFF) >> (32 - (n)))) - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; + local.A = ctx->state[0]; + local.B = ctx->state[1]; + local.C = ctx->state[2]; + 
local.D = ctx->state[3]; #define F(x, y, z) (((x) & (y)) | ((~(x)) & (z))) #define P(a,b,c,d,x,s) \ @@ -178,22 +181,22 @@ int mbedtls_internal_md4_process( mbedtls_md4_context *ctx, } while( 0 ) - P( A, B, C, D, X[ 0], 3 ); - P( D, A, B, C, X[ 1], 7 ); - P( C, D, A, B, X[ 2], 11 ); - P( B, C, D, A, X[ 3], 19 ); - P( A, B, C, D, X[ 4], 3 ); - P( D, A, B, C, X[ 5], 7 ); - P( C, D, A, B, X[ 6], 11 ); - P( B, C, D, A, X[ 7], 19 ); - P( A, B, C, D, X[ 8], 3 ); - P( D, A, B, C, X[ 9], 7 ); - P( C, D, A, B, X[10], 11 ); - P( B, C, D, A, X[11], 19 ); - P( A, B, C, D, X[12], 3 ); - P( D, A, B, C, X[13], 7 ); - P( C, D, A, B, X[14], 11 ); - P( B, C, D, A, X[15], 19 ); + P( local.A, local.B, local.C, local.D, local.X[ 0], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 1], 7 ); + P( local.C, local.D, local.A, local.B, local.X[ 2], 11 ); + P( local.B, local.C, local.D, local.A, local.X[ 3], 19 ); + P( local.A, local.B, local.C, local.D, local.X[ 4], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 5], 7 ); + P( local.C, local.D, local.A, local.B, local.X[ 6], 11 ); + P( local.B, local.C, local.D, local.A, local.X[ 7], 19 ); + P( local.A, local.B, local.C, local.D, local.X[ 8], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 9], 7 ); + P( local.C, local.D, local.A, local.B, local.X[10], 11 ); + P( local.B, local.C, local.D, local.A, local.X[11], 19 ); + P( local.A, local.B, local.C, local.D, local.X[12], 3 ); + P( local.D, local.A, local.B, local.C, local.X[13], 7 ); + P( local.C, local.D, local.A, local.B, local.X[14], 11 ); + P( local.B, local.C, local.D, local.A, local.X[15], 19 ); #undef P #undef F @@ -206,22 +209,22 @@ int mbedtls_internal_md4_process( mbedtls_md4_context *ctx, (a) = S((a),(s)); \ } while( 0 ) - P( A, B, C, D, X[ 0], 3 ); - P( D, A, B, C, X[ 4], 5 ); - P( C, D, A, B, X[ 8], 9 ); - P( B, C, D, A, X[12], 13 ); - P( A, B, C, D, X[ 1], 3 ); - P( D, A, B, C, X[ 5], 5 ); - P( C, D, A, B, X[ 9], 9 ); - P( B, C, D, A, X[13], 13 ); - P( A, B, C, 
D, X[ 2], 3 ); - P( D, A, B, C, X[ 6], 5 ); - P( C, D, A, B, X[10], 9 ); - P( B, C, D, A, X[14], 13 ); - P( A, B, C, D, X[ 3], 3 ); - P( D, A, B, C, X[ 7], 5 ); - P( C, D, A, B, X[11], 9 ); - P( B, C, D, A, X[15], 13 ); + P( local.A, local.B, local.C, local.D, local.X[ 0], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 4], 5 ); + P( local.C, local.D, local.A, local.B, local.X[ 8], 9 ); + P( local.B, local.C, local.D, local.A, local.X[12], 13 ); + P( local.A, local.B, local.C, local.D, local.X[ 1], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 5], 5 ); + P( local.C, local.D, local.A, local.B, local.X[ 9], 9 ); + P( local.B, local.C, local.D, local.A, local.X[13], 13 ); + P( local.A, local.B, local.C, local.D, local.X[ 2], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 6], 5 ); + P( local.C, local.D, local.A, local.B, local.X[10], 9 ); + P( local.B, local.C, local.D, local.A, local.X[14], 13 ); + P( local.A, local.B, local.C, local.D, local.X[ 3], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 7], 5 ); + P( local.C, local.D, local.A, local.B, local.X[11], 9 ); + P( local.B, local.C, local.D, local.A, local.X[15], 13 ); #undef P #undef F @@ -234,30 +237,33 @@ int mbedtls_internal_md4_process( mbedtls_md4_context *ctx, (a) = S((a),(s)); \ } while( 0 ) - P( A, B, C, D, X[ 0], 3 ); - P( D, A, B, C, X[ 8], 9 ); - P( C, D, A, B, X[ 4], 11 ); - P( B, C, D, A, X[12], 15 ); - P( A, B, C, D, X[ 2], 3 ); - P( D, A, B, C, X[10], 9 ); - P( C, D, A, B, X[ 6], 11 ); - P( B, C, D, A, X[14], 15 ); - P( A, B, C, D, X[ 1], 3 ); - P( D, A, B, C, X[ 9], 9 ); - P( C, D, A, B, X[ 5], 11 ); - P( B, C, D, A, X[13], 15 ); - P( A, B, C, D, X[ 3], 3 ); - P( D, A, B, C, X[11], 9 ); - P( C, D, A, B, X[ 7], 11 ); - P( B, C, D, A, X[15], 15 ); + P( local.A, local.B, local.C, local.D, local.X[ 0], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 8], 9 ); + P( local.C, local.D, local.A, local.B, local.X[ 4], 11 ); + P( local.B, local.C, local.D, local.A, 
local.X[12], 15 ); + P( local.A, local.B, local.C, local.D, local.X[ 2], 3 ); + P( local.D, local.A, local.B, local.C, local.X[10], 9 ); + P( local.C, local.D, local.A, local.B, local.X[ 6], 11 ); + P( local.B, local.C, local.D, local.A, local.X[14], 15 ); + P( local.A, local.B, local.C, local.D, local.X[ 1], 3 ); + P( local.D, local.A, local.B, local.C, local.X[ 9], 9 ); + P( local.C, local.D, local.A, local.B, local.X[ 5], 11 ); + P( local.B, local.C, local.D, local.A, local.X[13], 15 ); + P( local.A, local.B, local.C, local.D, local.X[ 3], 3 ); + P( local.D, local.A, local.B, local.C, local.X[11], 9 ); + P( local.C, local.D, local.A, local.B, local.X[ 7], 11 ); + P( local.B, local.C, local.D, local.A, local.X[15], 15 ); #undef F #undef P - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; + ctx->state[0] += local.A; + ctx->state[1] += local.B; + ctx->state[2] += local.C; + ctx->state[3] += local.D; + + /* Zeroise variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/md5.c b/thirdparty/mbedtls/library/md5.c index fe25925214..d2b634fbb1 100644 --- a/thirdparty/mbedtls/library/md5.c +++ b/thirdparty/mbedtls/library/md5.c @@ -142,128 +142,134 @@ void mbedtls_md5_starts( mbedtls_md5_context *ctx ) int mbedtls_internal_md5_process( mbedtls_md5_context *ctx, const unsigned char data[64] ) { - uint32_t X[16], A, B, C, D; - - GET_UINT32_LE( X[ 0], data, 0 ); - GET_UINT32_LE( X[ 1], data, 4 ); - GET_UINT32_LE( X[ 2], data, 8 ); - GET_UINT32_LE( X[ 3], data, 12 ); - GET_UINT32_LE( X[ 4], data, 16 ); - GET_UINT32_LE( X[ 5], data, 20 ); - GET_UINT32_LE( X[ 6], data, 24 ); - GET_UINT32_LE( X[ 7], data, 28 ); - GET_UINT32_LE( X[ 8], data, 32 ); - GET_UINT32_LE( X[ 9], data, 36 ); - GET_UINT32_LE( X[10], data, 40 ); - GET_UINT32_LE( X[11], data, 44 ); - GET_UINT32_LE( X[12], data, 48 ); - GET_UINT32_LE( X[13], data, 52 ); - GET_UINT32_LE( X[14], data, 56 ); - GET_UINT32_LE( X[15], data, 60 ); + struct + { + uint32_t X[16], A, B, C, D; + } local; + + GET_UINT32_LE( local.X[ 0], data, 0 ); + GET_UINT32_LE( local.X[ 1], data, 4 ); + GET_UINT32_LE( local.X[ 2], data, 8 ); + GET_UINT32_LE( local.X[ 3], data, 12 ); + GET_UINT32_LE( local.X[ 4], data, 16 ); + GET_UINT32_LE( local.X[ 5], data, 20 ); + GET_UINT32_LE( local.X[ 6], data, 24 ); + GET_UINT32_LE( local.X[ 7], data, 28 ); + GET_UINT32_LE( local.X[ 8], data, 32 ); + GET_UINT32_LE( local.X[ 9], data, 36 ); + GET_UINT32_LE( local.X[10], data, 40 ); + GET_UINT32_LE( local.X[11], data, 44 ); + GET_UINT32_LE( local.X[12], data, 48 ); + GET_UINT32_LE( local.X[13], data, 52 ); + GET_UINT32_LE( local.X[14], data, 56 ); + GET_UINT32_LE( local.X[15], data, 60 ); #define S(x,n) \ ( ( (x) << (n) ) | ( ( (x) & 0xFFFFFFFF) >> ( 32 - (n) ) ) ) -#define P(a,b,c,d,k,s,t) \ - do \ - { \ - (a) += F((b),(c),(d)) + X[(k)] + (t); \ - (a) = S((a),(s)) + (b); \ +#define P(a,b,c,d,k,s,t) \ + do \ + { \ + 
(a) += F((b),(c),(d)) + local.X[(k)] + (t); \ + (a) = S((a),(s)) + (b); \ } while( 0 ) - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; + local.A = ctx->state[0]; + local.B = ctx->state[1]; + local.C = ctx->state[2]; + local.D = ctx->state[3]; #define F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) - P( A, B, C, D, 0, 7, 0xD76AA478 ); - P( D, A, B, C, 1, 12, 0xE8C7B756 ); - P( C, D, A, B, 2, 17, 0x242070DB ); - P( B, C, D, A, 3, 22, 0xC1BDCEEE ); - P( A, B, C, D, 4, 7, 0xF57C0FAF ); - P( D, A, B, C, 5, 12, 0x4787C62A ); - P( C, D, A, B, 6, 17, 0xA8304613 ); - P( B, C, D, A, 7, 22, 0xFD469501 ); - P( A, B, C, D, 8, 7, 0x698098D8 ); - P( D, A, B, C, 9, 12, 0x8B44F7AF ); - P( C, D, A, B, 10, 17, 0xFFFF5BB1 ); - P( B, C, D, A, 11, 22, 0x895CD7BE ); - P( A, B, C, D, 12, 7, 0x6B901122 ); - P( D, A, B, C, 13, 12, 0xFD987193 ); - P( C, D, A, B, 14, 17, 0xA679438E ); - P( B, C, D, A, 15, 22, 0x49B40821 ); + P( local.A, local.B, local.C, local.D, 0, 7, 0xD76AA478 ); + P( local.D, local.A, local.B, local.C, 1, 12, 0xE8C7B756 ); + P( local.C, local.D, local.A, local.B, 2, 17, 0x242070DB ); + P( local.B, local.C, local.D, local.A, 3, 22, 0xC1BDCEEE ); + P( local.A, local.B, local.C, local.D, 4, 7, 0xF57C0FAF ); + P( local.D, local.A, local.B, local.C, 5, 12, 0x4787C62A ); + P( local.C, local.D, local.A, local.B, 6, 17, 0xA8304613 ); + P( local.B, local.C, local.D, local.A, 7, 22, 0xFD469501 ); + P( local.A, local.B, local.C, local.D, 8, 7, 0x698098D8 ); + P( local.D, local.A, local.B, local.C, 9, 12, 0x8B44F7AF ); + P( local.C, local.D, local.A, local.B, 10, 17, 0xFFFF5BB1 ); + P( local.B, local.C, local.D, local.A, 11, 22, 0x895CD7BE ); + P( local.A, local.B, local.C, local.D, 12, 7, 0x6B901122 ); + P( local.D, local.A, local.B, local.C, 13, 12, 0xFD987193 ); + P( local.C, local.D, local.A, local.B, 14, 17, 0xA679438E ); + P( local.B, local.C, local.D, local.A, 15, 22, 0x49B40821 ); #undef F #define F(x,y,z) ((y) ^ ((z) & ((x) ^ (y)))) - P( A, B, C, D, 
1, 5, 0xF61E2562 ); - P( D, A, B, C, 6, 9, 0xC040B340 ); - P( C, D, A, B, 11, 14, 0x265E5A51 ); - P( B, C, D, A, 0, 20, 0xE9B6C7AA ); - P( A, B, C, D, 5, 5, 0xD62F105D ); - P( D, A, B, C, 10, 9, 0x02441453 ); - P( C, D, A, B, 15, 14, 0xD8A1E681 ); - P( B, C, D, A, 4, 20, 0xE7D3FBC8 ); - P( A, B, C, D, 9, 5, 0x21E1CDE6 ); - P( D, A, B, C, 14, 9, 0xC33707D6 ); - P( C, D, A, B, 3, 14, 0xF4D50D87 ); - P( B, C, D, A, 8, 20, 0x455A14ED ); - P( A, B, C, D, 13, 5, 0xA9E3E905 ); - P( D, A, B, C, 2, 9, 0xFCEFA3F8 ); - P( C, D, A, B, 7, 14, 0x676F02D9 ); - P( B, C, D, A, 12, 20, 0x8D2A4C8A ); + P( local.A, local.B, local.C, local.D, 1, 5, 0xF61E2562 ); + P( local.D, local.A, local.B, local.C, 6, 9, 0xC040B340 ); + P( local.C, local.D, local.A, local.B, 11, 14, 0x265E5A51 ); + P( local.B, local.C, local.D, local.A, 0, 20, 0xE9B6C7AA ); + P( local.A, local.B, local.C, local.D, 5, 5, 0xD62F105D ); + P( local.D, local.A, local.B, local.C, 10, 9, 0x02441453 ); + P( local.C, local.D, local.A, local.B, 15, 14, 0xD8A1E681 ); + P( local.B, local.C, local.D, local.A, 4, 20, 0xE7D3FBC8 ); + P( local.A, local.B, local.C, local.D, 9, 5, 0x21E1CDE6 ); + P( local.D, local.A, local.B, local.C, 14, 9, 0xC33707D6 ); + P( local.C, local.D, local.A, local.B, 3, 14, 0xF4D50D87 ); + P( local.B, local.C, local.D, local.A, 8, 20, 0x455A14ED ); + P( local.A, local.B, local.C, local.D, 13, 5, 0xA9E3E905 ); + P( local.D, local.A, local.B, local.C, 2, 9, 0xFCEFA3F8 ); + P( local.C, local.D, local.A, local.B, 7, 14, 0x676F02D9 ); + P( local.B, local.C, local.D, local.A, 12, 20, 0x8D2A4C8A ); #undef F #define F(x,y,z) ((x) ^ (y) ^ (z)) - P( A, B, C, D, 5, 4, 0xFFFA3942 ); - P( D, A, B, C, 8, 11, 0x8771F681 ); - P( C, D, A, B, 11, 16, 0x6D9D6122 ); - P( B, C, D, A, 14, 23, 0xFDE5380C ); - P( A, B, C, D, 1, 4, 0xA4BEEA44 ); - P( D, A, B, C, 4, 11, 0x4BDECFA9 ); - P( C, D, A, B, 7, 16, 0xF6BB4B60 ); - P( B, C, D, A, 10, 23, 0xBEBFBC70 ); - P( A, B, C, D, 13, 4, 0x289B7EC6 ); - P( D, A, B, C, 0, 11, 
0xEAA127FA ); - P( C, D, A, B, 3, 16, 0xD4EF3085 ); - P( B, C, D, A, 6, 23, 0x04881D05 ); - P( A, B, C, D, 9, 4, 0xD9D4D039 ); - P( D, A, B, C, 12, 11, 0xE6DB99E5 ); - P( C, D, A, B, 15, 16, 0x1FA27CF8 ); - P( B, C, D, A, 2, 23, 0xC4AC5665 ); + P( local.A, local.B, local.C, local.D, 5, 4, 0xFFFA3942 ); + P( local.D, local.A, local.B, local.C, 8, 11, 0x8771F681 ); + P( local.C, local.D, local.A, local.B, 11, 16, 0x6D9D6122 ); + P( local.B, local.C, local.D, local.A, 14, 23, 0xFDE5380C ); + P( local.A, local.B, local.C, local.D, 1, 4, 0xA4BEEA44 ); + P( local.D, local.A, local.B, local.C, 4, 11, 0x4BDECFA9 ); + P( local.C, local.D, local.A, local.B, 7, 16, 0xF6BB4B60 ); + P( local.B, local.C, local.D, local.A, 10, 23, 0xBEBFBC70 ); + P( local.A, local.B, local.C, local.D, 13, 4, 0x289B7EC6 ); + P( local.D, local.A, local.B, local.C, 0, 11, 0xEAA127FA ); + P( local.C, local.D, local.A, local.B, 3, 16, 0xD4EF3085 ); + P( local.B, local.C, local.D, local.A, 6, 23, 0x04881D05 ); + P( local.A, local.B, local.C, local.D, 9, 4, 0xD9D4D039 ); + P( local.D, local.A, local.B, local.C, 12, 11, 0xE6DB99E5 ); + P( local.C, local.D, local.A, local.B, 15, 16, 0x1FA27CF8 ); + P( local.B, local.C, local.D, local.A, 2, 23, 0xC4AC5665 ); #undef F #define F(x,y,z) ((y) ^ ((x) | ~(z))) - P( A, B, C, D, 0, 6, 0xF4292244 ); - P( D, A, B, C, 7, 10, 0x432AFF97 ); - P( C, D, A, B, 14, 15, 0xAB9423A7 ); - P( B, C, D, A, 5, 21, 0xFC93A039 ); - P( A, B, C, D, 12, 6, 0x655B59C3 ); - P( D, A, B, C, 3, 10, 0x8F0CCC92 ); - P( C, D, A, B, 10, 15, 0xFFEFF47D ); - P( B, C, D, A, 1, 21, 0x85845DD1 ); - P( A, B, C, D, 8, 6, 0x6FA87E4F ); - P( D, A, B, C, 15, 10, 0xFE2CE6E0 ); - P( C, D, A, B, 6, 15, 0xA3014314 ); - P( B, C, D, A, 13, 21, 0x4E0811A1 ); - P( A, B, C, D, 4, 6, 0xF7537E82 ); - P( D, A, B, C, 11, 10, 0xBD3AF235 ); - P( C, D, A, B, 2, 15, 0x2AD7D2BB ); - P( B, C, D, A, 9, 21, 0xEB86D391 ); + P( local.A, local.B, local.C, local.D, 0, 6, 0xF4292244 ); + P( local.D, local.A, local.B, local.C, 7, 
10, 0x432AFF97 ); + P( local.C, local.D, local.A, local.B, 14, 15, 0xAB9423A7 ); + P( local.B, local.C, local.D, local.A, 5, 21, 0xFC93A039 ); + P( local.A, local.B, local.C, local.D, 12, 6, 0x655B59C3 ); + P( local.D, local.A, local.B, local.C, 3, 10, 0x8F0CCC92 ); + P( local.C, local.D, local.A, local.B, 10, 15, 0xFFEFF47D ); + P( local.B, local.C, local.D, local.A, 1, 21, 0x85845DD1 ); + P( local.A, local.B, local.C, local.D, 8, 6, 0x6FA87E4F ); + P( local.D, local.A, local.B, local.C, 15, 10, 0xFE2CE6E0 ); + P( local.C, local.D, local.A, local.B, 6, 15, 0xA3014314 ); + P( local.B, local.C, local.D, local.A, 13, 21, 0x4E0811A1 ); + P( local.A, local.B, local.C, local.D, 4, 6, 0xF7537E82 ); + P( local.D, local.A, local.B, local.C, 11, 10, 0xBD3AF235 ); + P( local.C, local.D, local.A, local.B, 2, 15, 0x2AD7D2BB ); + P( local.B, local.C, local.D, local.A, 9, 21, 0xEB86D391 ); #undef F - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; + ctx->state[0] += local.A; + ctx->state[1] += local.B; + ctx->state[2] += local.C; + ctx->state[3] += local.D; + + /* Zeroise variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/pem.c b/thirdparty/mbedtls/library/pem.c index a7a2f7f5cf..50e663ccdb 100644 --- a/thirdparty/mbedtls/library/pem.c +++ b/thirdparty/mbedtls/library/pem.c @@ -508,8 +508,12 @@ int mbedtls_pem_write_buffer( const char *header, const char *footer, *p++ = '\0'; *olen = p - buf; + /* Clean any remaining data previously written to the buffer */ + memset( buf + *olen, 0, buf_len - *olen ); + mbedtls_free( encode_buf ); return( 0 ); } #endif /* MBEDTLS_PEM_WRITE_C */ #endif /* MBEDTLS_PEM_PARSE_C || MBEDTLS_PEM_WRITE_C */ + diff --git a/thirdparty/mbedtls/library/pkcs5.c b/thirdparty/mbedtls/library/pkcs5.c index 8a80aa5d05..c4447f1546 100644 --- a/thirdparty/mbedtls/library/pkcs5.c +++ b/thirdparty/mbedtls/library/pkcs5.c @@ -247,7 +247,7 @@ int mbedtls_pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx, const unsigned char *p unsigned int iteration_count, uint32_t key_length, unsigned char *output ) { - int ret, j; + int ret = 0, j; unsigned int i; unsigned char md1[MBEDTLS_MD_MAX_SIZE]; unsigned char work[MBEDTLS_MD_MAX_SIZE]; @@ -269,16 +269,16 @@ int mbedtls_pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx, const unsigned char *p // U1 ends up in work // if( ( ret = mbedtls_md_hmac_starts( ctx, password, plen ) ) != 0 ) - return( ret ); + goto cleanup; if( ( ret = mbedtls_md_hmac_update( ctx, salt, slen ) ) != 0 ) - return( ret ); + goto cleanup; if( ( ret = mbedtls_md_hmac_update( ctx, counter, 4 ) ) != 0 ) - return( ret ); + goto cleanup; if( ( ret = mbedtls_md_hmac_finish( ctx, work ) ) != 0 ) - return( ret ); + goto cleanup; memcpy( md1, work, md_size ); @@ -287,13 +287,13 @@ int mbedtls_pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx, const unsigned char *p // U2 ends up in md1 // if( ( ret = mbedtls_md_hmac_starts( ctx, password, plen ) ) != 0 ) - return( ret ); + goto cleanup; if( ( ret = mbedtls_md_hmac_update( ctx, md1, md_size ) ) != 0 ) - return( ret ); + 
goto cleanup; if( ( ret = mbedtls_md_hmac_finish( ctx, md1 ) ) != 0 ) - return( ret ); + goto cleanup; // U1 xor U2 // @@ -312,7 +312,12 @@ int mbedtls_pkcs5_pbkdf2_hmac( mbedtls_md_context_t *ctx, const unsigned char *p break; } - return( 0 ); +cleanup: + /* Zeroise buffers to clear sensitive data from memory. */ + mbedtls_platform_zeroize( work, MBEDTLS_MD_MAX_SIZE ); + mbedtls_platform_zeroize( md1, MBEDTLS_MD_MAX_SIZE ); + + return( ret ); } #if defined(MBEDTLS_SELF_TEST) diff --git a/thirdparty/mbedtls/library/pkparse.c b/thirdparty/mbedtls/library/pkparse.c index 086807d836..e410f3aae1 100644 --- a/thirdparty/mbedtls/library/pkparse.c +++ b/thirdparty/mbedtls/library/pkparse.c @@ -692,7 +692,7 @@ int mbedtls_pk_parse_subpubkey( unsigned char **p, const unsigned char *end, ret = MBEDTLS_ERR_PK_UNKNOWN_PK_ALG; if( ret == 0 && *p != end ) - ret = MBEDTLS_ERR_PK_INVALID_PUBKEY + ret = MBEDTLS_ERR_PK_INVALID_PUBKEY + MBEDTLS_ERR_ASN1_LENGTH_MISMATCH; if( ret != 0 ) diff --git a/thirdparty/mbedtls/library/platform_util.c b/thirdparty/mbedtls/library/platform_util.c index 3ba2aead12..c8cd52d52a 100644 --- a/thirdparty/mbedtls/library/platform_util.c +++ b/thirdparty/mbedtls/library/platform_util.c @@ -115,7 +115,7 @@ void mbedtls_platform_zeroize( void *buf, size_t len ) #if !( ( defined(_POSIX_VERSION) && _POSIX_VERSION >= 200809L ) || \ ( defined(_POSIX_THREAD_SAFE_FUNCTIONS ) && \ - _POSIX_THREAD_SAFE_FUNCTIONS >= 20112L ) ) + _POSIX_THREAD_SAFE_FUNCTIONS >= 200112L ) ) /* * This is a convenience shorthand macro to avoid checking the long * preprocessor conditions above. 
Ideally, we could expose this macro in @@ -129,7 +129,7 @@ void mbedtls_platform_zeroize( void *buf, size_t len ) #endif /* !( ( defined(_POSIX_VERSION) && _POSIX_VERSION >= 200809L ) || \ ( defined(_POSIX_THREAD_SAFE_FUNCTIONS ) && \ - _POSIX_THREAD_SAFE_FUNCTIONS >= 20112L ) ) */ + _POSIX_THREAD_SAFE_FUNCTIONS >= 200112L ) ) */ struct tm *mbedtls_platform_gmtime_r( const mbedtls_time_t *tt, struct tm *tm_buf ) diff --git a/thirdparty/mbedtls/library/ripemd160.c b/thirdparty/mbedtls/library/ripemd160.c index 0b6efcb574..d6ee933b2e 100644 --- a/thirdparty/mbedtls/library/ripemd160.c +++ b/thirdparty/mbedtls/library/ripemd160.c @@ -147,30 +147,33 @@ void mbedtls_ripemd160_starts( mbedtls_ripemd160_context *ctx ) int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, const unsigned char data[64] ) { - uint32_t A, B, C, D, E, Ap, Bp, Cp, Dp, Ep, X[16]; - - GET_UINT32_LE( X[ 0], data, 0 ); - GET_UINT32_LE( X[ 1], data, 4 ); - GET_UINT32_LE( X[ 2], data, 8 ); - GET_UINT32_LE( X[ 3], data, 12 ); - GET_UINT32_LE( X[ 4], data, 16 ); - GET_UINT32_LE( X[ 5], data, 20 ); - GET_UINT32_LE( X[ 6], data, 24 ); - GET_UINT32_LE( X[ 7], data, 28 ); - GET_UINT32_LE( X[ 8], data, 32 ); - GET_UINT32_LE( X[ 9], data, 36 ); - GET_UINT32_LE( X[10], data, 40 ); - GET_UINT32_LE( X[11], data, 44 ); - GET_UINT32_LE( X[12], data, 48 ); - GET_UINT32_LE( X[13], data, 52 ); - GET_UINT32_LE( X[14], data, 56 ); - GET_UINT32_LE( X[15], data, 60 ); - - A = Ap = ctx->state[0]; - B = Bp = ctx->state[1]; - C = Cp = ctx->state[2]; - D = Dp = ctx->state[3]; - E = Ep = ctx->state[4]; + struct + { + uint32_t A, B, C, D, E, Ap, Bp, Cp, Dp, Ep, X[16]; + } local; + + GET_UINT32_LE( local.X[ 0], data, 0 ); + GET_UINT32_LE( local.X[ 1], data, 4 ); + GET_UINT32_LE( local.X[ 2], data, 8 ); + GET_UINT32_LE( local.X[ 3], data, 12 ); + GET_UINT32_LE( local.X[ 4], data, 16 ); + GET_UINT32_LE( local.X[ 5], data, 20 ); + GET_UINT32_LE( local.X[ 6], data, 24 ); + GET_UINT32_LE( local.X[ 7], data, 28 ); + 
GET_UINT32_LE( local.X[ 8], data, 32 ); + GET_UINT32_LE( local.X[ 9], data, 36 ); + GET_UINT32_LE( local.X[10], data, 40 ); + GET_UINT32_LE( local.X[11], data, 44 ); + GET_UINT32_LE( local.X[12], data, 48 ); + GET_UINT32_LE( local.X[13], data, 52 ); + GET_UINT32_LE( local.X[14], data, 56 ); + GET_UINT32_LE( local.X[15], data, 60 ); + + local.A = local.Ap = ctx->state[0]; + local.B = local.Bp = ctx->state[1]; + local.C = local.Cp = ctx->state[2]; + local.D = local.Dp = ctx->state[3]; + local.E = local.Ep = ctx->state[4]; #define F1( x, y, z ) ( (x) ^ (y) ^ (z) ) #define F2( x, y, z ) ( ( (x) & (y) ) | ( ~(x) & (z) ) ) @@ -180,12 +183,12 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define S( x, n ) ( ( (x) << (n) ) | ( (x) >> (32 - (n)) ) ) -#define P( a, b, c, d, e, r, s, f, k ) \ - do \ - { \ - (a) += f( (b), (c), (d) ) + X[r] + (k); \ - (a) = S( (a), (s) ) + (e); \ - (c) = S( (c), 10 ); \ +#define P( a, b, c, d, e, r, s, f, k ) \ + do \ + { \ + (a) += f( (b), (c), (d) ) + local.X[r] + (k); \ + (a) = S( (a), (s) ) + (e); \ + (c) = S( (c), 10 ); \ } while( 0 ) #define P2( a, b, c, d, e, r, s, rp, sp ) \ @@ -200,22 +203,22 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define K 0x00000000 #define Fp F5 #define Kp 0x50A28BE6 - P2( A, B, C, D, E, 0, 11, 5, 8 ); - P2( E, A, B, C, D, 1, 14, 14, 9 ); - P2( D, E, A, B, C, 2, 15, 7, 9 ); - P2( C, D, E, A, B, 3, 12, 0, 11 ); - P2( B, C, D, E, A, 4, 5, 9, 13 ); - P2( A, B, C, D, E, 5, 8, 2, 15 ); - P2( E, A, B, C, D, 6, 7, 11, 15 ); - P2( D, E, A, B, C, 7, 9, 4, 5 ); - P2( C, D, E, A, B, 8, 11, 13, 7 ); - P2( B, C, D, E, A, 9, 13, 6, 7 ); - P2( A, B, C, D, E, 10, 14, 15, 8 ); - P2( E, A, B, C, D, 11, 15, 8, 11 ); - P2( D, E, A, B, C, 12, 6, 1, 14 ); - P2( C, D, E, A, B, 13, 7, 10, 14 ); - P2( B, C, D, E, A, 14, 9, 3, 12 ); - P2( A, B, C, D, E, 15, 8, 12, 6 ); + P2( local.A, local.B, local.C, local.D, local.E, 0, 11, 5, 8 ); + P2( local.E, local.A, local.B, local.C, 
local.D, 1, 14, 14, 9 ); + P2( local.D, local.E, local.A, local.B, local.C, 2, 15, 7, 9 ); + P2( local.C, local.D, local.E, local.A, local.B, 3, 12, 0, 11 ); + P2( local.B, local.C, local.D, local.E, local.A, 4, 5, 9, 13 ); + P2( local.A, local.B, local.C, local.D, local.E, 5, 8, 2, 15 ); + P2( local.E, local.A, local.B, local.C, local.D, 6, 7, 11, 15 ); + P2( local.D, local.E, local.A, local.B, local.C, 7, 9, 4, 5 ); + P2( local.C, local.D, local.E, local.A, local.B, 8, 11, 13, 7 ); + P2( local.B, local.C, local.D, local.E, local.A, 9, 13, 6, 7 ); + P2( local.A, local.B, local.C, local.D, local.E, 10, 14, 15, 8 ); + P2( local.E, local.A, local.B, local.C, local.D, 11, 15, 8, 11 ); + P2( local.D, local.E, local.A, local.B, local.C, 12, 6, 1, 14 ); + P2( local.C, local.D, local.E, local.A, local.B, 13, 7, 10, 14 ); + P2( local.B, local.C, local.D, local.E, local.A, 14, 9, 3, 12 ); + P2( local.A, local.B, local.C, local.D, local.E, 15, 8, 12, 6 ); #undef F #undef K #undef Fp @@ -225,22 +228,22 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define K 0x5A827999 #define Fp F4 #define Kp 0x5C4DD124 - P2( E, A, B, C, D, 7, 7, 6, 9 ); - P2( D, E, A, B, C, 4, 6, 11, 13 ); - P2( C, D, E, A, B, 13, 8, 3, 15 ); - P2( B, C, D, E, A, 1, 13, 7, 7 ); - P2( A, B, C, D, E, 10, 11, 0, 12 ); - P2( E, A, B, C, D, 6, 9, 13, 8 ); - P2( D, E, A, B, C, 15, 7, 5, 9 ); - P2( C, D, E, A, B, 3, 15, 10, 11 ); - P2( B, C, D, E, A, 12, 7, 14, 7 ); - P2( A, B, C, D, E, 0, 12, 15, 7 ); - P2( E, A, B, C, D, 9, 15, 8, 12 ); - P2( D, E, A, B, C, 5, 9, 12, 7 ); - P2( C, D, E, A, B, 2, 11, 4, 6 ); - P2( B, C, D, E, A, 14, 7, 9, 15 ); - P2( A, B, C, D, E, 11, 13, 1, 13 ); - P2( E, A, B, C, D, 8, 12, 2, 11 ); + P2( local.E, local.A, local.B, local.C, local.D, 7, 7, 6, 9 ); + P2( local.D, local.E, local.A, local.B, local.C, 4, 6, 11, 13 ); + P2( local.C, local.D, local.E, local.A, local.B, 13, 8, 3, 15 ); + P2( local.B, local.C, local.D, local.E, local.A, 1, 13, 7, 7 ); + P2( 
local.A, local.B, local.C, local.D, local.E, 10, 11, 0, 12 ); + P2( local.E, local.A, local.B, local.C, local.D, 6, 9, 13, 8 ); + P2( local.D, local.E, local.A, local.B, local.C, 15, 7, 5, 9 ); + P2( local.C, local.D, local.E, local.A, local.B, 3, 15, 10, 11 ); + P2( local.B, local.C, local.D, local.E, local.A, 12, 7, 14, 7 ); + P2( local.A, local.B, local.C, local.D, local.E, 0, 12, 15, 7 ); + P2( local.E, local.A, local.B, local.C, local.D, 9, 15, 8, 12 ); + P2( local.D, local.E, local.A, local.B, local.C, 5, 9, 12, 7 ); + P2( local.C, local.D, local.E, local.A, local.B, 2, 11, 4, 6 ); + P2( local.B, local.C, local.D, local.E, local.A, 14, 7, 9, 15 ); + P2( local.A, local.B, local.C, local.D, local.E, 11, 13, 1, 13 ); + P2( local.E, local.A, local.B, local.C, local.D, 8, 12, 2, 11 ); #undef F #undef K #undef Fp @@ -250,22 +253,22 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define K 0x6ED9EBA1 #define Fp F3 #define Kp 0x6D703EF3 - P2( D, E, A, B, C, 3, 11, 15, 9 ); - P2( C, D, E, A, B, 10, 13, 5, 7 ); - P2( B, C, D, E, A, 14, 6, 1, 15 ); - P2( A, B, C, D, E, 4, 7, 3, 11 ); - P2( E, A, B, C, D, 9, 14, 7, 8 ); - P2( D, E, A, B, C, 15, 9, 14, 6 ); - P2( C, D, E, A, B, 8, 13, 6, 6 ); - P2( B, C, D, E, A, 1, 15, 9, 14 ); - P2( A, B, C, D, E, 2, 14, 11, 12 ); - P2( E, A, B, C, D, 7, 8, 8, 13 ); - P2( D, E, A, B, C, 0, 13, 12, 5 ); - P2( C, D, E, A, B, 6, 6, 2, 14 ); - P2( B, C, D, E, A, 13, 5, 10, 13 ); - P2( A, B, C, D, E, 11, 12, 0, 13 ); - P2( E, A, B, C, D, 5, 7, 4, 7 ); - P2( D, E, A, B, C, 12, 5, 13, 5 ); + P2( local.D, local.E, local.A, local.B, local.C, 3, 11, 15, 9 ); + P2( local.C, local.D, local.E, local.A, local.B, 10, 13, 5, 7 ); + P2( local.B, local.C, local.D, local.E, local.A, 14, 6, 1, 15 ); + P2( local.A, local.B, local.C, local.D, local.E, 4, 7, 3, 11 ); + P2( local.E, local.A, local.B, local.C, local.D, 9, 14, 7, 8 ); + P2( local.D, local.E, local.A, local.B, local.C, 15, 9, 14, 6 ); + P2( local.C, local.D, local.E, 
local.A, local.B, 8, 13, 6, 6 ); + P2( local.B, local.C, local.D, local.E, local.A, 1, 15, 9, 14 ); + P2( local.A, local.B, local.C, local.D, local.E, 2, 14, 11, 12 ); + P2( local.E, local.A, local.B, local.C, local.D, 7, 8, 8, 13 ); + P2( local.D, local.E, local.A, local.B, local.C, 0, 13, 12, 5 ); + P2( local.C, local.D, local.E, local.A, local.B, 6, 6, 2, 14 ); + P2( local.B, local.C, local.D, local.E, local.A, 13, 5, 10, 13 ); + P2( local.A, local.B, local.C, local.D, local.E, 11, 12, 0, 13 ); + P2( local.E, local.A, local.B, local.C, local.D, 5, 7, 4, 7 ); + P2( local.D, local.E, local.A, local.B, local.C, 12, 5, 13, 5 ); #undef F #undef K #undef Fp @@ -275,22 +278,22 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define K 0x8F1BBCDC #define Fp F2 #define Kp 0x7A6D76E9 - P2( C, D, E, A, B, 1, 11, 8, 15 ); - P2( B, C, D, E, A, 9, 12, 6, 5 ); - P2( A, B, C, D, E, 11, 14, 4, 8 ); - P2( E, A, B, C, D, 10, 15, 1, 11 ); - P2( D, E, A, B, C, 0, 14, 3, 14 ); - P2( C, D, E, A, B, 8, 15, 11, 14 ); - P2( B, C, D, E, A, 12, 9, 15, 6 ); - P2( A, B, C, D, E, 4, 8, 0, 14 ); - P2( E, A, B, C, D, 13, 9, 5, 6 ); - P2( D, E, A, B, C, 3, 14, 12, 9 ); - P2( C, D, E, A, B, 7, 5, 2, 12 ); - P2( B, C, D, E, A, 15, 6, 13, 9 ); - P2( A, B, C, D, E, 14, 8, 9, 12 ); - P2( E, A, B, C, D, 5, 6, 7, 5 ); - P2( D, E, A, B, C, 6, 5, 10, 15 ); - P2( C, D, E, A, B, 2, 12, 14, 8 ); + P2( local.C, local.D, local.E, local.A, local.B, 1, 11, 8, 15 ); + P2( local.B, local.C, local.D, local.E, local.A, 9, 12, 6, 5 ); + P2( local.A, local.B, local.C, local.D, local.E, 11, 14, 4, 8 ); + P2( local.E, local.A, local.B, local.C, local.D, 10, 15, 1, 11 ); + P2( local.D, local.E, local.A, local.B, local.C, 0, 14, 3, 14 ); + P2( local.C, local.D, local.E, local.A, local.B, 8, 15, 11, 14 ); + P2( local.B, local.C, local.D, local.E, local.A, 12, 9, 15, 6 ); + P2( local.A, local.B, local.C, local.D, local.E, 4, 8, 0, 14 ); + P2( local.E, local.A, local.B, local.C, local.D, 13, 9, 5, 
6 ); + P2( local.D, local.E, local.A, local.B, local.C, 3, 14, 12, 9 ); + P2( local.C, local.D, local.E, local.A, local.B, 7, 5, 2, 12 ); + P2( local.B, local.C, local.D, local.E, local.A, 15, 6, 13, 9 ); + P2( local.A, local.B, local.C, local.D, local.E, 14, 8, 9, 12 ); + P2( local.E, local.A, local.B, local.C, local.D, 5, 6, 7, 5 ); + P2( local.D, local.E, local.A, local.B, local.C, 6, 5, 10, 15 ); + P2( local.C, local.D, local.E, local.A, local.B, 2, 12, 14, 8 ); #undef F #undef K #undef Fp @@ -300,33 +303,36 @@ int mbedtls_internal_ripemd160_process( mbedtls_ripemd160_context *ctx, #define K 0xA953FD4E #define Fp F1 #define Kp 0x00000000 - P2( B, C, D, E, A, 4, 9, 12, 8 ); - P2( A, B, C, D, E, 0, 15, 15, 5 ); - P2( E, A, B, C, D, 5, 5, 10, 12 ); - P2( D, E, A, B, C, 9, 11, 4, 9 ); - P2( C, D, E, A, B, 7, 6, 1, 12 ); - P2( B, C, D, E, A, 12, 8, 5, 5 ); - P2( A, B, C, D, E, 2, 13, 8, 14 ); - P2( E, A, B, C, D, 10, 12, 7, 6 ); - P2( D, E, A, B, C, 14, 5, 6, 8 ); - P2( C, D, E, A, B, 1, 12, 2, 13 ); - P2( B, C, D, E, A, 3, 13, 13, 6 ); - P2( A, B, C, D, E, 8, 14, 14, 5 ); - P2( E, A, B, C, D, 11, 11, 0, 15 ); - P2( D, E, A, B, C, 6, 8, 3, 13 ); - P2( C, D, E, A, B, 15, 5, 9, 11 ); - P2( B, C, D, E, A, 13, 6, 11, 11 ); + P2( local.B, local.C, local.D, local.E, local.A, 4, 9, 12, 8 ); + P2( local.A, local.B, local.C, local.D, local.E, 0, 15, 15, 5 ); + P2( local.E, local.A, local.B, local.C, local.D, 5, 5, 10, 12 ); + P2( local.D, local.E, local.A, local.B, local.C, 9, 11, 4, 9 ); + P2( local.C, local.D, local.E, local.A, local.B, 7, 6, 1, 12 ); + P2( local.B, local.C, local.D, local.E, local.A, 12, 8, 5, 5 ); + P2( local.A, local.B, local.C, local.D, local.E, 2, 13, 8, 14 ); + P2( local.E, local.A, local.B, local.C, local.D, 10, 12, 7, 6 ); + P2( local.D, local.E, local.A, local.B, local.C, 14, 5, 6, 8 ); + P2( local.C, local.D, local.E, local.A, local.B, 1, 12, 2, 13 ); + P2( local.B, local.C, local.D, local.E, local.A, 3, 13, 13, 6 ); + P2( local.A, local.B, 
local.C, local.D, local.E, 8, 14, 14, 5 ); + P2( local.E, local.A, local.B, local.C, local.D, 11, 11, 0, 15 ); + P2( local.D, local.E, local.A, local.B, local.C, 6, 8, 3, 13 ); + P2( local.C, local.D, local.E, local.A, local.B, 15, 5, 9, 11 ); + P2( local.B, local.C, local.D, local.E, local.A, 13, 6, 11, 11 ); #undef F #undef K #undef Fp #undef Kp - C = ctx->state[1] + C + Dp; - ctx->state[1] = ctx->state[2] + D + Ep; - ctx->state[2] = ctx->state[3] + E + Ap; - ctx->state[3] = ctx->state[4] + A + Bp; - ctx->state[4] = ctx->state[0] + B + Cp; - ctx->state[0] = C; + local.C = ctx->state[1] + local.C + local.Dp; + ctx->state[1] = ctx->state[2] + local.D + local.Ep; + ctx->state[2] = ctx->state[3] + local.E + local.Ap; + ctx->state[3] = ctx->state[4] + local.A + local.Bp; + ctx->state[4] = ctx->state[0] + local.B + local.Cp; + ctx->state[0] = local.C; + + /* Zeroise variables to clear sensitive data from memory. */ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/rsa.c b/thirdparty/mbedtls/library/rsa.c index 42becbf17b..a25c633bc6 100644 --- a/thirdparty/mbedtls/library/rsa.c +++ b/thirdparty/mbedtls/library/rsa.c @@ -841,15 +841,14 @@ static int rsa_prepare_blinding( mbedtls_rsa_context *ctx, * which one, we just loop and choose new values for both of them. * (Each iteration succeeds with overwhelming probability.) 
*/ ret = mbedtls_mpi_inv_mod( &ctx->Vi, &ctx->Vi, &ctx->N ); - if( ret == MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ) - continue; - if( ret != 0 ) + if( ret != 0 && ret != MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ) goto cleanup; - /* Finish the computation of Vf^-1 = R * (R Vf)^-1 */ - MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &ctx->Vi, &ctx->Vi, &R ) ); - MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &ctx->Vi, &ctx->Vi, &ctx->N ) ); - } while( 0 ); + } while( ret == MBEDTLS_ERR_MPI_NOT_ACCEPTABLE ); + + /* Finish the computation of Vf^-1 = R * (R Vf)^-1 */ + MBEDTLS_MPI_CHK( mbedtls_mpi_mul_mpi( &ctx->Vi, &ctx->Vi, &R ) ); + MBEDTLS_MPI_CHK( mbedtls_mpi_mod_mpi( &ctx->Vi, &ctx->Vi, &ctx->N ) ); /* Blinding value: Vi = Vf^(-e) mod N * (Vi already contains Vf^-1 at this point) */ diff --git a/thirdparty/mbedtls/library/sha1.c b/thirdparty/mbedtls/library/sha1.c index 8682abd740..e99a5e8635 100644 --- a/thirdparty/mbedtls/library/sha1.c +++ b/thirdparty/mbedtls/library/sha1.c @@ -155,35 +155,40 @@ void mbedtls_sha1_starts( mbedtls_sha1_context *ctx ) int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, const unsigned char data[64] ) { - uint32_t temp, W[16], A, B, C, D, E; + struct + { + uint32_t temp, W[16], A, B, C, D, E; + } local; SHA1_VALIDATE_RET( ctx != NULL ); SHA1_VALIDATE_RET( (const unsigned char *)data != NULL ); - GET_UINT32_BE( W[ 0], data, 0 ); - GET_UINT32_BE( W[ 1], data, 4 ); - GET_UINT32_BE( W[ 2], data, 8 ); - GET_UINT32_BE( W[ 3], data, 12 ); - GET_UINT32_BE( W[ 4], data, 16 ); - GET_UINT32_BE( W[ 5], data, 20 ); - GET_UINT32_BE( W[ 6], data, 24 ); - GET_UINT32_BE( W[ 7], data, 28 ); - GET_UINT32_BE( W[ 8], data, 32 ); - GET_UINT32_BE( W[ 9], data, 36 ); - GET_UINT32_BE( W[10], data, 40 ); - GET_UINT32_BE( W[11], data, 44 ); - GET_UINT32_BE( W[12], data, 48 ); - GET_UINT32_BE( W[13], data, 52 ); - GET_UINT32_BE( W[14], data, 56 ); - GET_UINT32_BE( W[15], data, 60 ); + GET_UINT32_BE( local.W[ 0], data, 0 ); + GET_UINT32_BE( local.W[ 1], data, 4 ); + GET_UINT32_BE( 
local.W[ 2], data, 8 ); + GET_UINT32_BE( local.W[ 3], data, 12 ); + GET_UINT32_BE( local.W[ 4], data, 16 ); + GET_UINT32_BE( local.W[ 5], data, 20 ); + GET_UINT32_BE( local.W[ 6], data, 24 ); + GET_UINT32_BE( local.W[ 7], data, 28 ); + GET_UINT32_BE( local.W[ 8], data, 32 ); + GET_UINT32_BE( local.W[ 9], data, 36 ); + GET_UINT32_BE( local.W[10], data, 40 ); + GET_UINT32_BE( local.W[11], data, 44 ); + GET_UINT32_BE( local.W[12], data, 48 ); + GET_UINT32_BE( local.W[13], data, 52 ); + GET_UINT32_BE( local.W[14], data, 56 ); + GET_UINT32_BE( local.W[15], data, 60 ); #define S(x,n) (((x) << (n)) | (((x) & 0xFFFFFFFF) >> (32 - (n)))) #define R(t) \ ( \ - temp = W[( (t) - 3 ) & 0x0F] ^ W[( (t) - 8 ) & 0x0F] ^ \ - W[( (t) - 14 ) & 0x0F] ^ W[ (t) & 0x0F], \ - ( W[(t) & 0x0F] = S(temp,1) ) \ + local.temp = local.W[( (t) - 3 ) & 0x0F] ^ \ + local.W[( (t) - 8 ) & 0x0F] ^ \ + local.W[( (t) - 14 ) & 0x0F] ^ \ + local.W[ (t) & 0x0F], \ + ( local.W[(t) & 0x0F] = S(local.temp,1) ) \ ) #define P(a,b,c,d,e,x) \ @@ -193,35 +198,35 @@ int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, (b) = S((b),30); \ } while( 0 ) - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; - E = ctx->state[4]; + local.A = ctx->state[0]; + local.B = ctx->state[1]; + local.C = ctx->state[2]; + local.D = ctx->state[3]; + local.E = ctx->state[4]; #define F(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) #define K 0x5A827999 - P( A, B, C, D, E, W[0] ); - P( E, A, B, C, D, W[1] ); - P( D, E, A, B, C, W[2] ); - P( C, D, E, A, B, W[3] ); - P( B, C, D, E, A, W[4] ); - P( A, B, C, D, E, W[5] ); - P( E, A, B, C, D, W[6] ); - P( D, E, A, B, C, W[7] ); - P( C, D, E, A, B, W[8] ); - P( B, C, D, E, A, W[9] ); - P( A, B, C, D, E, W[10] ); - P( E, A, B, C, D, W[11] ); - P( D, E, A, B, C, W[12] ); - P( C, D, E, A, B, W[13] ); - P( B, C, D, E, A, W[14] ); - P( A, B, C, D, E, W[15] ); - P( E, A, B, C, D, R(16) ); - P( D, E, A, B, C, R(17) ); - P( C, D, E, A, B, R(18) ); - P( B, C, D, E, A, R(19) 
); + P( local.A, local.B, local.C, local.D, local.E, local.W[0] ); + P( local.E, local.A, local.B, local.C, local.D, local.W[1] ); + P( local.D, local.E, local.A, local.B, local.C, local.W[2] ); + P( local.C, local.D, local.E, local.A, local.B, local.W[3] ); + P( local.B, local.C, local.D, local.E, local.A, local.W[4] ); + P( local.A, local.B, local.C, local.D, local.E, local.W[5] ); + P( local.E, local.A, local.B, local.C, local.D, local.W[6] ); + P( local.D, local.E, local.A, local.B, local.C, local.W[7] ); + P( local.C, local.D, local.E, local.A, local.B, local.W[8] ); + P( local.B, local.C, local.D, local.E, local.A, local.W[9] ); + P( local.A, local.B, local.C, local.D, local.E, local.W[10] ); + P( local.E, local.A, local.B, local.C, local.D, local.W[11] ); + P( local.D, local.E, local.A, local.B, local.C, local.W[12] ); + P( local.C, local.D, local.E, local.A, local.B, local.W[13] ); + P( local.B, local.C, local.D, local.E, local.A, local.W[14] ); + P( local.A, local.B, local.C, local.D, local.E, local.W[15] ); + P( local.E, local.A, local.B, local.C, local.D, R(16) ); + P( local.D, local.E, local.A, local.B, local.C, R(17) ); + P( local.C, local.D, local.E, local.A, local.B, R(18) ); + P( local.B, local.C, local.D, local.E, local.A, R(19) ); #undef K #undef F @@ -229,26 +234,26 @@ int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, #define F(x,y,z) ((x) ^ (y) ^ (z)) #define K 0x6ED9EBA1 - P( A, B, C, D, E, R(20) ); - P( E, A, B, C, D, R(21) ); - P( D, E, A, B, C, R(22) ); - P( C, D, E, A, B, R(23) ); - P( B, C, D, E, A, R(24) ); - P( A, B, C, D, E, R(25) ); - P( E, A, B, C, D, R(26) ); - P( D, E, A, B, C, R(27) ); - P( C, D, E, A, B, R(28) ); - P( B, C, D, E, A, R(29) ); - P( A, B, C, D, E, R(30) ); - P( E, A, B, C, D, R(31) ); - P( D, E, A, B, C, R(32) ); - P( C, D, E, A, B, R(33) ); - P( B, C, D, E, A, R(34) ); - P( A, B, C, D, E, R(35) ); - P( E, A, B, C, D, R(36) ); - P( D, E, A, B, C, R(37) ); - P( C, D, E, A, B, R(38) ); - P( B, C, D, E, A, 
R(39) ); + P( local.A, local.B, local.C, local.D, local.E, R(20) ); + P( local.E, local.A, local.B, local.C, local.D, R(21) ); + P( local.D, local.E, local.A, local.B, local.C, R(22) ); + P( local.C, local.D, local.E, local.A, local.B, R(23) ); + P( local.B, local.C, local.D, local.E, local.A, R(24) ); + P( local.A, local.B, local.C, local.D, local.E, R(25) ); + P( local.E, local.A, local.B, local.C, local.D, R(26) ); + P( local.D, local.E, local.A, local.B, local.C, R(27) ); + P( local.C, local.D, local.E, local.A, local.B, R(28) ); + P( local.B, local.C, local.D, local.E, local.A, R(29) ); + P( local.A, local.B, local.C, local.D, local.E, R(30) ); + P( local.E, local.A, local.B, local.C, local.D, R(31) ); + P( local.D, local.E, local.A, local.B, local.C, R(32) ); + P( local.C, local.D, local.E, local.A, local.B, R(33) ); + P( local.B, local.C, local.D, local.E, local.A, R(34) ); + P( local.A, local.B, local.C, local.D, local.E, R(35) ); + P( local.E, local.A, local.B, local.C, local.D, R(36) ); + P( local.D, local.E, local.A, local.B, local.C, R(37) ); + P( local.C, local.D, local.E, local.A, local.B, R(38) ); + P( local.B, local.C, local.D, local.E, local.A, R(39) ); #undef K #undef F @@ -256,26 +261,26 @@ int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, #define F(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) #define K 0x8F1BBCDC - P( A, B, C, D, E, R(40) ); - P( E, A, B, C, D, R(41) ); - P( D, E, A, B, C, R(42) ); - P( C, D, E, A, B, R(43) ); - P( B, C, D, E, A, R(44) ); - P( A, B, C, D, E, R(45) ); - P( E, A, B, C, D, R(46) ); - P( D, E, A, B, C, R(47) ); - P( C, D, E, A, B, R(48) ); - P( B, C, D, E, A, R(49) ); - P( A, B, C, D, E, R(50) ); - P( E, A, B, C, D, R(51) ); - P( D, E, A, B, C, R(52) ); - P( C, D, E, A, B, R(53) ); - P( B, C, D, E, A, R(54) ); - P( A, B, C, D, E, R(55) ); - P( E, A, B, C, D, R(56) ); - P( D, E, A, B, C, R(57) ); - P( C, D, E, A, B, R(58) ); - P( B, C, D, E, A, R(59) ); + P( local.A, local.B, local.C, local.D, local.E, 
R(40) ); + P( local.E, local.A, local.B, local.C, local.D, R(41) ); + P( local.D, local.E, local.A, local.B, local.C, R(42) ); + P( local.C, local.D, local.E, local.A, local.B, R(43) ); + P( local.B, local.C, local.D, local.E, local.A, R(44) ); + P( local.A, local.B, local.C, local.D, local.E, R(45) ); + P( local.E, local.A, local.B, local.C, local.D, R(46) ); + P( local.D, local.E, local.A, local.B, local.C, R(47) ); + P( local.C, local.D, local.E, local.A, local.B, R(48) ); + P( local.B, local.C, local.D, local.E, local.A, R(49) ); + P( local.A, local.B, local.C, local.D, local.E, R(50) ); + P( local.E, local.A, local.B, local.C, local.D, R(51) ); + P( local.D, local.E, local.A, local.B, local.C, R(52) ); + P( local.C, local.D, local.E, local.A, local.B, R(53) ); + P( local.B, local.C, local.D, local.E, local.A, R(54) ); + P( local.A, local.B, local.C, local.D, local.E, R(55) ); + P( local.E, local.A, local.B, local.C, local.D, R(56) ); + P( local.D, local.E, local.A, local.B, local.C, R(57) ); + P( local.C, local.D, local.E, local.A, local.B, R(58) ); + P( local.B, local.C, local.D, local.E, local.A, R(59) ); #undef K #undef F @@ -283,35 +288,38 @@ int mbedtls_internal_sha1_process( mbedtls_sha1_context *ctx, #define F(x,y,z) ((x) ^ (y) ^ (z)) #define K 0xCA62C1D6 - P( A, B, C, D, E, R(60) ); - P( E, A, B, C, D, R(61) ); - P( D, E, A, B, C, R(62) ); - P( C, D, E, A, B, R(63) ); - P( B, C, D, E, A, R(64) ); - P( A, B, C, D, E, R(65) ); - P( E, A, B, C, D, R(66) ); - P( D, E, A, B, C, R(67) ); - P( C, D, E, A, B, R(68) ); - P( B, C, D, E, A, R(69) ); - P( A, B, C, D, E, R(70) ); - P( E, A, B, C, D, R(71) ); - P( D, E, A, B, C, R(72) ); - P( C, D, E, A, B, R(73) ); - P( B, C, D, E, A, R(74) ); - P( A, B, C, D, E, R(75) ); - P( E, A, B, C, D, R(76) ); - P( D, E, A, B, C, R(77) ); - P( C, D, E, A, B, R(78) ); - P( B, C, D, E, A, R(79) ); + P( local.A, local.B, local.C, local.D, local.E, R(60) ); + P( local.E, local.A, local.B, local.C, local.D, R(61) ); + P( local.D, 
local.E, local.A, local.B, local.C, R(62) ); + P( local.C, local.D, local.E, local.A, local.B, R(63) ); + P( local.B, local.C, local.D, local.E, local.A, R(64) ); + P( local.A, local.B, local.C, local.D, local.E, R(65) ); + P( local.E, local.A, local.B, local.C, local.D, R(66) ); + P( local.D, local.E, local.A, local.B, local.C, R(67) ); + P( local.C, local.D, local.E, local.A, local.B, R(68) ); + P( local.B, local.C, local.D, local.E, local.A, R(69) ); + P( local.A, local.B, local.C, local.D, local.E, R(70) ); + P( local.E, local.A, local.B, local.C, local.D, R(71) ); + P( local.D, local.E, local.A, local.B, local.C, R(72) ); + P( local.C, local.D, local.E, local.A, local.B, R(73) ); + P( local.B, local.C, local.D, local.E, local.A, R(74) ); + P( local.A, local.B, local.C, local.D, local.E, R(75) ); + P( local.E, local.A, local.B, local.C, local.D, R(76) ); + P( local.D, local.E, local.A, local.B, local.C, R(77) ); + P( local.C, local.D, local.E, local.A, local.B, R(78) ); + P( local.B, local.C, local.D, local.E, local.A, R(79) ); #undef K #undef F - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; - ctx->state[4] += E; + ctx->state[0] += local.A; + ctx->state[1] += local.B; + ctx->state[2] += local.C; + ctx->state[3] += local.D; + ctx->state[4] += local.E; + + /* Zeroise buffers and variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/sha256.c b/thirdparty/mbedtls/library/sha256.c index 5169584b68..75a8f8a2b2 100644 --- a/thirdparty/mbedtls/library/sha256.c +++ b/thirdparty/mbedtls/library/sha256.c @@ -209,77 +209,104 @@ static const uint32_t K[] = #define F0(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) #define F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define R(t) \ - ( \ - W[t] = S1(W[(t) - 2]) + W[(t) - 7] + \ - S0(W[(t) - 15]) + W[(t) - 16] \ +#define R(t) \ + ( \ + local.W[t] = S1(local.W[(t) - 2]) + local.W[(t) - 7] + \ + S0(local.W[(t) - 15]) + local.W[(t) - 16] \ ) -#define P(a,b,c,d,e,f,g,h,x,K) \ - do \ - { \ - temp1 = (h) + S3(e) + F1((e),(f),(g)) + (K) + (x); \ - temp2 = S2(a) + F0((a),(b),(c)); \ - (d) += temp1; (h) = temp1 + temp2; \ +#define P(a,b,c,d,e,f,g,h,x,K) \ + do \ + { \ + local.temp1 = (h) + S3(e) + F1((e),(f),(g)) + (K) + (x); \ + local.temp2 = S2(a) + F0((a),(b),(c)); \ + (d) += local.temp1; (h) = local.temp1 + local.temp2; \ } while( 0 ) int mbedtls_internal_sha256_process( mbedtls_sha256_context *ctx, const unsigned char data[64] ) { - uint32_t temp1, temp2, W[64]; - uint32_t A[8]; + struct + { + uint32_t temp1, temp2, W[64]; + uint32_t A[8]; + } local; + unsigned int i; SHA256_VALIDATE_RET( ctx != NULL ); SHA256_VALIDATE_RET( (const unsigned char *)data != NULL ); for( i = 0; i < 8; i++ ) - A[i] = ctx->state[i]; + local.A[i] = ctx->state[i]; #if defined(MBEDTLS_SHA256_SMALLER) for( i = 0; i < 64; i++ ) { if( i < 16 ) - GET_UINT32_BE( W[i], data, 4 * i ); + GET_UINT32_BE( local.W[i], data, 4 * i ); else R( i ); - P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i], K[i] ); + P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4], + local.A[5], local.A[6], local.A[7], local.W[i], K[i] ); - temp1 = A[7]; A[7] = A[6]; A[6] = A[5]; A[5] = A[4]; A[4] = A[3]; - A[3] = A[2]; A[2] = A[1]; A[1] = A[0]; A[0] = temp1; + local.temp1 = local.A[7]; local.A[7] = 
local.A[6]; + local.A[6] = local.A[5]; local.A[5] = local.A[4]; + local.A[4] = local.A[3]; local.A[3] = local.A[2]; + local.A[2] = local.A[1]; local.A[1] = local.A[0]; + local.A[0] = local.temp1; } #else /* MBEDTLS_SHA256_SMALLER */ for( i = 0; i < 16; i++ ) - GET_UINT32_BE( W[i], data, 4 * i ); + GET_UINT32_BE( local.W[i], data, 4 * i ); for( i = 0; i < 16; i += 8 ) { - P( A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], W[i+0], K[i+0] ); - P( A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], W[i+1], K[i+1] ); - P( A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], W[i+2], K[i+2] ); - P( A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], W[i+3], K[i+3] ); - P( A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], W[i+4], K[i+4] ); - P( A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], W[i+5], K[i+5] ); - P( A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], W[i+6], K[i+6] ); - P( A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], W[i+7], K[i+7] ); + P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4], + local.A[5], local.A[6], local.A[7], local.W[i+0], K[i+0] ); + P( local.A[7], local.A[0], local.A[1], local.A[2], local.A[3], + local.A[4], local.A[5], local.A[6], local.W[i+1], K[i+1] ); + P( local.A[6], local.A[7], local.A[0], local.A[1], local.A[2], + local.A[3], local.A[4], local.A[5], local.W[i+2], K[i+2] ); + P( local.A[5], local.A[6], local.A[7], local.A[0], local.A[1], + local.A[2], local.A[3], local.A[4], local.W[i+3], K[i+3] ); + P( local.A[4], local.A[5], local.A[6], local.A[7], local.A[0], + local.A[1], local.A[2], local.A[3], local.W[i+4], K[i+4] ); + P( local.A[3], local.A[4], local.A[5], local.A[6], local.A[7], + local.A[0], local.A[1], local.A[2], local.W[i+5], K[i+5] ); + P( local.A[2], local.A[3], local.A[4], local.A[5], local.A[6], + local.A[7], local.A[0], local.A[1], local.W[i+6], K[i+6] ); + P( local.A[1], local.A[2], local.A[3], local.A[4], local.A[5], + local.A[6], local.A[7], local.A[0], local.W[i+7], K[i+7] ); } for( i = 16; i < 64; i += 8 ) { - P( 
A[0], A[1], A[2], A[3], A[4], A[5], A[6], A[7], R(i+0), K[i+0] ); - P( A[7], A[0], A[1], A[2], A[3], A[4], A[5], A[6], R(i+1), K[i+1] ); - P( A[6], A[7], A[0], A[1], A[2], A[3], A[4], A[5], R(i+2), K[i+2] ); - P( A[5], A[6], A[7], A[0], A[1], A[2], A[3], A[4], R(i+3), K[i+3] ); - P( A[4], A[5], A[6], A[7], A[0], A[1], A[2], A[3], R(i+4), K[i+4] ); - P( A[3], A[4], A[5], A[6], A[7], A[0], A[1], A[2], R(i+5), K[i+5] ); - P( A[2], A[3], A[4], A[5], A[6], A[7], A[0], A[1], R(i+6), K[i+6] ); - P( A[1], A[2], A[3], A[4], A[5], A[6], A[7], A[0], R(i+7), K[i+7] ); + P( local.A[0], local.A[1], local.A[2], local.A[3], local.A[4], + local.A[5], local.A[6], local.A[7], R(i+0), K[i+0] ); + P( local.A[7], local.A[0], local.A[1], local.A[2], local.A[3], + local.A[4], local.A[5], local.A[6], R(i+1), K[i+1] ); + P( local.A[6], local.A[7], local.A[0], local.A[1], local.A[2], + local.A[3], local.A[4], local.A[5], R(i+2), K[i+2] ); + P( local.A[5], local.A[6], local.A[7], local.A[0], local.A[1], + local.A[2], local.A[3], local.A[4], R(i+3), K[i+3] ); + P( local.A[4], local.A[5], local.A[6], local.A[7], local.A[0], + local.A[1], local.A[2], local.A[3], R(i+4), K[i+4] ); + P( local.A[3], local.A[4], local.A[5], local.A[6], local.A[7], + local.A[0], local.A[1], local.A[2], R(i+5), K[i+5] ); + P( local.A[2], local.A[3], local.A[4], local.A[5], local.A[6], + local.A[7], local.A[0], local.A[1], R(i+6), K[i+6] ); + P( local.A[1], local.A[2], local.A[3], local.A[4], local.A[5], + local.A[6], local.A[7], local.A[0], R(i+7), K[i+7] ); } #endif /* MBEDTLS_SHA256_SMALLER */ for( i = 0; i < 8; i++ ) - ctx->state[i] += A[i]; + ctx->state[i] += local.A[i]; + + /* Zeroise buffers and variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/sha512.c b/thirdparty/mbedtls/library/sha512.c index 36d5d96146..986037ab7c 100644 --- a/thirdparty/mbedtls/library/sha512.c +++ b/thirdparty/mbedtls/library/sha512.c @@ -243,8 +243,11 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx, const unsigned char data[128] ) { int i; - uint64_t temp1, temp2, W[80]; - uint64_t A, B, C, D, E, F, G, H; + struct + { + uint64_t temp1, temp2, W[80]; + uint64_t A, B, C, D, E, F, G, H; + } local; SHA512_VALIDATE_RET( ctx != NULL ); SHA512_VALIDATE_RET( (const unsigned char *)data != NULL ); @@ -261,56 +264,67 @@ int mbedtls_internal_sha512_process( mbedtls_sha512_context *ctx, #define F0(x,y,z) (((x) & (y)) | ((z) & ((x) | (y)))) #define F1(x,y,z) ((z) ^ ((x) & ((y) ^ (z)))) -#define P(a,b,c,d,e,f,g,h,x,K) \ - do \ - { \ - temp1 = (h) + S3(e) + F1((e),(f),(g)) + (K) + (x); \ - temp2 = S2(a) + F0((a),(b),(c)); \ - (d) += temp1; (h) = temp1 + temp2; \ +#define P(a,b,c,d,e,f,g,h,x,K) \ + do \ + { \ + local.temp1 = (h) + S3(e) + F1((e),(f),(g)) + (K) + (x); \ + local.temp2 = S2(a) + F0((a),(b),(c)); \ + (d) += local.temp1; (h) = local.temp1 + local.temp2; \ } while( 0 ) for( i = 0; i < 16; i++ ) { - GET_UINT64_BE( W[i], data, i << 3 ); + GET_UINT64_BE( local.W[i], data, i << 3 ); } for( ; i < 80; i++ ) { - W[i] = S1(W[i - 2]) + W[i - 7] + - S0(W[i - 15]) + W[i - 16]; + local.W[i] = S1(local.W[i - 2]) + local.W[i - 7] + + S0(local.W[i - 15]) + local.W[i - 16]; } - A = ctx->state[0]; - B = ctx->state[1]; - C = ctx->state[2]; - D = ctx->state[3]; - E = ctx->state[4]; - F = ctx->state[5]; - G = ctx->state[6]; - H = ctx->state[7]; + local.A = ctx->state[0]; + local.B = ctx->state[1]; + local.C = ctx->state[2]; + local.D = ctx->state[3]; + local.E = ctx->state[4]; + local.F = ctx->state[5]; + local.G = ctx->state[6]; + local.H = ctx->state[7]; i = 0; do { - P( A, B, C, D, E, F, G, H, W[i], K[i] ); i++; - P( H, 
A, B, C, D, E, F, G, W[i], K[i] ); i++; - P( G, H, A, B, C, D, E, F, W[i], K[i] ); i++; - P( F, G, H, A, B, C, D, E, W[i], K[i] ); i++; - P( E, F, G, H, A, B, C, D, W[i], K[i] ); i++; - P( D, E, F, G, H, A, B, C, W[i], K[i] ); i++; - P( C, D, E, F, G, H, A, B, W[i], K[i] ); i++; - P( B, C, D, E, F, G, H, A, W[i], K[i] ); i++; + P( local.A, local.B, local.C, local.D, local.E, + local.F, local.G, local.H, local.W[i], K[i] ); i++; + P( local.H, local.A, local.B, local.C, local.D, + local.E, local.F, local.G, local.W[i], K[i] ); i++; + P( local.G, local.H, local.A, local.B, local.C, + local.D, local.E, local.F, local.W[i], K[i] ); i++; + P( local.F, local.G, local.H, local.A, local.B, + local.C, local.D, local.E, local.W[i], K[i] ); i++; + P( local.E, local.F, local.G, local.H, local.A, + local.B, local.C, local.D, local.W[i], K[i] ); i++; + P( local.D, local.E, local.F, local.G, local.H, + local.A, local.B, local.C, local.W[i], K[i] ); i++; + P( local.C, local.D, local.E, local.F, local.G, + local.H, local.A, local.B, local.W[i], K[i] ); i++; + P( local.B, local.C, local.D, local.E, local.F, + local.G, local.H, local.A, local.W[i], K[i] ); i++; } while( i < 80 ); - ctx->state[0] += A; - ctx->state[1] += B; - ctx->state[2] += C; - ctx->state[3] += D; - ctx->state[4] += E; - ctx->state[5] += F; - ctx->state[6] += G; - ctx->state[7] += H; + ctx->state[0] += local.A; + ctx->state[1] += local.B; + ctx->state[2] += local.C; + ctx->state[3] += local.D; + ctx->state[4] += local.E; + ctx->state[5] += local.F; + ctx->state[6] += local.G; + ctx->state[7] += local.H; + + /* Zeroise buffers and variables to clear sensitive data from memory. 
*/ + mbedtls_platform_zeroize( &local, sizeof( local ) ); return( 0 ); } diff --git a/thirdparty/mbedtls/library/ssl_srv.c b/thirdparty/mbedtls/library/ssl_srv.c index 97b778452c..cbf6142ac2 100644 --- a/thirdparty/mbedtls/library/ssl_srv.c +++ b/thirdparty/mbedtls/library/ssl_srv.c @@ -3587,11 +3587,12 @@ static int ssl_parse_encrypted_pms( mbedtls_ssl_context *ssl, /* In case of a failure in decryption, the decryption may write less than * 2 bytes of output, but we always read the first two bytes. It doesn't * matter in the end because diff will be nonzero in that case due to - * peer_pmslen being less than 48, and we only care whether diff is 0. - * But do initialize peer_pms for robustness anyway. This also makes - * memory analyzers happy (don't access uninitialized memory, even - * if it's an unsigned char). */ + * ret being nonzero, and we only care whether diff is 0. + * But do initialize peer_pms and peer_pmslen for robustness anyway. This + * also makes memory analyzers happy (don't access uninitialized memory, + * even if it's an unsigned char). 
*/ peer_pms[0] = peer_pms[1] = ~0; + peer_pmslen = 0; ret = ssl_decrypt_encrypted_pms( ssl, p, end, peer_pms, diff --git a/thirdparty/mbedtls/library/ssl_tls.c b/thirdparty/mbedtls/library/ssl_tls.c index 2471600c9a..c749a8611c 100644 --- a/thirdparty/mbedtls/library/ssl_tls.c +++ b/thirdparty/mbedtls/library/ssl_tls.c @@ -621,7 +621,7 @@ static void ssl_calc_finished_tls( mbedtls_ssl_context *, unsigned char *, int ) #if defined(MBEDTLS_SSL_PROTO_TLS1_2) #if defined(MBEDTLS_SHA256_C) static void ssl_update_checksum_sha256( mbedtls_ssl_context *, const unsigned char *, size_t ); -static void ssl_calc_verify_tls_sha256( mbedtls_ssl_context *,unsigned char * ); +static void ssl_calc_verify_tls_sha256( mbedtls_ssl_context *, unsigned char * ); static void ssl_calc_finished_tls_sha256( mbedtls_ssl_context *,unsigned char *, int ); #endif @@ -1142,7 +1142,7 @@ int mbedtls_ssl_derive_keys( mbedtls_ssl_context *ssl ) } #if defined(MBEDTLS_SSL_PROTO_SSL3) -void ssl_calc_verify_ssl( mbedtls_ssl_context *ssl, unsigned char hash[36] ) +void ssl_calc_verify_ssl( mbedtls_ssl_context *ssl, unsigned char *hash ) { mbedtls_md5_context md5; mbedtls_sha1_context sha1; @@ -1191,7 +1191,7 @@ void ssl_calc_verify_ssl( mbedtls_ssl_context *ssl, unsigned char hash[36] ) #endif /* MBEDTLS_SSL_PROTO_SSL3 */ #if defined(MBEDTLS_SSL_PROTO_TLS1) || defined(MBEDTLS_SSL_PROTO_TLS1_1) -void ssl_calc_verify_tls( mbedtls_ssl_context *ssl, unsigned char hash[36] ) +void ssl_calc_verify_tls( mbedtls_ssl_context *ssl, unsigned char *hash ) { mbedtls_md5_context md5; mbedtls_sha1_context sha1; @@ -1219,7 +1219,7 @@ void ssl_calc_verify_tls( mbedtls_ssl_context *ssl, unsigned char hash[36] ) #if defined(MBEDTLS_SSL_PROTO_TLS1_2) #if defined(MBEDTLS_SHA256_C) -void ssl_calc_verify_tls_sha256( mbedtls_ssl_context *ssl, unsigned char hash[32] ) +void ssl_calc_verify_tls_sha256( mbedtls_ssl_context *ssl, unsigned char *hash ) { mbedtls_sha256_context sha256; @@ -1240,7 +1240,7 @@ void 
ssl_calc_verify_tls_sha256( mbedtls_ssl_context *ssl, unsigned char hash[32 #endif /* MBEDTLS_SHA256_C */ #if defined(MBEDTLS_SHA512_C) -void ssl_calc_verify_tls_sha384( mbedtls_ssl_context *ssl, unsigned char hash[48] ) +void ssl_calc_verify_tls_sha384( mbedtls_ssl_context *ssl, unsigned char *hash ) { mbedtls_sha512_context sha512; @@ -6363,6 +6363,9 @@ static void ssl_calc_finished_tls_sha256( #endif /* MBEDTLS_SHA256_C */ #if defined(MBEDTLS_SHA512_C) + +typedef int (*finish_sha384_t)(mbedtls_sha512_context*, unsigned char*); + static void ssl_calc_finished_tls_sha384( mbedtls_ssl_context *ssl, unsigned char *buf, int from ) { @@ -6370,6 +6373,12 @@ static void ssl_calc_finished_tls_sha384( const char *sender; mbedtls_sha512_context sha512; unsigned char padbuf[48]; + /* + * For SHA-384, we can save 16 bytes by keeping padbuf 48 bytes long. + * However, to avoid stringop-overflow warning in gcc, we have to cast + * mbedtls_sha512_finish_ret(). + */ + finish_sha384_t finish_sha384 = (finish_sha384_t)mbedtls_sha512_finish_ret; mbedtls_ssl_session *session = ssl->session_negotiate; if( !session ) @@ -6396,7 +6405,7 @@ static void ssl_calc_finished_tls_sha384( ? "client finished" : "server finished"; - mbedtls_sha512_finish_ret( &sha512, padbuf ); + finish_sha384( &sha512, padbuf ); ssl->handshake->tls_prf( session->master, 48, sender, padbuf, 48, buf, len ); diff --git a/thirdparty/mbedtls/library/threading.c b/thirdparty/mbedtls/library/threading.c index 61c4b94041..f4f29cff5e 100644 --- a/thirdparty/mbedtls/library/threading.c +++ b/thirdparty/mbedtls/library/threading.c @@ -73,7 +73,7 @@ #if !( ( defined(_POSIX_VERSION) && _POSIX_VERSION >= 200809L ) || \ ( defined(_POSIX_THREAD_SAFE_FUNCTIONS ) && \ - _POSIX_THREAD_SAFE_FUNCTIONS >= 20112L ) ) + _POSIX_THREAD_SAFE_FUNCTIONS >= 200112L ) ) /* * This is a convenience shorthand macro to avoid checking the long * preprocessor conditions above. 
Ideally, we could expose this macro in @@ -88,7 +88,7 @@ #endif /* !( ( defined(_POSIX_VERSION) && _POSIX_VERSION >= 200809L ) || \ ( defined(_POSIX_THREAD_SAFE_FUNCTIONS ) && \ - _POSIX_THREAD_SAFE_FUNCTIONS >= 20112L ) ) */ + _POSIX_THREAD_SAFE_FUNCTIONS >= 200112L ) ) */ #endif /* MBEDTLS_HAVE_TIME_DATE && !MBEDTLS_PLATFORM_GMTIME_R_ALT */ diff --git a/thirdparty/mbedtls/library/x509_crt.c b/thirdparty/mbedtls/library/x509_crt.c index de40eaaf58..c458c25ff4 100644 --- a/thirdparty/mbedtls/library/x509_crt.c +++ b/thirdparty/mbedtls/library/x509_crt.c @@ -1101,6 +1101,7 @@ static int x509_crt_parse_der_core( mbedtls_x509_crt *crt, const unsigned char * if( crt->sig_oid.len != sig_oid2.len || memcmp( crt->sig_oid.p, sig_oid2.p, crt->sig_oid.len ) != 0 || + sig_params1.tag != sig_params2.tag || sig_params1.len != sig_params2.len || ( sig_params1.len != 0 && memcmp( sig_params1.p, sig_params2.p, sig_params1.len ) != 0 ) ) diff --git a/thirdparty/meshoptimizer/LICENSE.md b/thirdparty/meshoptimizer/LICENSE.md new file mode 100644 index 0000000000..4fcd766d22 --- /dev/null +++ b/thirdparty/meshoptimizer/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2016-2020 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/thirdparty/meshoptimizer/allocator.cpp b/thirdparty/meshoptimizer/allocator.cpp new file mode 100644 index 0000000000..da7cc540b2 --- /dev/null +++ b/thirdparty/meshoptimizer/allocator.cpp @@ -0,0 +1,8 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*)) +{ + meshopt_Allocator::Storage::allocate = allocate; + meshopt_Allocator::Storage::deallocate = deallocate; +} diff --git a/thirdparty/meshoptimizer/clusterizer.cpp b/thirdparty/meshoptimizer/clusterizer.cpp new file mode 100644 index 0000000000..f7d88c5136 --- /dev/null +++ b/thirdparty/meshoptimizer/clusterizer.cpp @@ -0,0 +1,351 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <math.h> +#include <string.h> + +// This work is based on: +// Graham Wihlidal. Optimizing the Graphics Pipeline with Compute. 2016 +// Matthaeus Chajdas. GeometryFX 1.2 - Cluster Culling. 2016 +// Jack Ritter. An Efficient Bounding Sphere. 1990 +namespace meshopt +{ + +static void computeBoundingSphere(float result[4], const float points[][3], size_t count) +{ + assert(count > 0); + + // find extremum points along all 3 axes; for each axis we get a pair of points with min/max coordinates + size_t pmin[3] = {0, 0, 0}; + size_t pmax[3] = {0, 0, 0}; + + for (size_t i = 0; i < count; ++i) + { + const float* p = points[i]; + + for (int axis = 0; axis < 3; ++axis) + { + pmin[axis] = (p[axis] < points[pmin[axis]][axis]) ? i : pmin[axis]; + pmax[axis] = (p[axis] > points[pmax[axis]][axis]) ? 
i : pmax[axis]; + } + } + + // find the pair of points with largest distance + float paxisd2 = 0; + int paxis = 0; + + for (int axis = 0; axis < 3; ++axis) + { + const float* p1 = points[pmin[axis]]; + const float* p2 = points[pmax[axis]]; + + float d2 = (p2[0] - p1[0]) * (p2[0] - p1[0]) + (p2[1] - p1[1]) * (p2[1] - p1[1]) + (p2[2] - p1[2]) * (p2[2] - p1[2]); + + if (d2 > paxisd2) + { + paxisd2 = d2; + paxis = axis; + } + } + + // use the longest segment as the initial sphere diameter + const float* p1 = points[pmin[paxis]]; + const float* p2 = points[pmax[paxis]]; + + float center[3] = {(p1[0] + p2[0]) / 2, (p1[1] + p2[1]) / 2, (p1[2] + p2[2]) / 2}; + float radius = sqrtf(paxisd2) / 2; + + // iteratively adjust the sphere up until all points fit + for (size_t i = 0; i < count; ++i) + { + const float* p = points[i]; + float d2 = (p[0] - center[0]) * (p[0] - center[0]) + (p[1] - center[1]) * (p[1] - center[1]) + (p[2] - center[2]) * (p[2] - center[2]); + + if (d2 > radius * radius) + { + float d = sqrtf(d2); + assert(d > 0); + + float k = 0.5f + (radius / d) / 2; + + center[0] = center[0] * k + p[0] * (1 - k); + center[1] = center[1] * k + p[1] * (1 - k); + center[2] = center[2] * k + p[2] * (1 - k); + radius = (radius + d) / 2; + } + } + + result[0] = center[0]; + result[1] = center[1]; + result[2] = center[2]; + result[3] = radius; +} + +} // namespace meshopt + +size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles) +{ + assert(index_count % 3 == 0); + assert(max_vertices >= 3); + assert(max_triangles >= 1); + + // meshlet construction is limited by max vertices and max triangles per meshlet + // the worst case is that the input is an unindexed stream since this equally stresses both limits + // note that we assume that in the worst case, we leave 2 vertices unpacked in each meshlet - if we have space for 3 we can pack any triangle + size_t max_vertices_conservative = max_vertices - 2; + size_t meshlet_limit_vertices = 
(index_count + max_vertices_conservative - 1) / max_vertices_conservative; + size_t meshlet_limit_triangles = (index_count / 3 + max_triangles - 1) / max_triangles; + + return meshlet_limit_vertices > meshlet_limit_triangles ? meshlet_limit_vertices : meshlet_limit_triangles; +} + +size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + assert(index_count % 3 == 0); + assert(max_vertices >= 3); + assert(max_triangles >= 1); + + meshopt_Allocator allocator; + + meshopt_Meshlet meshlet; + memset(&meshlet, 0, sizeof(meshlet)); + + assert(max_vertices <= sizeof(meshlet.vertices) / sizeof(meshlet.vertices[0])); + assert(max_triangles <= sizeof(meshlet.indices) / 3); + + // index of the vertex in the meshlet, 0xff if the vertex isn't used + unsigned char* used = allocator.allocate<unsigned char>(vertex_count); + memset(used, -1, vertex_count); + + size_t offset = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + unsigned char& av = used[a]; + unsigned char& bv = used[b]; + unsigned char& cv = used[c]; + + unsigned int used_extra = (av == 0xff) + (bv == 0xff) + (cv == 0xff); + + if (meshlet.vertex_count + used_extra > max_vertices || meshlet.triangle_count >= max_triangles) + { + destination[offset++] = meshlet; + + for (size_t j = 0; j < meshlet.vertex_count; ++j) + used[meshlet.vertices[j]] = 0xff; + + memset(&meshlet, 0, sizeof(meshlet)); + } + + if (av == 0xff) + { + av = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = a; + } + + if (bv == 0xff) + { + bv = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = b; + } + + if (cv == 0xff) + { + cv = meshlet.vertex_count; + meshlet.vertices[meshlet.vertex_count++] = c; + } + + meshlet.indices[meshlet.triangle_count][0] = 
av; + meshlet.indices[meshlet.triangle_count][1] = bv; + meshlet.indices[meshlet.triangle_count][2] = cv; + meshlet.triangle_count++; + } + + if (meshlet.triangle_count) + destination[offset++] = meshlet; + + assert(offset <= meshopt_buildMeshletsBound(index_count, max_vertices, max_triangles)); + + return offset; +} + +meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + assert(index_count / 3 <= 256); + + (void)vertex_count; + + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + // compute triangle normals and gather triangle corners + float normals[256][3]; + float corners[256][3][3]; + size_t triangles = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* p0 = vertex_positions + vertex_stride_float * a; + const float* p1 = vertex_positions + vertex_stride_float * b; + const float* p2 = vertex_positions + vertex_stride_float * c; + + float p10[3] = {p1[0] - p0[0], p1[1] - p0[1], p1[2] - p0[2]}; + float p20[3] = {p2[0] - p0[0], p2[1] - p0[1], p2[2] - p0[2]}; + + float normalx = p10[1] * p20[2] - p10[2] * p20[1]; + float normaly = p10[2] * p20[0] - p10[0] * p20[2]; + float normalz = p10[0] * p20[1] - p10[1] * p20[0]; + + float area = sqrtf(normalx * normalx + normaly * normaly + normalz * normalz); + + // no need to include degenerate triangles - they will be invisible anyway + if (area == 0.f) + continue; + + // record triangle normals & corners for future use; normal and corner 0 define a plane equation + normals[triangles][0] = normalx / area; + 
normals[triangles][1] = normaly / area; + normals[triangles][2] = normalz / area; + memcpy(corners[triangles][0], p0, 3 * sizeof(float)); + memcpy(corners[triangles][1], p1, 3 * sizeof(float)); + memcpy(corners[triangles][2], p2, 3 * sizeof(float)); + triangles++; + } + + meshopt_Bounds bounds = {}; + + // degenerate cluster, no valid triangles => trivial reject (cone data is 0) + if (triangles == 0) + return bounds; + + // compute cluster bounding sphere; we'll use the center to determine normal cone apex as well + float psphere[4] = {}; + computeBoundingSphere(psphere, corners[0], triangles * 3); + + float center[3] = {psphere[0], psphere[1], psphere[2]}; + + // treating triangle normals as points, find the bounding sphere - the sphere center determines the optimal cone axis + float nsphere[4] = {}; + computeBoundingSphere(nsphere, normals, triangles); + + float axis[3] = {nsphere[0], nsphere[1], nsphere[2]}; + float axislength = sqrtf(axis[0] * axis[0] + axis[1] * axis[1] + axis[2] * axis[2]); + float invaxislength = axislength == 0.f ? 0.f : 1.f / axislength; + + axis[0] *= invaxislength; + axis[1] *= invaxislength; + axis[2] *= invaxislength; + + // compute a tight cone around all normals, mindp = cos(angle/2) + float mindp = 1.f; + + for (size_t i = 0; i < triangles; ++i) + { + float dp = normals[i][0] * axis[0] + normals[i][1] * axis[1] + normals[i][2] * axis[2]; + + mindp = (dp < mindp) ? 
dp : mindp; + } + + // fill bounding sphere info; note that below we can return bounds without cone information for degenerate cones + bounds.center[0] = center[0]; + bounds.center[1] = center[1]; + bounds.center[2] = center[2]; + bounds.radius = psphere[3]; + + // degenerate cluster, normal cone is larger than a hemisphere => trivial accept + // note that if mindp is positive but close to 0, the triangle intersection code below gets less stable + // we arbitrarily decide that if a normal cone is ~168 degrees wide or more, the cone isn't useful + if (mindp <= 0.1f) + { + bounds.cone_cutoff = 1; + bounds.cone_cutoff_s8 = 127; + return bounds; + } + + float maxt = 0; + + // we need to find the point on center-t*axis ray that lies in negative half-space of all triangles + for (size_t i = 0; i < triangles; ++i) + { + // dot(center-t*axis-corner, trinormal) = 0 + // dot(center-corner, trinormal) - t * dot(axis, trinormal) = 0 + float cx = center[0] - corners[i][0][0]; + float cy = center[1] - corners[i][0][1]; + float cz = center[2] - corners[i][0][2]; + + float dc = cx * normals[i][0] + cy * normals[i][1] + cz * normals[i][2]; + float dn = axis[0] * normals[i][0] + axis[1] * normals[i][1] + axis[2] * normals[i][2]; + + // dn should be larger than mindp cutoff above + assert(dn > 0.f); + float t = dc / dn; + + maxt = (t > maxt) ? 
t : maxt; + } + + // cone apex should be in the negative half-space of all cluster triangles by construction + bounds.cone_apex[0] = center[0] - axis[0] * maxt; + bounds.cone_apex[1] = center[1] - axis[1] * maxt; + bounds.cone_apex[2] = center[2] - axis[2] * maxt; + + // note: this axis is the axis of the normal cone, but our test for perspective camera effectively negates the axis + bounds.cone_axis[0] = axis[0]; + bounds.cone_axis[1] = axis[1]; + bounds.cone_axis[2] = axis[2]; + + // cos(a) for normal cone is mindp; we need to add 90 degrees on both sides and invert the cone + // which gives us -cos(a+90) = -(-sin(a)) = sin(a) = sqrt(1 - cos^2(a)) + bounds.cone_cutoff = sqrtf(1 - mindp * mindp); + + // quantize axis & cutoff to 8-bit SNORM format + bounds.cone_axis_s8[0] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[0], 8)); + bounds.cone_axis_s8[1] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[1], 8)); + bounds.cone_axis_s8[2] = (signed char)(meshopt_quantizeSnorm(bounds.cone_axis[2], 8)); + + // for the 8-bit test to be conservative, we need to adjust the cutoff by measuring the max. error + float cone_axis_s8_e0 = fabsf(bounds.cone_axis_s8[0] / 127.f - bounds.cone_axis[0]); + float cone_axis_s8_e1 = fabsf(bounds.cone_axis_s8[1] / 127.f - bounds.cone_axis[1]); + float cone_axis_s8_e2 = fabsf(bounds.cone_axis_s8[2] / 127.f - bounds.cone_axis[2]); + + // note that we need to round this up instead of rounding to nearest, hence +1 + int cone_cutoff_s8 = int(127 * (bounds.cone_cutoff + cone_axis_s8_e0 + cone_axis_s8_e1 + cone_axis_s8_e2) + 1); + + bounds.cone_cutoff_s8 = (cone_cutoff_s8 > 127) ? 
127 : (signed char)(cone_cutoff_s8); + + return bounds; +} + +meshopt_Bounds meshopt_computeMeshletBounds(const meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + unsigned int indices[sizeof(meshlet->indices) / sizeof(meshlet->indices[0][0])]; + + for (size_t i = 0; i < meshlet->triangle_count; ++i) + { + unsigned int a = meshlet->vertices[meshlet->indices[i][0]]; + unsigned int b = meshlet->vertices[meshlet->indices[i][1]]; + unsigned int c = meshlet->vertices[meshlet->indices[i][2]]; + + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + indices[i * 3 + 0] = a; + indices[i * 3 + 1] = b; + indices[i * 3 + 2] = c; + } + + return meshopt_computeClusterBounds(indices, meshlet->triangle_count * 3, vertex_positions, vertex_count, vertex_positions_stride); +} diff --git a/thirdparty/meshoptimizer/indexcodec.cpp b/thirdparty/meshoptimizer/indexcodec.cpp new file mode 100644 index 0000000000..eeb541e5be --- /dev/null +++ b/thirdparty/meshoptimizer/indexcodec.cpp @@ -0,0 +1,752 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +// This work is based on: +// Fabian Giesen. Simple lossless index buffer compression & follow-up. 2013 +// Conor Stokes. Vertex Cache Optimised Index Buffer Compression. 
namespace meshopt
{

// high nibble of the first stream byte identifies the stream kind; the low nibble carries the version
const unsigned char kIndexHeader = 0xe0;
const unsigned char kSequenceHeader = 0xd0;

// version written by the encoders
static int gEncodeIndexVersion = 0;

// 16-entry ring buffers of recently seen vertices / directed edges
typedef unsigned int VertexFifo[16];
typedef unsigned int EdgeFifo[16][2];

// corner order for each of the three possible triangle rotations
static const unsigned int kTriangleIndexOrder[3][3] = {
    {0, 1, 2},
    {1, 2, 0},
    {2, 0, 1},
};

static const unsigned char kCodeAuxEncodingTable[16] = {
    0x00, 0x76, 0x87, 0x56, 0x67, 0x78, 0xa9, 0x86, 0x65, 0x89, 0x68, 0x98, 0x01, 0x69,
    0, 0, // last two entries aren't used for encoding
};

// Returns the rotation (0..2) that moves the corner equal to `next` to the front; b is checked before c.
static int rotateTriangle(unsigned int a, unsigned int b, unsigned int c, unsigned int next)
{
	(void)a;

	if (b == next)
		return 1;

	if (c == next)
		return 2;

	return 0;
}

// Searches the edge fifo, newest entry first, for one of the edges a->b, b->c, c->a.
// Returns (age << 2) | edge_number on a hit and -1 when none of the 16 entries match.
static int getEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, unsigned int c, size_t offset)
{
	for (int age = 0; age < 16; ++age)
	{
		const unsigned int* edge = fifo[(offset - 1 - age) & 15];

		int which = -1;

		if (edge[0] == a && edge[1] == b)
			which = 0;
		else if (edge[0] == b && edge[1] == c)
			which = 1;
		else if (edge[0] == c && edge[1] == a)
			which = 2;

		if (which >= 0)
			return (age << 2) | which;
	}

	return -1;
}

// Stores edge a->b at the write cursor and advances the cursor, wrapping at 16.
static void pushEdgeFifo(EdgeFifo fifo, unsigned int a, unsigned int b, size_t& offset)
{
	unsigned int* slot = fifo[offset];

	slot[0] = a;
	slot[1] = b;

	offset = (offset + 1) & 15;
}

// Returns how many pushes ago v was stored (0 = most recent), or -1 if v is not in the fifo.
static int getVertexFifo(VertexFifo fifo, unsigned int v, size_t offset)
{
	int age = 0;

	while (age < 16)
	{
		if (fifo[(offset - 1 - age) & 15] == v)
			return age;

		++age;
	}

	return -1;
}

// Writes v at the write cursor; the cursor advances by cond, so cond == 0 leaves the slot to be overwritten by the next push.
static void pushVertexFifo(VertexFifo fifo, unsigned int v, size_t& offset, int cond = 1)
{
	fifo[offset] = v;
	offset = (offset + cond) & 15;
}

} // namespace meshopt
// Decodes one variable-length unsigned integer (7 payload bits per byte, high bit = "more bytes follow")
// and advances `data` past the bytes consumed. At most 5 bytes are read, even for malformed input.
static unsigned int decodeVByte(const unsigned char*& data)
{
	unsigned char lead = *data++;

	// fast path: single byte
	if (lead < 128)
		return lead;

	// slow path: up to 4 extra bytes
	// note that this loop always terminates, which is important for malformed data
	unsigned int result = lead & 127;
	unsigned int shift = 7;

	for (int i = 0; i < 4; ++i)
	{
		unsigned char group = *data++;

		// shift in unsigned arithmetic: `group & 127` promotes to int, and shifting it left by 28
		// overflows int (undefined before C++20) whenever the payload needs more than 3 bits
		result |= unsigned(group & 127) << shift;
		shift += 7;

		if (group < 128)
			break;
	}

	return result;
}
&dest[j], destsize - 1 - j); + dest[j] = (unsigned char)i; + break; + } + } + + if (j == destsize && destsize < 16) + { + dest[destsize] = (unsigned char)i; + destsize++; + } + } + + return destsize; +} +#endif + +} // namespace meshopt + +size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + +#if TRACE + size_t codestats[256] = {}; + size_t codeauxstats[256] = {}; +#endif + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kIndexHeader | version); + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + unsigned char* code = buffer + 1; + unsigned char* data = code + index_count / 3; + unsigned char* data_safe_end = buffer + buffer_size - 16; + + int fecmax = version >= 1 ? 
13 : 15; + + // use static encoding table; it's possible to pack the result and then build an optimal table and repack + // for now we keep it simple and use the table that has been generated based on symbol frequency on a training mesh set + const unsigned char* codeaux_table = kCodeAuxEncodingTable; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough space to write a triangle + // each triangle writes at most 16 bytes: 1b for codeaux and 5b for each free index + // after this we can be sure we can write without extra bounds checks + if (data > data_safe_end) + return 0; + + int fer = getEdgeFifo(edgefifo, indices[i + 0], indices[i + 1], indices[i + 2], edgefifooffset); + + if (fer >= 0 && (fer >> 2) < 15) + { + const unsigned int* order = kTriangleIndexOrder[fer & 3]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // encode edge index and vertex fifo index, next or free index + int fe = fer >> 2; + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + int fec = (fc >= 1 && fc < fecmax) ? fc : (c == next) ? 
(next++, 0) : 15; + + if (fec == 15 && version >= 1) + { + // encode last-1 and last+1 to optimize strip-like sequences + if (c + 1 == last) + fec = 13, last = c; + if (c == last + 1) + fec = 14, last = c; + } + + *code++ = (unsigned char)((fe << 4) | fec); + +#if TRACE + codestats[code[-1]]++; +#endif + + // note that we need to update the last index since free indices are delta-encoded + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // we only need to push third vertex since first two are likely already in the vertex fifo + if (fec == 0 || fec >= fecmax) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // we only need to push two new edges to edge fifo since the third one is already there + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + int rotation = rotateTriangle(indices[i + 0], indices[i + 1], indices[i + 2], next); + const unsigned int* order = kTriangleIndexOrder[rotation]; + + unsigned int a = indices[i + order[0]], b = indices[i + order[1]], c = indices[i + order[2]]; + + // if a/b/c are 0/1/2, we emit a reset code + bool reset = false; + + if (a == 0 && b == 1 && c == 2 && next > 0 && version >= 1) + { + reset = true; + next = 0; + + // reset vertex fifo to make sure we don't accidentally reference vertices from that in the future + // this makes sure next continues to get incremented instead of being stuck + memset(vertexfifo, -1, sizeof(vertexfifo)); + } + + int fb = getVertexFifo(vertexfifo, b, vertexfifooffset); + int fc = getVertexFifo(vertexfifo, c, vertexfifooffset); + + // after rotation, a is almost always equal to next, so we don't waste bits on FIFO encoding for a + int fea = (a == next) ? (next++, 0) : 15; + int feb = (fb >= 0 && fb < 14) ? (fb + 1) : (b == next) ? (next++, 0) : 15; + int fec = (fc >= 0 && fc < 14) ? (fc + 1) : (c == next) ? 
(next++, 0) : 15; + + // we encode feb & fec in 4 bits using a table if possible, and as a full byte otherwise + unsigned char codeaux = (unsigned char)((feb << 4) | fec); + int codeauxindex = getCodeAuxIndex(codeaux, codeaux_table); + + // <14 encodes an index into codeaux table, 14 encodes fea=0, 15 encodes fea=15 + if (fea == 0 && codeauxindex >= 0 && codeauxindex < 14 && !reset) + { + *code++ = (unsigned char)((15 << 4) | codeauxindex); + } + else + { + *code++ = (unsigned char)((15 << 4) | 14 | fea); + *data++ = codeaux; + } + +#if TRACE + codestats[code[-1]]++; + codeauxstats[codeaux]++; +#endif + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + encodeIndex(data, a, last), last = a; + + if (feb == 15) + encodeIndex(data, b, last), last = b; + + if (fec == 15) + encodeIndex(data, c, last), last = c; + + // only push vertices that weren't already in fifo + if (fea == 0 || fea == 15) + pushVertexFifo(vertexfifo, a, vertexfifooffset); + + if (feb == 0 || feb == 15) + pushVertexFifo(vertexfifo, b, vertexfifooffset); + + if (fec == 0 || fec == 15) + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + // all three edges aren't in the fifo; pushing all of them is important so that we can match them for later triangles + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + + // make sure we have enough space to write codeaux table + if (data > data_safe_end) + return 0; + + // add codeaux encoding table to the end of the stream; this is used for decoding codeaux *and* as padding + // we need padding for decoding to be able to assume that each triangle is encoded as <= 16 bytes of extra data + // this is enough space for aux byte + 5 bytes per varint index which is the absolute worst case for any input + for (size_t i = 0; i < 16; ++i) + { + // decoder assumes that table entries never refer to separately encoded 
// Returns a buffer size that is sufficient for meshopt_encodeIndexBuffer to encode
// index_count indices (a multiple of 3) that reference at most vertex_count vertices.
size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count)
{
	assert(index_count % 3 == 0);

	// smallest bit count (1..32) such that vertex_count <= 2^bits
	unsigned int vertex_bits = 1;

	for (; vertex_bits < 32; ++vertex_bits)
	{
		if (vertex_count <= (size_t(1) << vertex_bits))
			break;
	}

	// a delta needs one extra bit; each varint byte carries 7 bits, so round up to whole bytes
	const unsigned int vertex_groups = (vertex_bits + 1 + 6) / 7;

	// worst case per triangle: 2 header bytes plus 3 varint-encoded deltas
	const size_t triangle_count = index_count / 3;
	const size_t triangle_bytes = 2 + 3 * vertex_groups;

	// 1 byte stream header in front, 16 byte codeaux table at the end
	return 1 + triangle_count * triangle_bytes + 16;
}
meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(index_size == 2 || index_size == 4); + + // the minimum valid encoding is header, 1 byte per triangle and a 16-byte codeaux table + if (buffer_size < 1 + index_count / 3 + 16) + return -2; + + if ((buffer[0] & 0xf0) != kIndexHeader) + return -1; + + int version = buffer[0] & 0x0f; + if (version > 1) + return -1; + + EdgeFifo edgefifo; + memset(edgefifo, -1, sizeof(edgefifo)); + + VertexFifo vertexfifo; + memset(vertexfifo, -1, sizeof(vertexfifo)); + + size_t edgefifooffset = 0; + size_t vertexfifooffset = 0; + + unsigned int next = 0; + unsigned int last = 0; + + int fecmax = version >= 1 ? 13 : 15; + + // since we store 16-byte codeaux table at the end, triangle data has to begin before data_safe_end + const unsigned char* code = buffer + 1; + const unsigned char* data = code + index_count / 3; + const unsigned char* data_safe_end = buffer + buffer_size - 16; + + const unsigned char* codeaux_table = data_safe_end; + + for (size_t i = 0; i < index_count; i += 3) + { + // make sure we have enough data to read for a triangle + // each triangle reads at most 16 bytes of data: 1b for codeaux and 5b for each free index + // after this we can be sure we can read without extra bounds checks + if (data > data_safe_end) + return -2; + + unsigned char codetri = *code++; + + if (codetri < 0xf0) + { + int fe = codetri >> 4; + + // fifo reads are wrapped around 16 entry buffer + unsigned int a = edgefifo[(edgefifooffset - 1 - fe) & 15][0]; + unsigned int b = edgefifo[(edgefifooffset - 1 - fe) & 15][1]; + + int fec = codetri & 15; + + // note: this is the most common path in the entire decoder + // inside this if we try to stay branchless (by using cmov/etc.) 
since these aren't predictable + if (fec < fecmax) + { + // fifo reads are wrapped around 16 entry buffer + unsigned int cf = vertexfifo[(vertexfifooffset - 1 - fec) & 15]; + unsigned int c = (fec == 0) ? next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + unsigned int c = 0; + + // fec - (fec ^ 3) decodes 13, 14 into -1, 1 + // note that we need to update the last index since free indices are delta-encoded + last = c = (fec != 15) ? last + (fec - (fec ^ 3)) : decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, c, vertexfifooffset); + + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + else + { + // fast path: read codeaux from the table + if (codetri < 0xfe) + { + unsigned char codeaux = codeaux_table[codetri & 15]; + + // note: table can't contain feb/fec=15 + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = next++; + + unsigned int bf = vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int b = (feb == 0) ? next : bf; + + int feb0 = feb == 0; + next += feb0; + + unsigned int cf = vertexfifo[(vertexfifooffset - fec) & 15]; + unsigned int c = (fec == 0) ? 
next : cf; + + int fec0 = fec == 0; + next += fec0; + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, feb0); + pushVertexFifo(vertexfifo, c, vertexfifooffset, fec0); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + else + { + // slow path: read a full byte for codeaux instead of using a table lookup + unsigned char codeaux = *data++; + + int fea = codetri == 0xfe ? 0 : 15; + int feb = codeaux >> 4; + int fec = codeaux & 15; + + // reset: codeaux is 0 but encoded as not-a-table + if (codeaux == 0) + next = 0; + + // fifo reads are wrapped around 16 entry buffer + // also note that we increment next for all three vertices before decoding indices - this matches encoder behavior + unsigned int a = (fea == 0) ? next++ : 0; + unsigned int b = (feb == 0) ? next++ : vertexfifo[(vertexfifooffset - feb) & 15]; + unsigned int c = (fec == 0) ? 
next++ : vertexfifo[(vertexfifooffset - fec) & 15]; + + // note that we need to update the last index since free indices are delta-encoded + if (fea == 15) + last = a = decodeIndex(data, last); + + if (feb == 15) + last = b = decodeIndex(data, last); + + if (fec == 15) + last = c = decodeIndex(data, last); + + // output triangle + writeTriangle(destination, i, index_size, a, b, c); + + // push vertex/edge fifo must match the encoding step *exactly* otherwise the data will not be decoded correctly + pushVertexFifo(vertexfifo, a, vertexfifooffset); + pushVertexFifo(vertexfifo, b, vertexfifooffset, (feb == 0) | (feb == 15)); + pushVertexFifo(vertexfifo, c, vertexfifooffset, (fec == 0) | (fec == 15)); + + pushEdgeFifo(edgefifo, b, a, edgefifooffset); + pushEdgeFifo(edgefifo, c, b, edgefifooffset); + pushEdgeFifo(edgefifo, a, c, edgefifooffset); + } + } + } + + // we should've read all data bytes and stopped at the boundary between data and codeaux table + if (data != data_safe_end) + return -3; + + return 0; +} + +size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count) +{ + using namespace meshopt; + + // the minimum valid encoding is header, 1 byte per index and a 4-byte tail + if (buffer_size < 1 + index_count + 4) + return 0; + + int version = gEncodeIndexVersion; + + buffer[0] = (unsigned char)(kSequenceHeader | version); + + unsigned int last[2] = {}; + unsigned int current = 0; + + unsigned char* data = buffer + 1; + unsigned char* data_safe_end = buffer + buffer_size - 4; + + for (size_t i = 0; i < index_count; ++i) + { + // make sure we have enough data to write + // each index writes at most 5 bytes of data; there's a 4 byte tail after data_safe_end + // after this we can be sure we can write without extra bounds checks + if (data >= data_safe_end) + return 0; + + unsigned int index = indices[i]; + + // this is a heuristic that switches between baselines when the delta grows too large + // 
// Returns a buffer size that is sufficient for meshopt_encodeIndexSequence to encode
// index_count indices that reference at most vertex_count vertices.
size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count)
{
	// smallest bit count (1..32) such that vertex_count <= 2^bits
	unsigned int vertex_bits = 1;

	for (; vertex_bits < 32; ++vertex_bits)
	{
		if (vertex_count <= (size_t(1) << vertex_bits))
			break;
	}

	// each index is one varint holding a delta (one extra bit) and the baseline selector (one more bit);
	// a varint byte carries 7 bits, so round up to whole bytes
	const unsigned int vertex_groups = (vertex_bits + 1 + 1 + 6) / 7;

	// 1 byte stream header in front, 4 byte tail at the end
	const size_t payload = index_count * vertex_groups;

	return 1 + payload + 4;
}
// Folds `len` bytes of `key` into hash state `h`, four bytes at a time (MurmurHash2 mixing step).
// Trailing bytes beyond the last multiple of 4 are ignored; with len < 4 the state is returned unchanged.
static unsigned int hashUpdate4(unsigned int h, const unsigned char* key, size_t len)
{
	// MurmurHash2
	const unsigned int m = 0x5bd1e995;
	const int r = 24;

	while (len >= 4)
	{
		// load through memcpy: key is a byte pointer that is not guaranteed to be 4-byte aligned,
		// and dereferencing it as unsigned int would be a misaligned / aliasing-violating access
		unsigned int k;
		memcpy(&k, key, sizeof(k));

		k *= m;
		k ^= k >> r;
		k *= m;

		h *= m;
		h ^= k;

		key += 4;
		len -= 4;
	}

	return h;
}
// Open-addressing lookup in a power-of-two sized table.
// Returns the slot that already holds an item equal to `key` (per hash.equal), or the first
// `empty` slot met along the probe sequence, so the caller can insert there.
// Hash must provide hash(key) and equal(item, key).
template <typename T, typename Hash>
static T* hashLookup(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty)
{
	assert(buckets > 0);
	assert((buckets & (buckets - 1)) == 0);

	const size_t hashmod = buckets - 1;

	size_t bucket = hash.hash(key) & hashmod;
	size_t probe = 0;

	while (probe <= hashmod)
	{
		T* slot = table + bucket;

		// an empty slot ends the search without consulting equal()
		if (*slot == empty || hash.equal(*slot, key))
			return slot;

		// hash collision, quadratic probing
		++probe;
		bucket = (bucket + probe) & hashmod;
	}

	assert(false && "Hash table is full"); // unreachable
	return 0;
}
unsigned char*>(vertices), vertex_size, vertex_size}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? indices[i] : unsigned(i); + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + { + *entry = index; + + destination[index] = next_vertex++; + } + else + { + assert(destination[*entry] != ~0u); + + destination[index] = destination[*entry]; + } + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + +size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +{ + using namespace meshopt; + + assert(indices || index_count == vertex_count); + assert(index_count % 3 == 0); + assert(stream_count > 0 && stream_count <= 16); + + for (size_t i = 0; i < stream_count; ++i) + { + assert(streams[i].size > 0 && streams[i].size <= 256); + assert(streams[i].size <= streams[i].stride); + } + + meshopt_Allocator allocator; + + memset(destination, -1, vertex_count * sizeof(unsigned int)); + + VertexStreamHasher hasher = {streams, stream_count}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? 
indices[i] : unsigned(i); + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + { + *entry = index; + + destination[index] = next_vertex++; + } + else + { + assert(destination[*entry] != ~0u); + + destination[index] = destination[*entry]; + } + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + +void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap) +{ + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + // support in-place remap + if (destination == vertices) + { + unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); + memcpy(vertices_copy, vertices, vertex_count * vertex_size); + vertices = vertices_copy; + } + + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] != ~0u) + { + assert(remap[i] < vertex_count); + + memcpy(static_cast<unsigned char*>(destination) + remap[i] * vertex_size, static_cast<const unsigned char*>(vertices) + i * vertex_size, vertex_size); + } + } +} + +void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap) +{ + assert(index_count % 3 == 0); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices ? 
indices[i] : unsigned(i); + assert(remap[index] != ~0u); + + destination[i] = remap[index]; + } +} + +void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +{ + using namespace meshopt; + + assert(indices); + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size <= vertex_stride); + + meshopt_Allocator allocator; + + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + VertexHasher hasher = {static_cast<const unsigned char*>(vertices), vertex_size, vertex_stride}; + + size_t table_size = hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (remap[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + destination[i] = remap[index]; + } +} + +void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count) +{ + using namespace meshopt; + + assert(indices); + assert(index_count % 3 == 0); + assert(stream_count > 0 && stream_count <= 16); + + for (size_t i = 0; i < stream_count; ++i) + { + assert(streams[i].size > 0 && streams[i].size <= 256); + assert(streams[i].size <= streams[i].stride); + } + + meshopt_Allocator allocator; + + unsigned int* remap = allocator.allocate<unsigned int>(vertex_count); + memset(remap, -1, vertex_count * sizeof(unsigned int)); + + VertexStreamHasher hasher = {streams, stream_count}; + + size_t table_size = 
hashBuckets(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (remap[index] == ~0u) + { + unsigned int* entry = hashLookup(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + destination[i] = remap[index]; + } +} diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h new file mode 100644 index 0000000000..fde00f9c82 --- /dev/null +++ b/thirdparty/meshoptimizer/meshoptimizer.h @@ -0,0 +1,951 @@ +/** + * meshoptimizer - version 0.15 + * + * Copyright (C) 2016-2020, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com) + * Report bugs and download new versions at https://github.com/zeux/meshoptimizer + * + * This library is distributed under the MIT License. See notice at the end of this file. + */ +#pragma once + +#include <assert.h> +#include <stddef.h> + +/* Version macro; major * 1000 + minor * 10 + patch */ +#define MESHOPTIMIZER_VERSION 150 /* 0.15 */ + +/* If no API is defined, assume default */ +#ifndef MESHOPTIMIZER_API +#define MESHOPTIMIZER_API +#endif + +/* Experimental APIs have unstable interface and might have implementation that's not fully tested or optimized */ +#define MESHOPTIMIZER_EXPERIMENTAL MESHOPTIMIZER_API + +/* C interface */ +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Vertex attribute stream, similar to glVertexPointer + * Each element takes size bytes, with stride controlling the spacing between successive elements. 
+ */ +struct meshopt_Stream +{ + const void* data; + size_t size; + size_t stride; +}; + +/** + * Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Generates a vertex remap table from multiple vertex streams and an optional index buffer and returns number of unique vertices + * As a result, all vertices that are binary equivalent map to the same (new) location, with no gaps in the resulting sequence. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer. + * To remap vertex buffers, you will need to call meshopt_remapVertexBuffer for each vertex stream. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. 
+ * + * destination must contain enough space for the resulting remap table (vertex_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting vertex buffer (unique_vertex_count elements, returned by meshopt_generateVertexRemap) + * vertex_count should be the initial vertex count and not the value returned by meshopt_generateVertexRemap + */ +MESHOPTIMIZER_API void meshopt_remapVertexBuffer(void* destination, const void* vertices, size_t vertex_count, size_t vertex_size, const unsigned int* remap); + +/** + * Generate index buffer from the source index buffer and remap table generated by meshopt_generateVertexRemap + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices can be NULL if the input is unindexed + */ +MESHOPTIMIZER_API void meshopt_remapIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const unsigned int* remap); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt first vertex_size bytes) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all vertex_size bytes, including padding which should be zero-initialized. 
+ * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); + +/** + * Generate index buffer that can be used for more efficient rendering when only a subset of the vertex attributes is necessary + * All vertices that are binary equivalent (wrt specified streams) map to the first vertex in the original vertex buffer. + * This makes it possible to use the index buffer for Z pre-pass or shadowmap rendering, while using the original index buffer for regular rendering. + * Note that binary equivalence considers all size bytes in each stream, including padding which should be zero-initialized. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count); + +/** + * Vertex transform cache optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. 
+ * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for strip-like caches + * Produces inferior results to meshopt_optimizeVertexCache from the GPU vertex cache perspective + * However, the resulting index order is more optimal if the goal is to reduce the triangle strip length or improve compression efficiency + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Vertex transform cache optimizer for FIFO caches + * Reorders indices to reduce the number of GPU vertex shader invocations + * Generally takes ~3x less time to optimize meshes but produces inferior results compared to meshopt_optimizeVertexCache + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * cache_size should be less than the actual GPU cache size to avoid cache thrashing + */ +MESHOPTIMIZER_API void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); + +/** + * Overdraw optimizer + * Reorders indices to reduce the number of GPU vertex shader invocations and the pixel overdraw + * If index buffer contains multiple ranges for multiple draw calls, this function needs to be called on each range individually. 
+ * + * destination must contain enough space for the resulting index buffer (index_count elements) + * indices must contain index data that is the result of meshopt_optimizeVertexCache (*not* the original mesh indices!) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * threshold indicates how much the overdraw optimizer can degrade vertex cache efficiency (1.05 = up to 5%) to reduce overdraw more efficiently + */ +MESHOPTIMIZER_API void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); + +/** + * Vertex fetch cache optimizer + * Reorders vertices and changes indices to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * This function works for a single vertex stream; for multiple vertex streams, use meshopt_optimizeVertexFetchRemap + meshopt_remapVertexBuffer for each stream. 
+ * + * destination must contain enough space for the resulting vertex buffer (vertex_count elements) + * indices is used both as an input and as an output index buffer + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); + +/** + * Vertex fetch cache optimizer + * Generates vertex remap to reduce the amount of GPU memory fetches during vertex processing + * Returns the number of unique vertices, which is the same as input vertex count unless some vertices are unused + * The resulting remap table should be used to reorder vertex/index buffers using meshopt_remapVertexBuffer/meshopt_remapIndexBuffer + * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_API size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count); + +/** + * Index buffer encoder + * Encodes index data into an array of bytes that is generally much smaller (<1.5 bytes/triangle) and compresses better (<1 bytes/triangle) compared to original. + * Input index buffer must represent a triangle list. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * For maximum efficiency the index buffer being encoded has to be optimized for vertex cache and vertex fetch first. 
+ * + * buffer must contain enough space for the encoded index buffer (use meshopt_encodeIndexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_API size_t meshopt_encodeIndexBufferBound(size_t index_count, size_t vertex_count); + +/** + * Experimental: Set index encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) and 1 (decodable by 0.14+) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeIndexVersion(int version); + +/** + * Index buffer decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index buffer (index_count elements) + */ +MESHOPTIMIZER_API int meshopt_decodeIndexBuffer(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Experimental: Index sequence encoder + * Encodes index sequence into an array of bytes that is generally smaller and compresses better compared to original. + * Input index sequence can represent arbitrary topology; for triangle lists meshopt_encodeIndexBuffer is likely to be better. 
+ * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * + * buffer must contain enough space for the encoded index sequence (use meshopt_encodeIndexSequenceBound to compute worst case size) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const unsigned int* indices, size_t index_count); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeIndexSequenceBound(size_t index_count, size_t vertex_count); + +/** + * Index sequence decoder + * Decodes index data from an array of bytes generated by meshopt_encodeIndexSequence + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data (e.g. out of range indices). + * + * destination must contain enough space for the resulting index sequence (index_count elements) + */ +MESHOPTIMIZER_EXPERIMENTAL int meshopt_decodeIndexSequence(void* destination, size_t index_count, size_t index_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer encoder + * Encodes vertex data into an array of bytes that is generally smaller and compresses better compared to original. + * Returns encoded data size on success, 0 on error; the only error condition is if buffer doesn't have enough space + * This function works for a single vertex stream; for multiple vertex streams, call meshopt_encodeVertexBuffer for each stream. + * Note that all vertex_size bytes of each vertex are encoded verbatim, including padding which should be zero-initialized. 
+ * + * buffer must contain enough space for the encoded vertex buffer (use meshopt_encodeVertexBufferBound to compute worst case size) + */ +MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size); + +/** + * Experimental: Set vertex encoder format version + * version must specify the data format version to encode; valid values are 0 (decodable by all library versions) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeVertexVersion(int version); + +/** + * Vertex buffer decoder + * Decodes vertex data from an array of bytes generated by meshopt_encodeVertexBuffer + * Returns 0 if decoding was successful, and an error code otherwise + * The decoder is safe to use for untrusted input, but it may produce garbage data. + * + * destination must contain enough space for the resulting vertex buffer (vertex_count * vertex_size bytes) + */ +MESHOPTIMIZER_API int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size); + +/** + * Vertex buffer filters + * These functions can be used to filter output of meshopt_decodeVertexBuffer in-place. + * count must be aligned by 4 and stride is fixed for each function to facilitate SIMD implementation. + * + * meshopt_decodeFilterOct decodes octahedral encoding of a unit vector with K-bit (K <= 16) signed X/Y as an input; Z must store 1.0f. + * Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8. W is preserved as is. + * + * meshopt_decodeFilterQuat decodes 3-component quaternion encoding with K-bit (4 <= K <= 16) component encoding and a 2-bit component index indicating which component to reconstruct. + * Each component is stored as a 16-bit integer; stride must be equal to 8. 
+ * + * meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M. + * Each 32-bit component is decoded in isolation; stride must be divisible by 4. + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size); +MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size); + +/** + * Experimental: Mesh simplifier + * Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible + * The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error. + * If not all attributes from the input mesh are required, it's recommended to reindex the mesh using meshopt_generateShadowIndexBuffer prior to simplification. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) 
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +// -- GODOT start -- +//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error); +// -- GODOT end -- + +/** + * Experimental: Mesh simplifier (sloppy) + * Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance + * The algorithm doesn't preserve mesh topology but is always able to reach target triangle count. + * Returns the number of indices after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. 
+ * + * destination must contain enough space for the target index buffer + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); + +/** + * Experimental: Point cloud simplifier + * Reduces the number of points in the cloud to reach the given target + * Returns the number of points after simplification, with destination containing new index data + * The resulting index buffer references vertices from the original vertex buffer. + * If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended. + * + * destination must contain enough space for the target index buffer + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count); + +/** + * Mesh stripifier + * Converts a previously vertex cache optimized triangle list to triangle strip, stitching strips using restart index or degenerate triangles + * Returns the number of indices in the resulting strip, with destination containing new index data + * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. + * Using restart indices can result in ~10% smaller index buffers, but on some GPUs restart indices may result in decreased performance. 
+ * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_stripifyBound + * restart_index should be 0xffff or 0xffffffff depending on index size, or 0 to use degenerate triangles + */ +MESHOPTIMIZER_API size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_stripifyBound(size_t index_count); + +/** + * Mesh unstripifier + * Converts a triangle strip to a triangle list + * Returns the number of indices in the resulting list, with destination containing new index data + * + * destination must contain enough space for the target index buffer, worst case can be computed with meshopt_unstripifyBound + */ +MESHOPTIMIZER_API size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index); +MESHOPTIMIZER_API size_t meshopt_unstripifyBound(size_t index_count); + +struct meshopt_VertexCacheStatistics +{ + unsigned int vertices_transformed; + unsigned int warps_executed; + float acmr; /* transformed vertices / triangle count; best case 0.5, worst case 3.0, optimum depends on topology */ + float atvr; /* transformed vertices / vertex count; best case 1.0, worst case 6.0, optimum is 1.0 (each vertex is transformed once) */ +}; + +/** + * Vertex transform cache analyzer + * Returns cache hit statistics using a simplified FIFO model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size); + +struct meshopt_OverdrawStatistics +{ + unsigned int pixels_covered; + unsigned int pixels_shaded; + float overdraw; /* shaded pixels / covered pixels; best case 1.0 */ +}; + +/** + * Overdraw analyzer + * Returns 
overdraw statistics using a software rasterizer + * Results may not match actual GPU performance + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_API struct meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +struct meshopt_VertexFetchStatistics +{ + unsigned int bytes_fetched; + float overfetch; /* fetched bytes / vertex buffer size; best case 1.0 (each byte is fetched once) */ +}; + +/** + * Vertex fetch cache analyzer + * Returns cache hit statistics using a simplified direct mapped model + * Results may not match actual GPU performance + */ +MESHOPTIMIZER_API struct meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size); + +struct meshopt_Meshlet +{ + unsigned int vertices[64]; + unsigned char indices[126][3]; + unsigned char triangle_count; + unsigned char vertex_count; +}; + +/** + * Experimental: Meshlet builder + * Splits the mesh into a set of meshlets where each meshlet has a micro index buffer indexing into meshlet vertices that refer to the original vertex buffer + * The resulting data can be used to render meshes using NVidia programmable mesh shading pipeline, or in other cluster-based renderers. + * For maximum efficiency the index buffer being converted has to be optimized for vertex cache first. 
+ * + * destination must contain enough space for all meshlets, worst case size can be computed with meshopt_buildMeshletsBound + * max_vertices and max_triangles can't exceed limits statically declared in meshopt_Meshlet (max_vertices <= 64, max_triangles <= 126) + */ +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshlets(struct meshopt_Meshlet* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles); +MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_buildMeshletsBound(size_t index_count, size_t max_vertices, size_t max_triangles); + +struct meshopt_Bounds +{ + /* bounding sphere, useful for frustum and occlusion culling */ + float center[3]; + float radius; + + /* normal cone, useful for backface culling */ + float cone_apex[3]; + float cone_axis[3]; + float cone_cutoff; /* = cos(angle/2) */ + + /* normal cone axis and cutoff, stored in 8-bit SNORM format; decode using x/127.0 */ + signed char cone_axis_s8[3]; + signed char cone_cutoff_s8; +}; + +/** + * Experimental: Cluster bounds generator + * Creates bounding volumes that can be used for frustum, backface and occlusion culling. 
+ * + * For backface culling with orthographic projection, use the following formula to reject backfacing clusters: + * dot(view, cone_axis) >= cone_cutoff + * + * For perspective projection, you can use the formula that needs cone apex in addition to axis & cutoff: + * dot(normalize(cone_apex - camera_position), cone_axis) >= cone_cutoff + * + * Alternatively, you can use the formula that doesn't need cone apex and uses bounding sphere instead: + * dot(normalize(center - camera_position), cone_axis) >= cone_cutoff + radius / length(center - camera_position) + * or an equivalent formula that doesn't have a singularity at center = camera_position: + * dot(center - camera_position, cone_axis) >= cone_cutoff * length(center - camera_position) + radius + * + * The formula that uses the apex is slightly more accurate but needs the apex; if you are already using bounding sphere + * to do frustum/occlusion culling, the formula that doesn't use the apex may be preferable. + * + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + * index_count should be less than or equal to 256*3 (the function assumes clusters of limited size) + */ +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeClusterBounds(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeMeshletBounds(const struct meshopt_Meshlet* meshlet, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Generates a remap table that can be used to reorder points for spatial locality. + * Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer. 
+ * + * destination must contain enough space for the resulting remap table (vertex_count elements) + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Experimental: Spatial sorter + * Reorders triangles for spatial locality, and generates a new index buffer. The resulting index buffer can be used with other functions like optimizeVertexCache. + * + * destination must contain enough space for the resulting index buffer (index_count elements) + * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); + +/** + * Set allocation callbacks + * These callbacks will be used instead of the default operator new/operator delete for all temporary allocations in the library. + * Note that all algorithms only allocate memory for temporary use. + * allocate/deallocate are always called in a stack-like order - last pointer to be allocated is deallocated first. 
+ */ +MESHOPTIMIZER_API void meshopt_setAllocator(void* (*allocate)(size_t), void (*deallocate)(void*)); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +/* Quantization into commonly supported data formats */ +#ifdef __cplusplus +/** + * Quantize a float in [0..1] range into an N-bit fixed point unorm value + * Assumes reconstruction function (q / (2^N-1)), which is the case for fixed-function normalized fixed point conversion + * Maximum reconstruction error: 1/2^(N+1) + */ +inline int meshopt_quantizeUnorm(float v, int N); + +/** + * Quantize a float in [-1..1] range into an N-bit fixed point snorm value + * Assumes reconstruction function (q / (2^(N-1)-1)), which is the case for fixed-function normalized fixed point conversion (except early OpenGL versions) + * Maximum reconstruction error: 1/2^N + */ +inline int meshopt_quantizeSnorm(float v, int N); + +/** + * Quantize a float into half-precision floating point value + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Representable magnitude range: [6e-5; 65504] + * Maximum relative reconstruction error: 5e-4 + */ +inline unsigned short meshopt_quantizeHalf(float v); + +/** + * Quantize a float into a floating point value with a limited number of significant mantissa bits + * Generates +-inf for overflow, preserves NaN, flushes denormals to zero, rounds to nearest + * Assumes N is in a valid mantissa precision range, which is 1..23 + */ +inline float meshopt_quantizeFloat(float v, int N); +#endif + +/** + * C++ template interface + * + * These functions mirror the C interface the library provides, providing template-based overloads so that + * the caller can use an arbitrary type for the index data, both for input and output. + * When the supplied type is the same size as that of unsigned int, the wrappers are zero-cost; when it's not, + * the wrappers end up allocating memory and copying index data to convert from one type to another. 
+ */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template <typename T> +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size); +template <typename T> +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template <typename T> +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap); +template <typename T> +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride); +template <typename T> +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count); +template <typename T> +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size); +template <typename T> +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold); +template <typename T> +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count); +template <typename T> +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, 
size_t vertex_count, size_t vertex_size); +template <typename T> +inline size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template <typename T> +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template <typename T> +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count); +template <typename T> +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size); +template <typename T> +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); +template <typename T> +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count); +template <typename T> +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index); +template <typename T> +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index); +template <typename T> +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size); +template <typename T> +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride); +template <typename T> +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size); +template <typename T> 
/* Inline implementation */
#ifdef __cplusplus
/* Clamps v to [0..1] (NaN becomes 0) and scales it to an N-bit unorm integer, rounding to nearest. */
inline int meshopt_quantizeUnorm(float v, int N)
{
	const float max_value = float((1 << N) - 1);

	if (!(v >= 0))
		v = 0;
	if (!(v <= 1))
		v = 1;

	return int(v * max_value + 0.5f);
}

/* Clamps v to [-1..1] (NaN becomes -1) and scales it to an N-bit snorm integer, rounding away from zero at .5. */
inline int meshopt_quantizeSnorm(float v, int N)
{
	/* the rounding bias is picked from the sign of the unclamped input */
	const float bias = (v >= 0) ? 0.5f : -0.5f;
	const float max_value = float((1 << (N - 1)) - 1);

	if (!(v >= -1))
		v = -1;
	if (!(v <= +1))
		v = +1;

	return int(v * max_value + bias);
}

/* Converts a 32-bit float to 16-bit half float bits: round to nearest, small values flush to zero, overflow becomes infinity. */
inline unsigned short meshopt_quantizeHalf(float v)
{
	union { float f; unsigned int ui; } bits = {v};
	const unsigned int raw = bits.ui;

	const int sign = (raw >> 16) & 0x8000;
	const int magnitude = raw & 0x7fffffff;

	/* rebias the exponent by 112 (127-15) and round to nearest before dropping 13 mantissa bits */
	int half = (magnitude - (112 << 23) + (1 << 12)) >> 13;

	/* the checks below are applied in this order on purpose: each later one overrides the earlier result */

	/* below exponent -14 (encoded as 113): flush to zero */
	if (magnitude < (113 << 23))
		half = 0;

	/* exponent 16 (encoded as 143) and above: infinity */
	if (magnitude >= (143 << 23))
		half = 0x7c00;

	/* anything above the infinity bit pattern is a NaN; all of them become a quiet NaN */
	if (magnitude > (255 << 23))
		half = 0x7e00;

	return (unsigned short)(sign | half);
}

/* Rounds v to N significant mantissa bits; inf/NaN pass through unchanged and denormals become zero. */
inline float meshopt_quantizeFloat(float v, int N)
{
	union { float f; unsigned int ui; } bits = {v};

	const int step = 1 << (23 - N);
	const int mask = step - 1;
	const int round = step >> 1;

	const int exponent = bits.ui & 0x7f800000;

	if (exponent == 0)
	{
		/* zero or denormal: flush to zero */
		bits.ui = 0;
	}
	else if (exponent != 0x7f800000)
	{
		/* finite value: round to nearest; inf/nan are skipped so that nan can't overflow into -0 */
		bits.ui = (bits.ui + round) & ~mask;
	}

	return bits.f;
}
#endif
size_t(-1) : size * sizeof(T))); + blocks[count++] = result; + return result; + } + +private: + void* blocks[24]; + size_t count; +}; + +// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker +template <typename T> void* (*meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new; +template <typename T> void (*meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete; +#endif + +/* Inline implementation for C++ templated wrappers */ +#if defined(__cplusplus) && !defined(MESHOPTIMIZER_NO_WRAPPERS) +template <typename T, bool ZeroCopy = sizeof(T) == sizeof(unsigned int)> +struct meshopt_IndexAdapter; + +template <typename T> +struct meshopt_IndexAdapter<T, false> +{ + T* result; + unsigned int* data; + size_t count; + + meshopt_IndexAdapter(T* result_, const T* input, size_t count_) + : result(result_) + , data(0) + , count(count_) + { + size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int); + + data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size)); + + if (input) + { + for (size_t i = 0; i < count; ++i) + data[i] = input[i]; + } + } + + ~meshopt_IndexAdapter() + { + if (result) + { + for (size_t i = 0; i < count; ++i) + result[i] = T(data[i]); + } + + meshopt_Allocator::Storage::deallocate(data); + } +}; + +template <typename T> +struct meshopt_IndexAdapter<T, true> +{ + unsigned int* data; + + meshopt_IndexAdapter(T* result, const T* input, size_t) + : data(reinterpret_cast<unsigned int*>(result ? result : const_cast<T*>(input))) + { + } +}; + +template <typename T> +inline size_t meshopt_generateVertexRemap(unsigned int* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemap(destination, indices ? 
in.data : 0, index_count, vertices, vertex_count, vertex_size); +} + +template <typename T> +inline size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + + return meshopt_generateVertexRemapMulti(destination, indices ? in.data : 0, index_count, vertex_count, streams, stream_count); +} + +template <typename T> +inline void meshopt_remapIndexBuffer(T* destination, const T* indices, size_t index_count, const unsigned int* remap) +{ + meshopt_IndexAdapter<T> in(0, indices, indices ? index_count : 0); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_remapIndexBuffer(out.data, indices ? in.data : 0, index_count, remap); +} + +template <typename T> +inline void meshopt_generateShadowIndexBuffer(T* destination, const T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size, size_t vertex_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_generateShadowIndexBuffer(out.data, in.data, index_count, vertices, vertex_count, vertex_size, vertex_stride); +} + +template <typename T> +inline void meshopt_generateShadowIndexBufferMulti(T* destination, const T* indices, size_t index_count, size_t vertex_count, const meshopt_Stream* streams, size_t stream_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_generateShadowIndexBufferMulti(out.data, in.data, index_count, vertex_count, streams, stream_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCache(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + 
+ meshopt_optimizeVertexCache(out.data, in.data, index_count, vertex_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCacheStrip(T* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeVertexCacheStrip(out.data, in.data, index_count, vertex_count); +} + +template <typename T> +inline void meshopt_optimizeVertexCacheFifo(T* destination, const T* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeVertexCacheFifo(out.data, in.data, index_count, vertex_count, cache_size); +} + +template <typename T> +inline void meshopt_optimizeOverdraw(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_optimizeOverdraw(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, threshold); +} + +template <typename T> +inline size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const T* indices, size_t index_count, size_t vertex_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_optimizeVertexFetchRemap(destination, in.data, index_count, vertex_count); +} + +template <typename T> +inline size_t meshopt_optimizeVertexFetch(void* destination, T* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> inout(indices, indices, index_count); + + return meshopt_optimizeVertexFetch(destination, inout.data, index_count, vertices, vertex_count, vertex_size); +} + +template <typename T> +inline 
size_t meshopt_encodeIndexBuffer(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_encodeIndexBuffer(buffer, buffer_size, in.data, index_count); +} + +template <typename T> +inline int meshopt_decodeIndexBuffer(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexBuffer(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template <typename T> +inline size_t meshopt_encodeIndexSequence(unsigned char* buffer, size_t buffer_size, const T* indices, size_t index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_encodeIndexSequence(buffer, buffer_size, in.data, index_count); +} + +template <typename T> +inline int meshopt_decodeIndexSequence(T* destination, size_t index_count, const unsigned char* buffer, size_t buffer_size) +{ + char index_size_valid[sizeof(T) == 2 || sizeof(T) == 4 ? 
1 : -1]; + (void)index_size_valid; + + return meshopt_decodeIndexSequence(destination, index_count, sizeof(T), buffer, buffer_size); +} + +template <typename T> +inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, index_count); + + return meshopt_simplify(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error); +} + +template <typename T> +inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, target_index_count); + + return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count); +} + +template <typename T> +inline size_t meshopt_stripify(T* destination, const T* indices, size_t index_count, size_t vertex_count, T restart_index) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, (index_count / 3) * 5); + + return meshopt_stripify(out.data, in.data, index_count, vertex_count, unsigned(restart_index)); +} + +template <typename T> +inline size_t meshopt_unstripify(T* destination, const T* indices, size_t index_count, T restart_index) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + meshopt_IndexAdapter<T> out(destination, 0, (index_count - 2) * 3); + + return meshopt_unstripify(out.data, in.data, index_count, unsigned(restart_index)); +} + +template <typename T> +inline meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const T* indices, size_t 
index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int buffer_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeVertexCache(in.data, index_count, vertex_count, cache_size, warp_size, buffer_size); +} + +template <typename T> +inline meshopt_OverdrawStatistics meshopt_analyzeOverdraw(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeOverdraw(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const T* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_analyzeVertexFetch(in.data, index_count, vertex_count, vertex_size); +} + +template <typename T> +inline size_t meshopt_buildMeshlets(meshopt_Meshlet* destination, const T* indices, size_t index_count, size_t vertex_count, size_t max_vertices, size_t max_triangles) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_buildMeshlets(destination, in.data, index_count, vertex_count, max_vertices, max_triangles); +} + +template <typename T> +inline meshopt_Bounds meshopt_computeClusterBounds(const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + + return meshopt_computeClusterBounds(in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} + +template <typename T> +inline void meshopt_spatialSortTriangles(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + meshopt_IndexAdapter<T> in(0, indices, index_count); + 
meshopt_IndexAdapter<T> out(destination, 0, index_count); + + meshopt_spatialSortTriangles(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride); +} +#endif + +/** + * Copyright (c) 2016-2020 Arseny Kapoulkine + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ diff --git a/thirdparty/meshoptimizer/overdrawanalyzer.cpp b/thirdparty/meshoptimizer/overdrawanalyzer.cpp new file mode 100644 index 0000000000..8d5859ba39 --- /dev/null +++ b/thirdparty/meshoptimizer/overdrawanalyzer.cpp @@ -0,0 +1,230 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <string.h> + +// This work is based on: +// Nicolas Capens. Advanced Rasterization. 
namespace meshopt
{

// side length, in pixels, of the square viewport that triangles are rasterized into
const int kViewport = 256;

// Per-pixel rasterization state. The last array index is the facing of the triangle as computed
// by rasterize(): 0 when the screen-space determinant is <= 0, 1 when it is > 0.
struct OverdrawBuffer
{
	// depth of the last sample that passed the depth test
	float z[kViewport][kViewport][2];
	// number of samples that passed the depth test
	unsigned int overdraw[kViewport][kViewport][2];
};

#ifndef min
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif

#ifndef max
#define max(a, b) ((a) > (b) ? (a) : (b))
#endif

// Computes the screen-space depth gradients of the plane through three vertices.
//
// dzdx/dzdy receive the change of z per unit of x/y; both are 0 when the triangle is degenerate
// in screen space (zero determinant). Returns the determinant of the edge matrix, whose sign
// tells the winding of the triangle.
static float computeDepthGradients(float& dzdx, float& dzdy, float x1, float y1, float z1, float x2, float y2, float z2, float x3, float y3, float z3)
{
	// z2 = z1 + dzdx * (x2 - x1) + dzdy * (y2 - y1)
	// z3 = z1 + dzdx * (x3 - x1) + dzdy * (y3 - y1)
	// (x2-x1 y2-y1)(dzdx) = (z2-z1)
	// (x3-x1 y3-y1)(dzdy)   (z3-z1)
	// we'll solve it with Cramer's rule
	float det = (x2 - x1) * (y3 - y1) - (y2 - y1) * (x3 - x1);
	float invdet = (det == 0) ? 0 : 1 / det;

	// the whole numerator determinant has to be divided by det, not just its second product
	dzdx = ((z2 - z1) * (y3 - y1) - (y2 - y1) * (z3 - z1)) * invdet;
	dzdy = ((x2 - x1) * (z3 - z1) - (z2 - z1) * (x3 - x1)) * invdet;

	return det;
}

// half-space fixed point triangle rasterizer
//
// Rasterizes one triangle, given in viewport coordinates, into buffer. Every pixel whose center
// is covered and whose interpolated depth is >= the stored depth gets its depth replaced and its
// overdraw counter incremented. Triangles with a positive determinant go to slot 1 of the buffer
// with reversed depth, all others go to slot 0.
static void rasterize(OverdrawBuffer* buffer, float v1x, float v1y, float v1z, float v2x, float v2y, float v2z, float v3x, float v3y, float v3z)
{
	// compute depth gradients
	float DZx, DZy;
	float det = computeDepthGradients(DZx, DZy, v1x, v1y, v1z, v2x, v2y, v2z, v3x, v3y, v3z);
	int sign = det > 0;

	// flip backfacing triangles to simplify rasterization logic
	if (sign)
	{
		// flipping v2 & v3 preserves depth gradients since they're based on v1
		float t;
		t = v2x, v2x = v3x, v3x = t;
		t = v2y, v2y = v3y, v3y = t;
		t = v2z, v2z = v3z, v3z = t;

		// flip depth since we rasterize backfacing triangles to second buffer with reverse Z; only v1z is used below
		v1z = kViewport - v1z;
		DZx = -DZx;
		DZy = -DZy;
	}

	// coordinates, 28.4 fixed point
	int X1 = int(16.0f * v1x + 0.5f);
	int X2 = int(16.0f * v2x + 0.5f);
	int X3 = int(16.0f * v3x + 0.5f);

	int Y1 = int(16.0f * v1y + 0.5f);
	int Y2 = int(16.0f * v2y + 0.5f);
	int Y3 = int(16.0f * v3y + 0.5f);

	// bounding rectangle, clipped against viewport
	// since we rasterize pixels with covered centers, min >0.5 should round up
	// as for max, due to top-left filling convention we will never rasterize right/bottom edges
	// so max >= 0.5 should round down
	int minx = max((min(X1, min(X2, X3)) + 7) >> 4, 0);
	int maxx = min((max(X1, max(X2, X3)) + 7) >> 4, kViewport);
	int miny = max((min(Y1, min(Y2, Y3)) + 7) >> 4, 0);
	int maxy = min((max(Y1, max(Y2, Y3)) + 7) >> 4, kViewport);

	// deltas, 28.4 fixed point
	int DX12 = X1 - X2;
	int DX23 = X2 - X3;
	int DX31 = X3 - X1;

	int DY12 = Y1 - Y2;
	int DY23 = Y2 - Y3;
	int DY31 = Y3 - Y1;

	// fill convention correction
	int TL1 = DY12 < 0 || (DY12 == 0 && DX12 > 0);
	int TL2 = DY23 < 0 || (DY23 == 0 && DX23 > 0);
	int TL3 = DY31 < 0 || (DY31 == 0 && DX31 > 0);

	// half edge equations, 24.8 fixed point
	// note that we offset minx/miny by half pixel since we want to rasterize pixels with covered centers
	int FX = (minx << 4) + 8;
	int FY = (miny << 4) + 8;
	int CY1 = DX12 * (FY - Y1) - DY12 * (FX - X1) + TL1 - 1;
	int CY2 = DX23 * (FY - Y2) - DY23 * (FX - X2) + TL2 - 1;
	int CY3 = DX31 * (FY - Y3) - DY31 * (FX - X3) + TL3 - 1;
	float ZY = v1z + (DZx * float(FX - X1) + DZy * float(FY - Y1)) * (1 / 16.f);

	for (int y = miny; y < maxy; y++)
	{
		int CX1 = CY1;
		int CX2 = CY2;
		int CX3 = CY3;
		float ZX = ZY;

		for (int x = minx; x < maxx; x++)
		{
			// check if all CXn are non-negative
			if ((CX1 | CX2 | CX3) >= 0)
			{
				if (ZX >= buffer->z[y][x][sign])
				{
					buffer->z[y][x][sign] = ZX;
					buffer->overdraw[y][x][sign]++;
				}
			}

			// signed left shift is UB for negative numbers so use unsigned-signed casts
			CX1 -= int(unsigned(DY12) << 4);
			CX2 -= int(unsigned(DY23) << 4);
			CX3 -= int(unsigned(DY31) << 4);
			ZX += DZx;
		}

		// signed left shift is UB for negative numbers so use unsigned-signed casts
		CY1 += int(unsigned(DX12) << 4);
		CY2 += int(unsigned(DX23) << 4);
		CY3 += int(unsigned(DX31) << 4);
		ZY += DZy;
	}
}

} // namespace meshopt
// Computes one sort value per cluster: the dot product between the cluster's normalized summed
// normal and the vector from the mesh centroid to the cluster's area-weighted centroid.
// clusters holds the first triangle of each cluster; the last cluster runs to index_count.
static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
{
	const size_t stride = vertex_positions_stride / sizeof(float);

	// mesh centroid: mean position over every index reference
	float center[3] = {};

	for (size_t i = 0; i < index_count; ++i)
	{
		const float* position = vertex_positions + stride * indices[i];

		for (int k = 0; k < 3; ++k)
			center[k] += position[k];
	}

	for (int k = 0; k < 3; ++k)
		center[k] /= index_count;

	for (size_t c = 0; c < cluster_count; ++c)
	{
		const size_t first = clusters[c] * 3;
		const size_t last = (c + 1 < cluster_count) ? clusters[c + 1] * 3 : index_count;
		assert(first < last);

		float area_sum = 0;
		float centroid[3] = {};
		float normal[3] = {};

		for (size_t i = first; i < last; i += 3)
		{
			const float* a = vertex_positions + stride * indices[i + 0];
			const float* b = vertex_positions + stride * indices[i + 1];
			const float* d = vertex_positions + stride * indices[i + 2];

			const float ab[3] = {b[0] - a[0], b[1] - a[1], b[2] - a[2]};
			const float ad[3] = {d[0] - a[0], d[1] - a[1], d[2] - a[2]};

			// unnormalized face normal; its length is used as the area weight
			const float nx = ab[1] * ad[2] - ab[2] * ad[1];
			const float ny = ab[2] * ad[0] - ab[0] * ad[2];
			const float nz = ab[0] * ad[1] - ab[1] * ad[0];

			const float area = sqrtf(nx * nx + ny * ny + nz * nz);

			for (int k = 0; k < 3; ++k)
				centroid[k] += (a[k] + b[k] + d[k]) * (area / 3);

			normal[0] += nx;
			normal[1] += ny;
			normal[2] += nz;
			area_sum += area;
		}

		// zero-area clusters keep a zero centroid instead of dividing by zero
		const float inv_area = area_sum == 0 ? 0 : 1 / area_sum;

		for (int k = 0; k < 3; ++k)
			centroid[k] *= inv_area;

		const float normal_length = sqrtf(normal[0] * normal[0] + normal[1] * normal[1] + normal[2] * normal[2]);
		const float inv_normal_length = normal_length == 0 ? 0 : 1 / normal_length;

		for (int k = 0; k < 3; ++k)
			normal[k] *= inv_normal_length;

		const float offset[3] = {centroid[0] - center[0], centroid[1] - center[1], centroid[2] - center[2]};

		sort_data[c] = offset[0] * normal[0] + offset[1] * normal[1] + offset[2] * normal[2];
	}
}
0 : 1 / cluster_normal_length; + + cluster_normal[0] *= inv_cluster_normal_length; + cluster_normal[1] *= inv_cluster_normal_length; + cluster_normal[2] *= inv_cluster_normal_length; + + float centroid_vector[3] = {cluster_centroid[0] - mesh_centroid[0], cluster_centroid[1] - mesh_centroid[1], cluster_centroid[2] - mesh_centroid[2]}; + + sort_data[cluster] = centroid_vector[0] * cluster_normal[0] + centroid_vector[1] * cluster_normal[1] + centroid_vector[2] * cluster_normal[2]; + } +} + +static void calculateSortOrderRadix(unsigned int* sort_order, const float* sort_data, unsigned short* sort_keys, size_t cluster_count) +{ + // compute sort data bounds and renormalize, using fixed point snorm + float sort_data_max = 1e-3f; + + for (size_t i = 0; i < cluster_count; ++i) + { + float dpa = fabsf(sort_data[i]); + + sort_data_max = (sort_data_max < dpa) ? dpa : sort_data_max; + } + + const int sort_bits = 11; + + for (size_t i = 0; i < cluster_count; ++i) + { + // note that we flip distribution since high dot product should come first + float sort_key = 0.5f - 0.5f * (sort_data[i] / sort_data_max); + + sort_keys[i] = meshopt_quantizeUnorm(sort_key, sort_bits) & ((1 << sort_bits) - 1); + } + + // fill histogram for counting sort + unsigned int histogram[1 << sort_bits]; + memset(histogram, 0, sizeof(histogram)); + + for (size_t i = 0; i < cluster_count; ++i) + { + histogram[sort_keys[i]]++; + } + + // compute offsets based on histogram data + size_t histogram_sum = 0; + + for (size_t i = 0; i < 1 << sort_bits; ++i) + { + size_t count = histogram[i]; + histogram[i] = unsigned(histogram_sum); + histogram_sum += count; + } + + assert(histogram_sum == cluster_count); + + // compute sort order based on offsets + for (size_t i = 0; i < cluster_count; ++i) + { + sort_order[histogram[sort_keys[i]]++] = unsigned(i); + } +} + +static unsigned int updateCache(unsigned int a, unsigned int b, unsigned int c, unsigned int cache_size, unsigned int* cache_timestamps, unsigned int& 
timestamp) +{ + unsigned int cache_misses = 0; + + // if vertex is not in cache, put it in cache + if (timestamp - cache_timestamps[a] > cache_size) + { + cache_timestamps[a] = timestamp++; + cache_misses++; + } + + if (timestamp - cache_timestamps[b] > cache_size) + { + cache_timestamps[b] = timestamp++; + cache_misses++; + } + + if (timestamp - cache_timestamps[c] > cache_size) + { + cache_timestamps[c] = timestamp++; + cache_misses++; + } + + return cache_misses; +} + +static size_t generateHardBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int* cache_timestamps) +{ + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = cache_size + 1; + + size_t face_count = index_count / 3; + + size_t result = 0; + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + // when all three vertices are not in the cache it's usually relatively safe to assume that this is a new patch in the mesh + // that is disjoint from previous vertices; sometimes it might come back to reference existing vertices but that frequently + // suggests an inefficiency in the vertex cache optimization algorithm + // usually the first triangle has 3 misses unless it's degenerate - thus we make sure the first cluster always starts with 0 + if (i == 0 || m == 3) + { + destination[result++] = unsigned(i); + } + } + + assert(result <= index_count / 3); + + return result; +} + +static size_t generateSoftBoundaries(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const unsigned int* clusters, size_t cluster_count, unsigned int cache_size, float threshold, unsigned int* cache_timestamps) +{ + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = 0; + + size_t 
result = 0; + + for (size_t it = 0; it < cluster_count; ++it) + { + size_t start = clusters[it]; + size_t end = (it + 1 < cluster_count) ? clusters[it + 1] : index_count / 3; + assert(start < end); + + // reset cache + timestamp += cache_size + 1; + + // measure cluster ACMR + unsigned int cluster_misses = 0; + + for (size_t i = start; i < end; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + cluster_misses += m; + } + + float cluster_threshold = threshold * (float(cluster_misses) / float(end - start)); + + // first cluster always starts from the hard cluster boundary + destination[result++] = unsigned(start); + + // reset cache + timestamp += cache_size + 1; + + unsigned int running_misses = 0; + unsigned int running_faces = 0; + + for (size_t i = start; i < end; ++i) + { + unsigned int m = updateCache(indices[i * 3 + 0], indices[i * 3 + 1], indices[i * 3 + 2], cache_size, &cache_timestamps[0], timestamp); + + running_misses += m; + running_faces += 1; + + if (float(running_misses) / float(running_faces) <= cluster_threshold) + { + // we have reached the target ACMR with the current triangle so we need to start a new cluster on the next one + // note that this may mean that we add 'end` to destination for the last triangle, which will imply that the last + // cluster is empty; however, the 'pop_back' after the loop will clean it up + destination[result++] = unsigned(i + 1); + + // reset cache + timestamp += cache_size + 1; + + running_misses = 0; + running_faces = 0; + } + } + + // each time we reach the target ACMR we flush the cluster + // this means that the last cluster is by definition not very good - there are frequent cases where we are left with a few triangles + // in the last cluster, producing a very bad ACMR and significantly penalizing the overall results + // thus we remove the last cluster boundary, merging the last complete cluster with the last 
incomplete one + // there are sometimes cases when the last cluster is actually good enough - in which case the code above would have added 'end' + // to the cluster boundary array which we need to remove anyway - this code will do that automatically + if (destination[result - 1] != start) + { + result--; + } + } + + assert(result >= cluster_count); + assert(result <= index_count / 3); + + return result; +} + +} // namespace meshopt + +void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float threshold) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + unsigned int cache_size = 16; + + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + + // generate hard boundaries from full-triangle cache misses + unsigned int* hard_clusters = allocator.allocate<unsigned int>(index_count / 3); + size_t hard_cluster_count = generateHardBoundaries(hard_clusters, indices, index_count, vertex_count, cache_size, cache_timestamps); + + // generate soft boundaries + unsigned int* soft_clusters = allocator.allocate<unsigned int>(index_count / 3 + 1); + size_t soft_cluster_count = generateSoftBoundaries(soft_clusters, indices, index_count, vertex_count, hard_clusters, hard_cluster_count, cache_size, threshold, cache_timestamps); + + const unsigned int* clusters = soft_clusters; + size_t cluster_count = 
soft_cluster_count; + + // fill sort data + float* sort_data = allocator.allocate<float>(cluster_count); + calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count); + + // sort clusters using sort data + unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count); + unsigned int* sort_order = allocator.allocate<unsigned int>(cluster_count); + calculateSortOrderRadix(sort_order, sort_data, sort_keys, cluster_count); + + // fill output buffer + size_t offset = 0; + + for (size_t it = 0; it < cluster_count; ++it) + { + unsigned int cluster = sort_order[it]; + assert(cluster < cluster_count); + + size_t cluster_begin = clusters[cluster] * 3; + size_t cluster_end = (cluster + 1 < cluster_count) ? clusters[cluster + 1] * 3 : index_count; + assert(cluster_begin < cluster_end); + + memcpy(destination + offset, indices + cluster_begin, (cluster_end - cluster_begin) * sizeof(unsigned int)); + offset += cluster_end - cluster_begin; + } + + assert(offset == index_count); +} diff --git a/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch b/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch new file mode 100644 index 0000000000..1be38e45d2 --- /dev/null +++ b/thirdparty/meshoptimizer/patches/simplifier_get_resulting_error.patch @@ -0,0 +1,96 @@ +diff --git a/thirdparty/meshoptimizer/meshoptimizer.h b/thirdparty/meshoptimizer/meshoptimizer.h +index a442d103c8..fde00f9c82 100644 +--- a/thirdparty/meshoptimizer/meshoptimizer.h ++++ b/thirdparty/meshoptimizer/meshoptimizer.h +@@ -266,7 +266,10 @@ MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterExp(void* buffer, size_t ver + * destination must contain enough space for the *source* index buffer (since optimization is iterative, this means index_count elements - *not* target_index_count!) 
+ * vertex_positions should have float3 position in the first 12 bytes of each vertex - similar to glVertexPointer + */ +-MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); ++// -- GODOT start -- ++//MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error); ++MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error); ++// -- GODOT end -- + + /** + * Experimental: Mesh simplifier (sloppy) +diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp +index bd523275ce..51cf634186 100644 +--- a/thirdparty/meshoptimizer/simplifier.cpp ++++ b/thirdparty/meshoptimizer/simplifier.cpp +@@ -1143,7 +1143,10 @@ unsigned int* meshopt_simplifyDebugLoop = 0; + unsigned int* meshopt_simplifyDebugLoopBack = 0; + #endif + +-size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) ++// -- GODOT start -- ++//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) ++size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float 
*vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error) ++// -- GODOT end -- + { + using namespace meshopt; + +@@ -1198,10 +1201,13 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + if (result != indices) + memcpy(result, indices, index_count * sizeof(unsigned int)); + ++// -- GODOT start -- + #if TRACE + size_t pass_count = 0; +- float worst_error = 0; ++ //float worst_error = 0; + #endif ++ float worst_error = 0; ++// -- GODOT end -- + + Collapse* edge_collapses = allocator.allocate<Collapse>(index_count); + unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count); +@@ -1213,6 +1219,12 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + // target_error input is linear; we need to adjust it to match quadricError units + float error_limit = target_error * target_error; + ++// -- GODOT start -- ++ if (r_resulting_error) { ++ *r_resulting_error = 1.0; ++ } ++// -- GODOT end -- ++ + while (result_count > target_index_count) + { + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); +@@ -1257,7 +1269,8 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); + assert(new_count < result_count); + +-#if TRACE ++// -- GODOT start -- ++//#if TRACE + float pass_error = 0.f; + for (size_t i = 0; i < edge_collapse_count; ++i) + { +@@ -1267,15 +1280,24 @@ size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, + pass_error = c.error; + } + +- pass_count++; ++ //pass_count++; + worst_error = (worst_error < pass_error) ? 
pass_error : worst_error; + ++#if TRACE ++ pass_count++; + printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal); + #endif ++// -- GODOT end -- + + result_count = new_count; + } + ++// -- GODOT start -- ++ if (r_resulting_error) { ++ *r_resulting_error = sqrt(worst_error); ++ } ++// -- GODOT end -- ++ + #if TRACE + printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); + #endif diff --git a/thirdparty/meshoptimizer/simplifier.cpp b/thirdparty/meshoptimizer/simplifier.cpp new file mode 100644 index 0000000000..b195a8cb5d --- /dev/null +++ b/thirdparty/meshoptimizer/simplifier.cpp @@ -0,0 +1,1562 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <math.h> +#include <string.h> + + +#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +// This work is based on: +// Michael Garland and Paul S. Heckbert. Surface simplification using quadric error metrics. 1997 +// Michael Garland. Quadric-based polygonal surface simplification. 1999 +// Peter Lindstrom. Out-of-Core Simplification of Large Polygonal Models. 2000 +// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003 +// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 
2019 +namespace meshopt +{ + +struct EdgeAdjacency +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildEdgeAdjacency(EdgeAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill edge counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill edge data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = b; + adjacency.data[adjacency.offsets[b]++] = c; + adjacency.data[adjacency.offsets[c]++] = a; + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + +struct PositionHasher +{ + const float* vertex_positions; + size_t vertex_stride_float; + + size_t hash(unsigned int index) const + { + const unsigned int* key = reinterpret_cast<const unsigned int*>(vertex_positions + index * vertex_stride_float); + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + return (key[0] * 73856093) ^ (key[1] * 19349663) ^ (key[2] * 83492791); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return 
memcmp(vertex_positions + lhs * vertex_stride_float, vertex_positions + rhs * vertex_stride_float, sizeof(float) * 3) == 0; + } +}; + +static size_t hashBuckets2(size_t count) +{ + size_t buckets = 1; + while (buckets < count) + buckets *= 2; + + return buckets; +} + +template <typename T, typename Hash> +static T* hashLookup2(T* table, size_t buckets, const Hash& hash, const T& key, const T& empty) +{ + assert(buckets > 0); + assert((buckets & (buckets - 1)) == 0); + + size_t hashmod = buckets - 1; + size_t bucket = hash.hash(key) & hashmod; + + for (size_t probe = 0; probe <= hashmod; ++probe) + { + T& item = table[bucket]; + + if (item == empty) + return &item; + + if (hash.equal(item, key)) + return &item; + + // hash collision, quadratic probing + bucket = (bucket + probe + 1) & hashmod; + } + + assert(false && "Hash table is full"); // unreachable + return 0; +} + +static void buildPositionRemap(unsigned int* remap, unsigned int* wedge, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, meshopt_Allocator& allocator) +{ + PositionHasher hasher = {vertex_positions_data, vertex_positions_stride / sizeof(float)}; + + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + memset(table, -1, table_size * sizeof(unsigned int)); + + // build forward remap: for each vertex, which other (canonical) vertex does it map to? + // we use position equivalence for this, and remap vertices to other existing vertices + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int index = unsigned(i); + unsigned int* entry = hashLookup2(table, table_size, hasher, index, ~0u); + + if (*entry == ~0u) + *entry = index; + + remap[index] = *entry; + } + + // build wedge table: for each vertex, which other vertex is the next wedge that also maps to the same vertex? 
+ // entries in table form a (cyclic) wedge loop per vertex; for manifold vertices, wedge[i] == remap[i] == i + for (size_t i = 0; i < vertex_count; ++i) + wedge[i] = unsigned(i); + + for (size_t i = 0; i < vertex_count; ++i) + if (remap[i] != i) + { + unsigned int r = remap[i]; + + wedge[i] = wedge[r]; + wedge[r] = unsigned(i); + } +} + +enum VertexKind +{ + Kind_Manifold, // not on an attribute seam, not on any boundary + Kind_Border, // not on an attribute seam, has exactly two open edges + Kind_Seam, // on an attribute seam with exactly two attribute seam edges + Kind_Complex, // none of the above; these vertices can move as long as all wedges move to the target vertex + Kind_Locked, // none of the above; these vertices can't move + + Kind_Count +}; + +// manifold vertices can collapse onto anything +// border/seam vertices can only be collapsed onto border/seam respectively +// complex vertices can collapse onto complex/locked +// a rule of thumb is that collapsing kind A into kind B preserves the kind B in the target vertex +// for example, while we could collapse Complex into Manifold, this would mean the target vertex isn't Manifold anymore +const unsigned char kCanCollapse[Kind_Count][Kind_Count] = { + {1, 1, 1, 1, 1}, + {0, 1, 0, 0, 0}, + {0, 0, 1, 0, 0}, + {0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0}, +}; + +// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge +// note that for seam edges, the opposite edge isn't present in the attribute-based topology +// but is present if you consider a position-only mesh variant +const unsigned char kHasOpposite[Kind_Count][Kind_Count] = { + {1, 1, 1, 0, 1}, + {1, 0, 1, 0, 0}, + {1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0}, + {1, 0, 1, 0, 0}, +}; + +static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b) +{ + unsigned int count = adjacency.counts[a]; + const unsigned int* data = adjacency.data + adjacency.offsets[a]; + + for (size_t i = 0; i < count; ++i) + if (data[i] == 
b) + return true; + + return false; +} + +static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge) +{ + memset(loop, -1, vertex_count * sizeof(unsigned int)); + memset(loopback, -1, vertex_count * sizeof(unsigned int)); + + // incoming & outgoing open edges: ~0u if no open edges, i if there are more than 1 + // note that this is the same data as required in loop[] arrays; loop[] data is only valid for border/seam + // but here it's okay to fill the data out for other types of vertices as well + unsigned int* openinc = loopback; + unsigned int* openout = loop; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int vertex = unsigned(i); + + unsigned int count = adjacency.counts[vertex]; + const unsigned int* data = adjacency.data + adjacency.offsets[vertex]; + + for (size_t j = 0; j < count; ++j) + { + unsigned int target = data[j]; + + if (!hasEdge(adjacency, target, vertex)) + { + openinc[target] = (openinc[target] == ~0u) ? vertex : target; + openout[vertex] = (openout[vertex] == ~0u) ? 
target : vertex; + } + } + } + +#if TRACE + size_t lockedstats[4] = {}; +#define TRACELOCKED(i) lockedstats[i]++; +#else +#define TRACELOCKED(i) (void)0 +#endif + + for (size_t i = 0; i < vertex_count; ++i) + { + if (remap[i] == i) + { + if (wedge[i] == i) + { + // no attribute seam, need to check if it's manifold + unsigned int openi = openinc[i], openo = openout[i]; + + // note: we classify any vertices with no open edges as manifold + // this is technically incorrect - if 4 triangles share an edge, we'll classify vertices as manifold + // it's unclear if this is a problem in practice + if (openi == ~0u && openo == ~0u) + { + result[i] = Kind_Manifold; + } + else if (openi != i && openo != i) + { + result[i] = Kind_Border; + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(0); + } + } + else if (wedge[wedge[i]] == i) + { + // attribute seam; need to distinguish between Seam and Locked + unsigned int w = wedge[i]; + unsigned int openiv = openinc[i], openov = openout[i]; + unsigned int openiw = openinc[w], openow = openout[w]; + + // seam should have one open half-edge for each vertex, and the edges need to "connect" - point to the same vertex post-remap + if (openiv != ~0u && openiv != i && openov != ~0u && openov != i && + openiw != ~0u && openiw != w && openow != ~0u && openow != w) + { + if (remap[openiv] == remap[openow] && remap[openov] == remap[openiw]) + { + result[i] = Kind_Seam; + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(1); + } + } + else + { + result[i] = Kind_Locked; + TRACELOCKED(2); + } + } + else + { + // more than one vertex maps to this one; we don't have classification available + result[i] = Kind_Locked; + TRACELOCKED(3); + } + } + else + { + assert(remap[i] < i); + + result[i] = result[remap[i]]; + } + } + +#if TRACE + printf("locked: many open edges %d, disconnected seam %d, many seam edges %d, many wedges %d\n", + int(lockedstats[0]), int(lockedstats[1]), int(lockedstats[2]), int(lockedstats[3])); +#endif +} + +struct 
Vector3 +{ + float x, y, z; +}; +// -- GODOT start -- +//static void rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +// -- GODOT end -- + +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; + float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; + + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + result[i].x = v[0]; + result[i].y = v[1]; + result[i].z = v[2]; + + for (int j = 0; j < 3; ++j) + { + float vj = v[j]; + + minv[j] = minv[j] > vj ? vj : minv[j]; + maxv[j] = maxv[j] < vj ? vj : maxv[j]; + } + } + + float extent = 0.f; + + extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); + extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); + extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); + + float scale = extent == 0 ? 
0.f : 1.f / extent; + + for (size_t i = 0; i < vertex_count; ++i) + { + result[i].x = (result[i].x - minv[0]) * scale; + result[i].y = (result[i].y - minv[1]) * scale; + result[i].z = (result[i].z - minv[2]) * scale; + } +// -- GODOT start -- + return extent; +// -- GODOT end -- + +} + +struct Quadric +{ + float a00, a11, a22; + float a10, a20, a21; + float b0, b1, b2, c; + float w; +}; + +struct Collapse +{ + unsigned int v0; + unsigned int v1; + + union + { + unsigned int bidi; + float error; + unsigned int errorui; + }; +}; + +static float normalize(Vector3& v) +{ + float length = sqrtf(v.x * v.x + v.y * v.y + v.z * v.z); + + if (length > 0) + { + v.x /= length; + v.y /= length; + v.z /= length; + } + + return length; +} + +static void quadricAdd(Quadric& Q, const Quadric& R) +{ + Q.a00 += R.a00; + Q.a11 += R.a11; + Q.a22 += R.a22; + Q.a10 += R.a10; + Q.a20 += R.a20; + Q.a21 += R.a21; + Q.b0 += R.b0; + Q.b1 += R.b1; + Q.b2 += R.b2; + Q.c += R.c; + Q.w += R.w; +} + +static float quadricError(const Quadric& Q, const Vector3& v) +{ + float rx = Q.b0; + float ry = Q.b1; + float rz = Q.b2; + + rx += Q.a10 * v.y; + ry += Q.a21 * v.z; + rz += Q.a20 * v.x; + + rx *= 2; + ry *= 2; + rz *= 2; + + rx += Q.a00 * v.x; + ry += Q.a11 * v.y; + rz += Q.a22 * v.z; + + float r = Q.c; + r += rx * v.x; + r += ry * v.y; + r += rz * v.z; + + float s = Q.w == 0.f ? 
0.f : 1.f / Q.w; + + return fabsf(r) * s; +} + +static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, float w) +{ + float aw = a * w; + float bw = b * w; + float cw = c * w; + float dw = d * w; + + Q.a00 = a * aw; + Q.a11 = b * bw; + Q.a22 = c * cw; + Q.a10 = a * bw; + Q.a20 = a * cw; + Q.a21 = b * cw; + Q.b0 = a * dw; + Q.b1 = b * dw; + Q.b2 = c * dw; + Q.c = d * dw; + Q.w = w; +} + +static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w) +{ + // we need to encode (x - X) ^ 2 + (y - Y)^2 + (z - Z)^2 into the quadric + Q.a00 = w; + Q.a11 = w; + Q.a22 = w; + Q.a10 = 0.f; + Q.a20 = 0.f; + Q.a21 = 0.f; + Q.b0 = -2.f * x * w; + Q.b1 = -2.f * y * w; + Q.b2 = -2.f * z * w; + Q.c = (x * x + y * y + z * z) * w; + Q.w = w; +} + +static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +{ + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + + // normal = cross(p1 - p0, p2 - p0) + Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x}; + float area = normalize(normal); + + float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; + + // we use sqrtf(area) so that the error is scaled linearly; this tends to improve silhouettes + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, sqrtf(area) * weight); +} + +static void quadricFromTriangleEdge(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight) +{ + Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z}; + float length = normalize(p10); + + // p20p = length of projection of p2-p0 onto normalize(p1 - p0) + Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z}; + float p20p = p20.x * p10.x + p20.y * p10.y + p20.z * p10.z; + + // normal = altitude of triangle from point p2 onto edge p1-p0 + Vector3 normal = {p20.x - p10.x * p20p, p20.y - p10.y * p20p, p20.z - 
p10.z * p20p}; + normalize(normal); + + float distance = normal.x * p0.x + normal.y * p0.y + normal.z * p0.z; + + // note: the weight is scaled linearly with edge length; this has to match the triangle weight + quadricFromPlane(Q, normal.x, normal.y, normal.z, -distance, length * weight); +} + +static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap) +{ + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int i0 = indices[i + 0]; + unsigned int i1 = indices[i + 1]; + unsigned int i2 = indices[i + 2]; + + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], 1.f); + + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + quadricAdd(vertex_quadrics[remap[i2]], Q); + } +} + +static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop, const unsigned int* loopback) +{ + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + // check that either i0 or i1 are border/seam and are on the same edge loop + // note that we need to add the error even for edged that connect e.g. 
border & locked + // if we don't do that, the adjacent border->border edge won't have correct errors for corners + if (k0 != Kind_Border && k0 != Kind_Seam && k1 != Kind_Border && k1 != Kind_Seam) + continue; + + if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) + continue; + + if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0) + continue; + + // seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges + if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) + continue; + + unsigned int i2 = indices[i + next[next[e]]]; + + // we try hard to maintain border edge geometry; seam edges can move more freely + // due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical + const float kEdgeWeightSeam = 1.f; + const float kEdgeWeightBorder = 10.f; + + float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam; + + Quadric Q; + quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight); + + quadricAdd(vertex_quadrics[remap[i0]], Q); + quadricAdd(vertex_quadrics[remap[i1]], Q); + } + } +} + +static size_t pickEdgeCollapses(Collapse* collapses, const unsigned int* indices, size_t index_count, const unsigned int* remap, const unsigned char* vertex_kind, const unsigned int* loop) +{ + size_t collapse_count = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + // this can happen either when input has a zero-length edge, or when we perform collapses for complex + // topology w/seams and collapse a manifold vertex that connects to both wedges onto one of them + // we leave edges like this alone since they may be important for preserving mesh integrity + if (remap[i0] == remap[i1]) + continue; + + unsigned char k0 = vertex_kind[i0]; + unsigned 
char k1 = vertex_kind[i1]; + + // the edge has to be collapsible in at least one direction + if (!(kCanCollapse[k0][k1] | kCanCollapse[k1][k0])) + continue; + + // manifold and seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges + if (kHasOpposite[k0][k1] && remap[i1] > remap[i0]) + continue; + + // two vertices are on a border or a seam, but there's no direct edge between them + // this indicates that they belong to two different edge loops and we should not collapse this edge + // loop[] tracks half edges so we only need to check i0->i1 + if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1) + continue; + + // edge can be collapsed in either direction - we will pick the one with minimum error + // note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional + if (kCanCollapse[k0][k1] & kCanCollapse[k1][k0]) + { + Collapse c = {i0, i1, {/* bidi= */ 1}}; + collapses[collapse_count++] = c; + } + else + { + // edge can only be collapsed in one direction + unsigned int e0 = kCanCollapse[k0][k1] ? i0 : i1; + unsigned int e1 = kCanCollapse[k0][k1] ? i1 : i0; + + Collapse c = {e0, e1, {/* bidi= */ 0}}; + collapses[collapse_count++] = c; + } + } + } + + return collapse_count; +} + +static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const Vector3* vertex_positions, const Quadric* vertex_quadrics, const unsigned int* remap) +{ + for (size_t i = 0; i < collapse_count; ++i) + { + Collapse& c = collapses[i]; + + unsigned int i0 = c.v0; + unsigned int i1 = c.v1; + + // most edges are bidirectional which means we need to evaluate errors for two collapses + // to keep this code branchless we just use the same edge for unidirectional edges + unsigned int j0 = c.bidi ? i1 : i0; + unsigned int j1 = c.bidi ? 
i0 : i1; + + const Quadric& qi = vertex_quadrics[remap[i0]]; + const Quadric& qj = vertex_quadrics[remap[j0]]; + + float ei = quadricError(qi, vertex_positions[i1]); + float ej = quadricError(qj, vertex_positions[j1]); + + // pick edge direction with minimal error + c.v0 = ei <= ej ? i0 : j0; + c.v1 = ei <= ej ? i1 : j1; + c.error = ei <= ej ? ei : ej; + } +} + +#if TRACE > 1 +static void dumpEdgeCollapses(const Collapse* collapses, size_t collapse_count, const unsigned char* vertex_kind) +{ + size_t ckinds[Kind_Count][Kind_Count] = {}; + float cerrors[Kind_Count][Kind_Count] = {}; + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + cerrors[k0][k1] = FLT_MAX; + + for (size_t i = 0; i < collapse_count; ++i) + { + unsigned int i0 = collapses[i].v0; + unsigned int i1 = collapses[i].v1; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + ckinds[k0][k1]++; + cerrors[k0][k1] = (collapses[i].error < cerrors[k0][k1]) ? collapses[i].error : cerrors[k0][k1]; + } + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + if (ckinds[k0][k1]) + printf("collapses %d -> %d: %d, min error %e\n", k0, k1, int(ckinds[k0][k1]), cerrors[k0][k1]); +} + +static void dumpLockedCollapses(const unsigned int* indices, size_t index_count, const unsigned char* vertex_kind) +{ + size_t locked_collapses[Kind_Count][Kind_Count] = {}; + + for (size_t i = 0; i < index_count; i += 3) + { + static const int next[3] = {1, 2, 0}; + + for (int e = 0; e < 3; ++e) + { + unsigned int i0 = indices[i + e]; + unsigned int i1 = indices[i + next[e]]; + + unsigned char k0 = vertex_kind[i0]; + unsigned char k1 = vertex_kind[i1]; + + locked_collapses[k0][k1] += !kCanCollapse[k0][k1] && !kCanCollapse[k1][k0]; + } + } + + for (int k0 = 0; k0 < Kind_Count; ++k0) + for (int k1 = 0; k1 < Kind_Count; ++k1) + if (locked_collapses[k0][k1]) + printf("locked collapses %d -> %d: %d\n", k0, k1, int(locked_collapses[k0][k1])); +} 
+#endif + +static void sortEdgeCollapses(unsigned int* sort_order, const Collapse* collapses, size_t collapse_count) +{ + const int sort_bits = 11; + + // fill histogram for counting sort + unsigned int histogram[1 << sort_bits]; + memset(histogram, 0, sizeof(histogram)); + + for (size_t i = 0; i < collapse_count; ++i) + { + // skip sign bit since error is non-negative + unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + + histogram[key]++; + } + + // compute offsets based on histogram data + size_t histogram_sum = 0; + + for (size_t i = 0; i < 1 << sort_bits; ++i) + { + size_t count = histogram[i]; + histogram[i] = unsigned(histogram_sum); + histogram_sum += count; + } + + assert(histogram_sum == collapse_count); + + // compute sort order based on offsets + for (size_t i = 0; i < collapse_count; ++i) + { + // skip sign bit since error is non-negative + unsigned int key = (collapses[i].errorui << 1) >> (32 - sort_bits); + + sort_order[histogram[key]++] = unsigned(i); + } +} + +static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char* collapse_locked, Quadric* vertex_quadrics, const Collapse* collapses, size_t collapse_count, const unsigned int* collapse_order, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_kind, size_t triangle_collapse_goal, float error_goal, float error_limit) +{ + size_t edge_collapses = 0; + size_t triangle_collapses = 0; + + for (size_t i = 0; i < collapse_count; ++i) + { + const Collapse& c = collapses[collapse_order[i]]; + + if (c.error > error_limit) + break; + + if (c.error > error_goal && triangle_collapses > triangle_collapse_goal / 10) + break; + + if (triangle_collapses >= triangle_collapse_goal) + break; + + unsigned int i0 = c.v0; + unsigned int i1 = c.v1; + + unsigned int r0 = remap[i0]; + unsigned int r1 = remap[i1]; + + // we don't collapse vertices that had source or target vertex involved in a collapse + // it's important to not move the vertices twice 
since it complicates the tracking/remapping logic + // it's important to not move other vertices towards a moved vertex to preserve error since we don't re-rank collapses mid-pass + if (collapse_locked[r0] | collapse_locked[r1]) + continue; + + assert(collapse_remap[r0] == r0); + assert(collapse_remap[r1] == r1); + + quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]); + + if (vertex_kind[i0] == Kind_Complex) + { + unsigned int v = i0; + + do + { + collapse_remap[v] = r1; + v = wedge[v]; + } while (v != i0); + } + else if (vertex_kind[i0] == Kind_Seam) + { + // remap v0 to v1 and seam pair of v0 to seam pair of v1 + unsigned int s0 = wedge[i0]; + unsigned int s1 = wedge[i1]; + + assert(s0 != i0 && s1 != i1); + assert(wedge[s0] == i0 && wedge[s1] == i1); + + collapse_remap[i0] = i1; + collapse_remap[s0] = s1; + } + else + { + assert(wedge[i0] == i0); + + collapse_remap[i0] = i1; + } + + collapse_locked[r0] = 1; + collapse_locked[r1] = 1; + + // border edges collapse 1 triangle, other edges collapse 2 or more + triangle_collapses += (vertex_kind[i0] == Kind_Border) ? 
1 : 2; + edge_collapses++; + } + + return edge_collapses; +} + +static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap) +{ + size_t write = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int v0 = collapse_remap[indices[i + 0]]; + unsigned int v1 = collapse_remap[indices[i + 1]]; + unsigned int v2 = collapse_remap[indices[i + 2]]; + + // we never move the vertex twice during a single pass + assert(collapse_remap[v0] == v0); + assert(collapse_remap[v1] == v1); + assert(collapse_remap[v2] == v2); + + if (v0 != v1 && v0 != v2 && v1 != v2) + { + indices[write + 0] = v0; + indices[write + 1] = v1; + indices[write + 2] = v2; + write += 3; + } + } + + return write; +} + +static void remapEdgeLoops(unsigned int* loop, size_t vertex_count, const unsigned int* collapse_remap) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + if (loop[i] != ~0u) + { + unsigned int l = loop[i]; + unsigned int r = collapse_remap[l]; + + // i == r is a special case when the seam edge is collapsed in a direction opposite to where loop goes + loop[i] = (i == r) ? 
loop[l] : r; + } + } +} + +struct CellHasher +{ + const unsigned int* vertex_ids; + + size_t hash(unsigned int i) const + { + unsigned int h = vertex_ids[i]; + + // MurmurHash2 finalizer + h ^= h >> 13; + h *= 0x5bd1e995; + h ^= h >> 15; + return h; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return vertex_ids[lhs] == vertex_ids[rhs]; + } +}; + +struct IdHasher +{ + size_t hash(unsigned int id) const + { + unsigned int h = id; + + // MurmurHash2 finalizer + h ^= h >> 13; + h *= 0x5bd1e995; + h ^= h >> 15; + return h; + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + return lhs == rhs; + } +}; + +struct TriangleHasher +{ + unsigned int* indices; + + size_t hash(unsigned int i) const + { + const unsigned int* tri = indices + i * 3; + + // Optimized Spatial Hashing for Collision Detection of Deformable Objects + return (tri[0] * 73856093) ^ (tri[1] * 19349663) ^ (tri[2] * 83492791); + } + + bool equal(unsigned int lhs, unsigned int rhs) const + { + const unsigned int* lt = indices + lhs * 3; + const unsigned int* rt = indices + rhs * 3; + + return lt[0] == rt[0] && lt[1] == rt[1] && lt[2] == rt[2]; + } +}; + +static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size) +{ + assert(grid_size >= 1 && grid_size <= 1024); + float cell_scale = float(grid_size - 1); + + for (size_t i = 0; i < vertex_count; ++i) + { + const Vector3& v = vertex_positions[i]; + + int xi = int(v.x * cell_scale + 0.5f); + int yi = int(v.y * cell_scale + 0.5f); + int zi = int(v.z * cell_scale + 0.5f); + + vertex_ids[i] = (xi << 20) | (yi << 10) | zi; + } +} + +static size_t countTriangles(const unsigned int* vertex_ids, const unsigned int* indices, size_t index_count) +{ + size_t result = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int id0 = vertex_ids[indices[i + 0]]; + unsigned int id1 = vertex_ids[indices[i + 1]]; + unsigned int id2 = vertex_ids[indices[i + 2]]; + + 
result += (id0 != id1) & (id0 != id2) & (id1 != id2); + } + + return result; +} + +static size_t fillVertexCells(unsigned int* table, size_t table_size, unsigned int* vertex_cells, const unsigned int* vertex_ids, size_t vertex_count) +{ + CellHasher hasher = {vertex_ids}; + + memset(table, -1, table_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int* entry = hashLookup2(table, table_size, hasher, unsigned(i), ~0u); + + if (*entry == ~0u) + { + *entry = unsigned(i); + vertex_cells[i] = unsigned(result++); + } + else + { + vertex_cells[i] = vertex_cells[*entry]; + } + } + + return result; +} + +static size_t countVertexCells(unsigned int* table, size_t table_size, const unsigned int* vertex_ids, size_t vertex_count) +{ + IdHasher hasher; + + memset(table, -1, table_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int id = vertex_ids[i]; + unsigned int* entry = hashLookup2(table, table_size, hasher, id, ~0u); + + result += (*entry == ~0u); + *entry = id; + } + + return result; +} + +static void fillCellQuadrics(Quadric* cell_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* vertex_cells) +{ + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int i0 = indices[i + 0]; + unsigned int i1 = indices[i + 1]; + unsigned int i2 = indices[i + 2]; + + unsigned int c0 = vertex_cells[i0]; + unsigned int c1 = vertex_cells[i1]; + unsigned int c2 = vertex_cells[i2]; + + bool single_cell = (c0 == c1) & (c0 == c2); + + Quadric Q; + quadricFromTriangle(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], single_cell ? 
3.f : 1.f); + + if (single_cell) + { + quadricAdd(cell_quadrics[c0], Q); + } + else + { + quadricAdd(cell_quadrics[c0], Q); + quadricAdd(cell_quadrics[c1], Q); + quadricAdd(cell_quadrics[c2], Q); + } + } +} + +static void fillCellQuadrics(Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* vertex_cells) +{ + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int c = vertex_cells[i]; + const Vector3& v = vertex_positions[i]; + + Quadric Q; + quadricFromPoint(Q, v.x, v.y, v.z, 1.f); + + quadricAdd(cell_quadrics[c], Q); + } +} + +static void fillCellRemap(unsigned int* cell_remap, float* cell_errors, size_t cell_count, const unsigned int* vertex_cells, const Quadric* cell_quadrics, const Vector3* vertex_positions, size_t vertex_count) +{ + memset(cell_remap, -1, cell_count * sizeof(unsigned int)); + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned int cell = vertex_cells[i]; + float error = quadricError(cell_quadrics[cell], vertex_positions[i]); + + if (cell_remap[cell] == ~0u || cell_errors[cell] > error) + { + cell_remap[cell] = unsigned(i); + cell_errors[cell] = error; + } + } +} + +static size_t filterTriangles(unsigned int* destination, unsigned int* tritable, size_t tritable_size, const unsigned int* indices, size_t index_count, const unsigned int* vertex_cells, const unsigned int* cell_remap) +{ + TriangleHasher hasher = {destination}; + + memset(tritable, -1, tritable_size * sizeof(unsigned int)); + + size_t result = 0; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int c0 = vertex_cells[indices[i + 0]]; + unsigned int c1 = vertex_cells[indices[i + 1]]; + unsigned int c2 = vertex_cells[indices[i + 2]]; + + if (c0 != c1 && c0 != c2 && c1 != c2) + { + unsigned int a = cell_remap[c0]; + unsigned int b = cell_remap[c1]; + unsigned int c = cell_remap[c2]; + + if (b < a && b < c) + { + unsigned int t = a; + a = b, b = c, c = t; + } + else if (c < a && c < b) + { + unsigned int t = c; + c = 
b, b = a, a = t; + } + + destination[result * 3 + 0] = a; + destination[result * 3 + 1] = b; + destination[result * 3 + 2] = c; + + unsigned int* entry = hashLookup2(tritable, tritable_size, hasher, unsigned(result), ~0u); + + if (*entry == ~0u) + *entry = unsigned(result++); + } + } + + return result * 3; +} + +static float interpolate(float y, float x0, float y0, float x1, float y1, float x2, float y2) +{ + // three point interpolation from "revenge of interpolation search" paper + float num = (y1 - y) * (x1 - x2) * (x1 - x0) * (y2 - y0); + float den = (y2 - y) * (x1 - x2) * (y0 - y1) + (y0 - y) * (x1 - x0) * (y1 - y2); + return x1 + num / den; +} + +} // namespace meshopt + +#ifndef NDEBUG +unsigned char* meshopt_simplifyDebugKind = 0; +unsigned int* meshopt_simplifyDebugLoop = 0; +unsigned int* meshopt_simplifyDebugLoopBack = 0; +#endif + +// -- GODOT start -- +//size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error) +size_t meshopt_simplify(unsigned int *destination, const unsigned int *indices, size_t index_count, const float *vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float *r_resulting_error) +// -- GODOT end -- +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); + + meshopt_Allocator allocator; + + unsigned int* result = destination; + + // build adjacency information + EdgeAdjacency adjacency = {}; + buildEdgeAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // build position remap that maps each vertex to the one with identical position + unsigned int* remap = allocator.allocate<unsigned 
int>(vertex_count); + unsigned int* wedge = allocator.allocate<unsigned int>(vertex_count); + buildPositionRemap(remap, wedge, vertex_positions_data, vertex_count, vertex_positions_stride, allocator); + + // classify vertices; vertex kind determines collapse rules, see kCanCollapse + unsigned char* vertex_kind = allocator.allocate<unsigned char>(vertex_count); + unsigned int* loop = allocator.allocate<unsigned int>(vertex_count); + unsigned int* loopback = allocator.allocate<unsigned int>(vertex_count); + classifyVertices(vertex_kind, loop, loopback, vertex_count, adjacency, remap, wedge); + +#if TRACE + size_t unique_positions = 0; + for (size_t i = 0; i < vertex_count; ++i) + unique_positions += remap[i] == i; + + printf("position remap: %d vertices => %d positions\n", int(vertex_count), int(unique_positions)); + + size_t kinds[Kind_Count] = {}; + for (size_t i = 0; i < vertex_count; ++i) + kinds[vertex_kind[i]] += remap[i] == i; + + printf("kinds: manifold %d, border %d, seam %d, complex %d, locked %d\n", + int(kinds[Kind_Manifold]), int(kinds[Kind_Border]), int(kinds[Kind_Seam]), int(kinds[Kind_Complex]), int(kinds[Kind_Locked])); +#endif + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); +// -- GODOT start -- + //rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + float extent = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); +// -- GODOT end -- + + Quadric* vertex_quadrics = allocator.allocate<Quadric>(vertex_count); + memset(vertex_quadrics, 0, vertex_count * sizeof(Quadric)); + + fillFaceQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap); + fillEdgeQuadrics(vertex_quadrics, indices, index_count, vertex_positions, remap, vertex_kind, loop, loopback); + + if (result != indices) + memcpy(result, indices, index_count * sizeof(unsigned int)); + +// -- GODOT start -- +#if TRACE + size_t pass_count = 0; + //float 
worst_error = 0; +#endif + float worst_error = 0; +// -- GODOT end -- + + Collapse* edge_collapses = allocator.allocate<Collapse>(index_count); + unsigned int* collapse_order = allocator.allocate<unsigned int>(index_count); + unsigned int* collapse_remap = allocator.allocate<unsigned int>(vertex_count); + unsigned char* collapse_locked = allocator.allocate<unsigned char>(vertex_count); + + size_t result_count = index_count; + + // target_error input is linear; we need to adjust it to match quadricError units + float error_limit = target_error * target_error; + +// -- GODOT start -- + if (r_resulting_error) { + *r_resulting_error = 1.0; + } +// -- GODOT end -- + + while (result_count > target_index_count) + { + size_t edge_collapse_count = pickEdgeCollapses(edge_collapses, result, result_count, remap, vertex_kind, loop); + + // no edges can be collapsed any more due to topology restrictions + if (edge_collapse_count == 0) + break; + + rankEdgeCollapses(edge_collapses, edge_collapse_count, vertex_positions, vertex_quadrics, remap); + +#if TRACE > 1 + dumpEdgeCollapses(edge_collapses, edge_collapse_count, vertex_kind); +#endif + + sortEdgeCollapses(collapse_order, edge_collapses, edge_collapse_count); + + // most collapses remove 2 triangles; use this to establish a bound on the pass in terms of error limit + // note that edge_collapse_goal is an estimate; triangle_collapse_goal will be used to actually limit collapses + size_t triangle_collapse_goal = (result_count - target_index_count) / 3; + size_t edge_collapse_goal = triangle_collapse_goal / 2; + + // we limit the error in each pass based on the error of optimal last collapse; since many collapses will be locked + // as they will share vertices with other successfull collapses, we need to increase the acceptable error by this factor + const float kPassErrorBound = 1.5f; + + float error_goal = edge_collapse_goal < edge_collapse_count ? 
edge_collapses[collapse_order[edge_collapse_goal]].error * kPassErrorBound : FLT_MAX; + + for (size_t i = 0; i < vertex_count; ++i) + collapse_remap[i] = unsigned(i); + + memset(collapse_locked, 0, vertex_count); + + size_t collapses = performEdgeCollapses(collapse_remap, collapse_locked, vertex_quadrics, edge_collapses, edge_collapse_count, collapse_order, remap, wedge, vertex_kind, triangle_collapse_goal, error_goal, error_limit); + + // no edges can be collapsed any more due to hitting the error limit or triangle collapse limit + if (collapses == 0) + break; + + remapEdgeLoops(loop, vertex_count, collapse_remap); + remapEdgeLoops(loopback, vertex_count, collapse_remap); + + size_t new_count = remapIndexBuffer(result, result_count, collapse_remap); + assert(new_count < result_count); + +// -- GODOT start -- +//#if TRACE + float pass_error = 0.f; + for (size_t i = 0; i < edge_collapse_count; ++i) + { + Collapse& c = edge_collapses[collapse_order[i]]; + + if (collapse_remap[c.v0] == c.v1) + pass_error = c.error; + } + + //pass_count++; + worst_error = (worst_error < pass_error) ? 
pass_error : worst_error; + +#if TRACE + pass_count++; + printf("pass %d: triangles: %d -> %d, collapses: %d/%d (goal: %d), error: %e (limit %e goal %e)\n", int(pass_count), int(result_count / 3), int(new_count / 3), int(collapses), int(edge_collapse_count), int(edge_collapse_goal), pass_error, error_limit, error_goal); +#endif +// -- GODOT end -- + + result_count = new_count; + } + +// -- GODOT start -- + if (r_resulting_error) { + *r_resulting_error = sqrt(worst_error) * extent; + } +// -- GODOT end -- + +#if TRACE + printf("passes: %d, worst error: %e\n", int(pass_count), worst_error); +#endif + +#if TRACE > 1 + dumpLockedCollapses(result, result_count, vertex_kind); +#endif + +#ifndef NDEBUG + if (meshopt_simplifyDebugKind) + memcpy(meshopt_simplifyDebugKind, vertex_kind, vertex_count); + + if (meshopt_simplifyDebugLoop) + memcpy(meshopt_simplifyDebugLoop, loop, vertex_count * sizeof(unsigned int)); + + if (meshopt_simplifyDebugLoopBack) + memcpy(meshopt_simplifyDebugLoopBack, loopback, vertex_count * sizeof(unsigned int)); +#endif + + return result_count; +} + +size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_index_count <= index_count); + + // we expect to get ~2 triangles/vertex in the output + size_t target_cell_count = target_index_count / 6; + + if (target_cell_count == 0) + return 0; + + meshopt_Allocator allocator; + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + + // find the optimal grid size using guided binary search +#if TRACE + printf("source: %d 
vertices, %d triangles\n", int(vertex_count), int(index_count / 3)); + printf("target: %d cells, %d triangles\n", int(target_cell_count), int(target_index_count / 3)); +#endif + + unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count); + + const int kInterpolationPasses = 5; + + // invariant: # of triangles in min_grid <= target_count + int min_grid = 0; + int max_grid = 1025; + size_t min_triangles = 0; + size_t max_triangles = index_count / 3; + + // instead of starting in the middle, let's guess as to what the answer might be! triangle count usually grows as a square of grid size... + int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); + + for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) + { + assert(min_triangles < target_index_count / 3); + assert(max_grid - min_grid > 1); + + // we clamp the prediction of the grid size to make sure that the search converges + int grid_size = next_grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + size_t triangles = countTriangles(vertex_ids, indices, index_count); + +#if TRACE + printf("pass %d (%s): grid size %d, triangles %d, %s\n", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(triangles), + (triangles <= target_index_count / 3) ? 
"under" : "over"); +#endif + + float tip = interpolate(float(target_index_count / 3), float(min_grid), float(min_triangles), float(grid_size), float(triangles), float(max_grid), float(max_triangles)); + + if (triangles <= target_index_count / 3) + { + min_grid = grid_size; + min_triangles = triangles; + } + else + { + max_grid = grid_size; + max_triangles = triangles; + } + + if (triangles == target_index_count / 3 || max_grid - min_grid <= 1) + break; + + // we start by using interpolation search - it usually converges faster + // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) + next_grid_size = (pass < kInterpolationPasses) ? int(tip + 0.5f) : (min_grid + max_grid) / 2; + } + + if (min_triangles == 0) + return 0; + + // build vertex->cell association by mapping all vertices with the same quantized position to the same cell + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + + unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count); + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); + + // build a quadric for each target cell + Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count); + memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); + + fillCellQuadrics(cell_quadrics, indices, index_count, vertex_positions, vertex_cells); + + // for each target cell, find the vertex with the minimal error + unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count); + float* cell_errors = allocator.allocate<float>(cell_count); + + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); + + // collapse triangles! 
+ // note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :( + size_t tritable_size = hashBuckets2(min_triangles); + unsigned int* tritable = allocator.allocate<unsigned int>(tritable_size); + + size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap); + assert(write <= target_index_count); + +#if TRACE + printf("result: %d cells, %d triangles (%d unfiltered)\n", int(cell_count), int(write / 3), int(min_triangles)); +#endif + + return write; +} + +size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_vertex_count) +{ + using namespace meshopt; + + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + assert(target_vertex_count <= vertex_count); + + size_t target_cell_count = target_vertex_count; + + if (target_cell_count == 0) + return 0; + + meshopt_Allocator allocator; + + Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count); + rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride); + + // find the optimal grid size using guided binary search +#if TRACE + printf("source: %d vertices\n", int(vertex_count)); + printf("target: %d cells\n", int(target_cell_count)); +#endif + + unsigned int* vertex_ids = allocator.allocate<unsigned int>(vertex_count); + + size_t table_size = hashBuckets2(vertex_count); + unsigned int* table = allocator.allocate<unsigned int>(table_size); + + const int kInterpolationPasses = 5; + + // invariant: # of vertices in min_grid <= target_count + int min_grid = 0; + int max_grid = 1025; + size_t min_vertices = 0; + size_t max_vertices = vertex_count; + + // instead of starting in the middle, let's guess as to what the answer might be! 
triangle count usually grows as a square of grid size... + int next_grid_size = int(sqrtf(float(target_cell_count)) + 0.5f); + + for (int pass = 0; pass < 10 + kInterpolationPasses; ++pass) + { + assert(min_vertices < target_vertex_count); + assert(max_grid - min_grid > 1); + + // we clamp the prediction of the grid size to make sure that the search converges + int grid_size = next_grid_size; + grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid) ? max_grid - 1 : grid_size; + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size); + size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count); + +#if TRACE + printf("pass %d (%s): grid size %d, vertices %d, %s\n", + pass, (pass == 0) ? "guess" : (pass <= kInterpolationPasses) ? "lerp" : "binary", + grid_size, int(vertices), + (vertices <= target_vertex_count) ? "under" : "over"); +#endif + + float tip = interpolate(float(target_vertex_count), float(min_grid), float(min_vertices), float(grid_size), float(vertices), float(max_grid), float(max_vertices)); + + if (vertices <= target_vertex_count) + { + min_grid = grid_size; + min_vertices = vertices; + } + else + { + max_grid = grid_size; + max_vertices = vertices; + } + + if (vertices == target_vertex_count || max_grid - min_grid <= 1) + break; + + // we start by using interpolation search - it usually converges faster + // however, interpolation search has a worst case of O(N) so we switch to binary search after a few iterations which converges in O(logN) + next_grid_size = (pass < kInterpolationPasses) ? 
int(tip + 0.5f) : (min_grid + max_grid) / 2; + } + + if (min_vertices == 0) + return 0; + + // build vertex->cell association by mapping all vertices with the same quantized position to the same cell + unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count); + + computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid); + size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count); + + // build a quadric for each target cell + Quadric* cell_quadrics = allocator.allocate<Quadric>(cell_count); + memset(cell_quadrics, 0, cell_count * sizeof(Quadric)); + + fillCellQuadrics(cell_quadrics, vertex_positions, vertex_count, vertex_cells); + + // for each target cell, find the vertex with the minimal error + unsigned int* cell_remap = allocator.allocate<unsigned int>(cell_count); + float* cell_errors = allocator.allocate<float>(cell_count); + + fillCellRemap(cell_remap, cell_errors, cell_count, vertex_cells, cell_quadrics, vertex_positions, vertex_count); + + // copy results to the output + assert(cell_count <= target_vertex_count); + memcpy(destination, cell_remap, sizeof(unsigned int) * cell_count); + +#if TRACE + printf("result: %d cells\n", int(cell_count)); +#endif + + return cell_count; +} diff --git a/thirdparty/meshoptimizer/spatialorder.cpp b/thirdparty/meshoptimizer/spatialorder.cpp new file mode 100644 index 0000000000..b09f80ac6f --- /dev/null +++ b/thirdparty/meshoptimizer/spatialorder.cpp @@ -0,0 +1,194 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <float.h> +#include <string.h> + +// This work is based on: +// Fabian Giesen. Decoding Morton codes. 
2009 +namespace meshopt +{ + +// "Insert" two 0 bits after each of the 10 low bits of x +inline unsigned int part1By2(unsigned int x) +{ + x &= 0x000003ff; // x = ---- ---- ---- ---- ---- --98 7654 3210 + x = (x ^ (x << 16)) & 0xff0000ff; // x = ---- --98 ---- ---- ---- ---- 7654 3210 + x = (x ^ (x << 8)) & 0x0300f00f; // x = ---- --98 ---- ---- 7654 ---- ---- 3210 + x = (x ^ (x << 4)) & 0x030c30c3; // x = ---- --98 ---- 76-- --54 ---- 32-- --10 + x = (x ^ (x << 2)) & 0x09249249; // x = ---- 9--8 --7- -6-- 5--4 --3- -2-- 1--0 + return x; +} + +static void computeOrder(unsigned int* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride) +{ + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + float minv[3] = {FLT_MAX, FLT_MAX, FLT_MAX}; + float maxv[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; + + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + for (int j = 0; j < 3; ++j) + { + float vj = v[j]; + + minv[j] = minv[j] > vj ? vj : minv[j]; + maxv[j] = maxv[j] < vj ? vj : maxv[j]; + } + } + + float extent = 0.f; + + extent = (maxv[0] - minv[0]) < extent ? extent : (maxv[0] - minv[0]); + extent = (maxv[1] - minv[1]) < extent ? extent : (maxv[1] - minv[1]); + extent = (maxv[2] - minv[2]) < extent ? extent : (maxv[2] - minv[2]); + + float scale = extent == 0 ? 
0.f : 1.f / extent; + + // generate Morton order based on the position inside a unit cube + for (size_t i = 0; i < vertex_count; ++i) + { + const float* v = vertex_positions_data + i * vertex_stride_float; + + int x = int((v[0] - minv[0]) * scale * 1023.f + 0.5f); + int y = int((v[1] - minv[1]) * scale * 1023.f + 0.5f); + int z = int((v[2] - minv[2]) * scale * 1023.f + 0.5f); + + result[i] = part1By2(x) | (part1By2(y) << 1) | (part1By2(z) << 2); + } +} + +static void computeHistogram(unsigned int (&hist)[1024][3], const unsigned int* data, size_t count) +{ + memset(hist, 0, sizeof(hist)); + + // compute 3 10-bit histograms in parallel + for (size_t i = 0; i < count; ++i) + { + unsigned int id = data[i]; + + hist[(id >> 0) & 1023][0]++; + hist[(id >> 10) & 1023][1]++; + hist[(id >> 20) & 1023][2]++; + } + + unsigned int sumx = 0, sumy = 0, sumz = 0; + + // replace histogram data with prefix histogram sums in-place + for (int i = 0; i < 1024; ++i) + { + unsigned int hx = hist[i][0], hy = hist[i][1], hz = hist[i][2]; + + hist[i][0] = sumx; + hist[i][1] = sumy; + hist[i][2] = sumz; + + sumx += hx; + sumy += hy; + sumz += hz; + } + + assert(sumx == count && sumy == count && sumz == count); +} + +static void radixPass(unsigned int* destination, const unsigned int* source, const unsigned int* keys, size_t count, unsigned int (&hist)[1024][3], int pass) +{ + int bitoff = pass * 10; + + for (size_t i = 0; i < count; ++i) + { + unsigned int id = (keys[source[i]] >> bitoff) & 1023; + + destination[hist[id][pass]++] = source[i]; + } +} + +} // namespace meshopt + +void meshopt_spatialSortRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + meshopt_Allocator allocator; + + unsigned int* keys = allocator.allocate<unsigned int>(vertex_count); + 
computeOrder(keys, vertex_positions, vertex_count, vertex_positions_stride); + + unsigned int hist[1024][3]; + computeHistogram(hist, keys, vertex_count); + + unsigned int* scratch = allocator.allocate<unsigned int>(vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + destination[i] = unsigned(i); + + // 3-pass radix sort computes the resulting order into scratch + radixPass(scratch, destination, keys, vertex_count, hist, 0); + radixPass(destination, scratch, keys, vertex_count, hist, 1); + radixPass(scratch, destination, keys, vertex_count, hist, 2); + + // since our remap table is mapping old=>new, we need to reverse it + for (size_t i = 0; i < vertex_count; ++i) + destination[scratch[i]] = unsigned(i); +} + +void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(vertex_positions_stride > 0 && vertex_positions_stride <= 256); + assert(vertex_positions_stride % sizeof(float) == 0); + + (void)vertex_count; + + size_t face_count = index_count / 3; + size_t vertex_stride_float = vertex_positions_stride / sizeof(float); + + meshopt_Allocator allocator; + + float* centroids = allocator.allocate<float>(face_count * 3); + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + const float* va = vertex_positions + a * vertex_stride_float; + const float* vb = vertex_positions + b * vertex_stride_float; + const float* vc = vertex_positions + c * vertex_stride_float; + + centroids[i * 3 + 0] = (va[0] + vb[0] + vc[0]) / 3.f; + centroids[i * 3 + 1] = (va[1] + vb[1] + vc[1]) / 3.f; + centroids[i * 3 + 2] = (va[2] + vb[2] + vc[2]) / 3.f; + } + + unsigned int* remap = allocator.allocate<unsigned int>(face_count); + 
+ meshopt_spatialSortRemap(remap, centroids, face_count, sizeof(float) * 3); + + // support in-order remap + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = indices[i * 3 + 2]; + unsigned int r = remap[i]; + + destination[r * 3 + 0] = a; + destination[r * 3 + 1] = b; + destination[r * 3 + 2] = c; + } +} diff --git a/thirdparty/meshoptimizer/stripifier.cpp b/thirdparty/meshoptimizer/stripifier.cpp new file mode 100644 index 0000000000..8ce17ef3dc --- /dev/null +++ b/thirdparty/meshoptimizer/stripifier.cpp @@ -0,0 +1,295 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <limits.h> +#include <string.h> + +// This work is based on: +// Francine Evans, Steven Skiena and Amitabh Varshney. Optimizing Triangle Strips for Fast Rendering. 1996 +namespace meshopt +{ + +static unsigned int findStripFirst(const unsigned int buffer[][3], unsigned int buffer_size, const unsigned int* valence) +{ + unsigned int index = 0; + unsigned int iv = ~0u; + + for (size_t i = 0; i < buffer_size; ++i) + { + unsigned int va = valence[buffer[i][0]], vb = valence[buffer[i][1]], vc = valence[buffer[i][2]]; + unsigned int v = (va < vb && va < vc) ? va : (vb < vc) ? 
vb : vc; + + if (v < iv) + { + index = unsigned(i); + iv = v; + } + } + + return index; +} + +static int findStripNext(const unsigned int buffer[][3], unsigned int buffer_size, unsigned int e0, unsigned int e1) +{ + for (size_t i = 0; i < buffer_size; ++i) + { + unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + + if (e0 == a && e1 == b) + return (int(i) << 2) | 2; + else if (e0 == b && e1 == c) + return (int(i) << 2) | 0; + else if (e0 == c && e1 == a) + return (int(i) << 2) | 1; + } + + return -1; +} + +} // namespace meshopt + +size_t meshopt_stripify(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int restart_index) +{ + assert(destination != indices); + assert(index_count % 3 == 0); + + using namespace meshopt; + + meshopt_Allocator allocator; + + const size_t buffer_capacity = 8; + + unsigned int buffer[buffer_capacity][3] = {}; + unsigned int buffer_size = 0; + + size_t index_offset = 0; + + unsigned int strip[2] = {}; + unsigned int parity = 0; + + size_t strip_size = 0; + + // compute vertex valence; this is used to prioritize starting triangle for strips + unsigned int* valence = allocator.allocate<unsigned int>(vertex_count); + memset(valence, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + valence[index]++; + } + + int next = -1; + + while (buffer_size > 0 || index_offset < index_count) + { + assert(next < 0 || (size_t(next >> 2) < buffer_size && (next & 3) < 3)); + + // fill triangle buffer + while (buffer_size < buffer_capacity && index_offset < index_count) + { + buffer[buffer_size][0] = indices[index_offset + 0]; + buffer[buffer_size][1] = indices[index_offset + 1]; + buffer[buffer_size][2] = indices[index_offset + 2]; + + buffer_size++; + index_offset += 3; + } + + assert(buffer_size > 0); + + if (next >= 0) + { + unsigned int i = next >> 2; + unsigned int a = 
buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + unsigned int v = buffer[i][next & 3]; + + // ordered removal from the buffer + memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); + buffer_size--; + + // update vertex valences for strip start heuristic + valence[a]--; + valence[b]--; + valence[c]--; + + // find next triangle (note that edge order flips on every iteration) + // in some cases we need to perform a swap to pick a different outgoing triangle edge + // for [a b c], the default strip edge is [b c], but we might want to use [a c] + int cont = findStripNext(buffer, buffer_size, parity ? strip[1] : v, parity ? v : strip[1]); + int swap = cont < 0 ? findStripNext(buffer, buffer_size, parity ? v : strip[0], parity ? strip[0] : v) : -1; + + if (cont < 0 && swap >= 0) + { + // [a b c] => [a b a c] + destination[strip_size++] = strip[0]; + destination[strip_size++] = v; + + // next strip has same winding + // ? a b => b a v + strip[1] = v; + + next = swap; + } + else + { + // emit the next vertex in the strip + destination[strip_size++] = v; + + // next strip has flipped winding + strip[0] = strip[1]; + strip[1] = v; + parity ^= 1; + + next = cont; + } + } + else + { + // if we didn't find anything, we need to find the next new triangle + // we use a heuristic to maximize the strip length + unsigned int i = findStripFirst(buffer, buffer_size, &valence[0]); + unsigned int a = buffer[i][0], b = buffer[i][1], c = buffer[i][2]; + + // ordered removal from the buffer + memmove(buffer[i], buffer[i + 1], (buffer_size - i - 1) * sizeof(buffer[0])); + buffer_size--; + + // update vertex valences for strip start heuristic + valence[a]--; + valence[b]--; + valence[c]--; + + // we need to pre-rotate the triangle so that we will find a match in the existing buffer on the next iteration + int ea = findStripNext(buffer, buffer_size, c, b); + int eb = findStripNext(buffer, buffer_size, a, c); + int ec = findStripNext(buffer, buffer_size, b, a); + + 
// in some cases we can have several matching edges; since we can pick any edge, we pick the one with the smallest + // triangle index in the buffer. this reduces the effect of stripification on ACMR and additionally - for unclear + // reasons - slightly improves the stripification efficiency + int mine = INT_MAX; + mine = (ea >= 0 && mine > ea) ? ea : mine; + mine = (eb >= 0 && mine > eb) ? eb : mine; + mine = (ec >= 0 && mine > ec) ? ec : mine; + + if (ea == mine) + { + // keep abc + next = ea; + } + else if (eb == mine) + { + // abc -> bca + unsigned int t = a; + a = b, b = c, c = t; + + next = eb; + } + else if (ec == mine) + { + // abc -> cab + unsigned int t = c; + c = b, b = a, a = t; + + next = ec; + } + + if (restart_index) + { + if (strip_size) + destination[strip_size++] = restart_index; + + destination[strip_size++] = a; + destination[strip_size++] = b; + destination[strip_size++] = c; + + // new strip always starts with the same edge winding + strip[0] = b; + strip[1] = c; + parity = 1; + } + else + { + if (strip_size) + { + // connect last strip using degenerate triangles + destination[strip_size++] = strip[1]; + destination[strip_size++] = a; + } + + // note that we may need to flip the emitted triangle based on parity + // we always end up with outgoing edge "cb" in the end + unsigned int e0 = parity ? c : b; + unsigned int e1 = parity ? 
b : c; + + destination[strip_size++] = a; + destination[strip_size++] = e0; + destination[strip_size++] = e1; + + strip[0] = e0; + strip[1] = e1; + parity ^= 1; + } + } + } + + return strip_size; +} + +size_t meshopt_stripifyBound(size_t index_count) +{ + assert(index_count % 3 == 0); + + // worst case without restarts is 2 degenerate indices and 3 indices per triangle + // worst case with restarts is 1 restart index and 3 indices per triangle + return (index_count / 3) * 5; +} + +size_t meshopt_unstripify(unsigned int* destination, const unsigned int* indices, size_t index_count, unsigned int restart_index) +{ + assert(destination != indices); + + size_t offset = 0; + size_t start = 0; + + for (size_t i = 0; i < index_count; ++i) + { + if (restart_index && indices[i] == restart_index) + { + start = i + 1; + } + else if (i - start >= 2) + { + unsigned int a = indices[i - 2], b = indices[i - 1], c = indices[i]; + + // flip winding for odd triangles + if ((i - start) & 1) + { + unsigned int t = a; + a = b, b = t; + } + + // although we use restart indices, strip swaps still produce degenerate triangles, so skip them + if (a != b && a != c && b != c) + { + destination[offset + 0] = a; + destination[offset + 1] = b; + destination[offset + 2] = c; + offset += 3; + } + } + } + + return offset; +} + +size_t meshopt_unstripifyBound(size_t index_count) +{ + assert(index_count == 0 || index_count >= 3); + + return (index_count == 0) ? 
0 : (index_count - 2) * 3; +} diff --git a/thirdparty/meshoptimizer/vcacheanalyzer.cpp b/thirdparty/meshoptimizer/vcacheanalyzer.cpp new file mode 100644 index 0000000000..3682743820 --- /dev/null +++ b/thirdparty/meshoptimizer/vcacheanalyzer.cpp @@ -0,0 +1,73 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +meshopt_VertexCacheStatistics meshopt_analyzeVertexCache(const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size, unsigned int warp_size, unsigned int primgroup_size) +{ + assert(index_count % 3 == 0); + assert(cache_size >= 3); + assert(warp_size == 0 || warp_size >= 3); + + meshopt_Allocator allocator; + + meshopt_VertexCacheStatistics result = {}; + + unsigned int warp_offset = 0; + unsigned int primgroup_offset = 0; + + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + unsigned int timestamp = cache_size + 1; + + for (size_t i = 0; i < index_count; i += 3) + { + unsigned int a = indices[i + 0], b = indices[i + 1], c = indices[i + 2]; + assert(a < vertex_count && b < vertex_count && c < vertex_count); + + bool ac = (timestamp - cache_timestamps[a]) > cache_size; + bool bc = (timestamp - cache_timestamps[b]) > cache_size; + bool cc = (timestamp - cache_timestamps[c]) > cache_size; + + // flush cache if triangle doesn't fit into warp or into the primitive buffer + if ((primgroup_size && primgroup_offset == primgroup_size) || (warp_size && warp_offset + ac + bc + cc > warp_size)) + { + result.warps_executed += warp_offset > 0; + + warp_offset = 0; + primgroup_offset = 0; + + // reset cache + timestamp += cache_size + 1; + } + + // update cache and add vertices to warp + for (int j = 0; j < 3; ++j) + { + unsigned int index = indices[i + j]; + + if (timestamp - cache_timestamps[index] > cache_size) + { 
+ cache_timestamps[index] = timestamp++; + result.vertices_transformed++; + warp_offset++; + } + } + + primgroup_offset++; + } + + size_t unique_vertex_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + unique_vertex_count += cache_timestamps[i] > 0; + + result.warps_executed += warp_offset > 0; + + result.acmr = index_count == 0 ? 0 : float(result.vertices_transformed) / float(index_count / 3); + result.atvr = unique_vertex_count == 0 ? 0 : float(result.vertices_transformed) / float(unique_vertex_count); + + return result; +} diff --git a/thirdparty/meshoptimizer/vcacheoptimizer.cpp b/thirdparty/meshoptimizer/vcacheoptimizer.cpp new file mode 100644 index 0000000000..fb8ade4b77 --- /dev/null +++ b/thirdparty/meshoptimizer/vcacheoptimizer.cpp @@ -0,0 +1,473 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +// This work is based on: +// Tom Forsyth. Linear-Speed Vertex Cache Optimisation. 2006 +// Pedro Sander, Diego Nehab and Joshua Barczak. Fast Triangle Reordering for Vertex Locality and Reduced Overdraw. 
2007 +namespace meshopt +{ + +const size_t kCacheSizeMax = 16; +const size_t kValenceMax = 8; + +struct VertexScoreTable +{ + float cache[1 + kCacheSizeMax]; + float live[1 + kValenceMax]; +}; + +// Tuned to minimize the ACMR of a GPU that has a cache profile similar to NVidia and AMD +static const VertexScoreTable kVertexScoreTable = { + {0.f, 0.779f, 0.791f, 0.789f, 0.981f, 0.843f, 0.726f, 0.847f, 0.882f, 0.867f, 0.799f, 0.642f, 0.613f, 0.600f, 0.568f, 0.372f, 0.234f}, + {0.f, 0.995f, 0.713f, 0.450f, 0.404f, 0.059f, 0.005f, 0.147f, 0.006f}, +}; + +// Tuned to minimize the encoded index buffer size +static const VertexScoreTable kVertexScoreTableStrip = { + {0.f, 1.000f, 1.000f, 1.000f, 0.453f, 0.561f, 0.490f, 0.459f, 0.179f, 0.526f, 0.000f, 0.227f, 0.184f, 0.490f, 0.112f, 0.050f, 0.131f}, + {0.f, 0.956f, 0.786f, 0.577f, 0.558f, 0.618f, 0.549f, 0.499f, 0.489f}, +}; + +struct TriangleAdjacency +{ + unsigned int* counts; + unsigned int* offsets; + unsigned int* data; +}; + +static void buildTriangleAdjacency(TriangleAdjacency& adjacency, const unsigned int* indices, size_t index_count, size_t vertex_count, meshopt_Allocator& allocator) +{ + size_t face_count = index_count / 3; + + // allocate arrays + adjacency.counts = allocator.allocate<unsigned int>(vertex_count); + adjacency.offsets = allocator.allocate<unsigned int>(vertex_count); + adjacency.data = allocator.allocate<unsigned int>(index_count); + + // fill triangle counts + memset(adjacency.counts, 0, vertex_count * sizeof(unsigned int)); + + for (size_t i = 0; i < index_count; ++i) + { + assert(indices[i] < vertex_count); + + adjacency.counts[indices[i]]++; + } + + // fill offset table + unsigned int offset = 0; + + for (size_t i = 0; i < vertex_count; ++i) + { + adjacency.offsets[i] = offset; + offset += adjacency.counts[i]; + } + + assert(offset == index_count); + + // fill triangle data + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0], b = indices[i * 3 + 1], c = 
indices[i * 3 + 2]; + + adjacency.data[adjacency.offsets[a]++] = unsigned(i); + adjacency.data[adjacency.offsets[b]++] = unsigned(i); + adjacency.data[adjacency.offsets[c]++] = unsigned(i); + } + + // fix offsets that have been disturbed by the previous pass + for (size_t i = 0; i < vertex_count; ++i) + { + assert(adjacency.offsets[i] >= adjacency.counts[i]); + + adjacency.offsets[i] -= adjacency.counts[i]; + } +} + +static unsigned int getNextVertexDeadEnd(const unsigned int* dead_end, unsigned int& dead_end_top, unsigned int& input_cursor, const unsigned int* live_triangles, size_t vertex_count) +{ + // check dead-end stack + while (dead_end_top) + { + unsigned int vertex = dead_end[--dead_end_top]; + + if (live_triangles[vertex] > 0) + return vertex; + } + + // input order + while (input_cursor < vertex_count) + { + if (live_triangles[input_cursor] > 0) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +static unsigned int getNextVertexNeighbour(const unsigned int* next_candidates_begin, const unsigned int* next_candidates_end, const unsigned int* live_triangles, const unsigned int* cache_timestamps, unsigned int timestamp, unsigned int cache_size) +{ + unsigned int best_candidate = ~0u; + int best_priority = -1; + + for (const unsigned int* next_candidate = next_candidates_begin; next_candidate != next_candidates_end; ++next_candidate) + { + unsigned int vertex = *next_candidate; + + // otherwise we don't need to process it + if (live_triangles[vertex] > 0) + { + int priority = 0; + + // will it be in cache after fanning? 
+ if (2 * live_triangles[vertex] + timestamp - cache_timestamps[vertex] <= cache_size) + { + priority = timestamp - cache_timestamps[vertex]; // position in cache + } + + if (priority > best_priority) + { + best_candidate = vertex; + best_priority = priority; + } + } + } + + return best_candidate; +} + +static float vertexScore(const VertexScoreTable* table, int cache_position, unsigned int live_triangles) +{ + assert(cache_position >= -1 && cache_position < int(kCacheSizeMax)); + + unsigned int live_triangles_clamped = live_triangles < kValenceMax ? live_triangles : kValenceMax; + + return table->cache[1 + cache_position] + table->live[live_triangles_clamped]; +} + +static unsigned int getNextTriangleDeadEnd(unsigned int& input_cursor, const unsigned char* emitted_flags, size_t face_count) +{ + // input order + while (input_cursor < face_count) + { + if (!emitted_flags[input_cursor]) + return input_cursor; + + ++input_cursor; + } + + return ~0u; +} + +} // namespace meshopt + +void meshopt_optimizeVertexCacheTable(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const meshopt::VertexScoreTable* table) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + unsigned int cache_size = 16; + assert(cache_size <= kCacheSizeMax); + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + 
memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + // compute initial vertex scores + float* vertex_scores = allocator.allocate<float>(vertex_count); + + for (size_t i = 0; i < vertex_count; ++i) + vertex_scores[i] = vertexScore(table, -1, live_triangles[i]); + + // compute triangle scores + float* triangle_scores = allocator.allocate<float>(face_count); + + for (size_t i = 0; i < face_count; ++i) + { + unsigned int a = indices[i * 3 + 0]; + unsigned int b = indices[i * 3 + 1]; + unsigned int c = indices[i * 3 + 2]; + + triangle_scores[i] = vertex_scores[a] + vertex_scores[b] + vertex_scores[c]; + } + + unsigned int cache_holder[2 * (kCacheSizeMax + 3)]; + unsigned int* cache = cache_holder; + unsigned int* cache_new = cache_holder + kCacheSizeMax + 3; + size_t cache_count = 0; + + unsigned int current_triangle = 0; + unsigned int input_cursor = 1; + + unsigned int output_triangle = 0; + + while (current_triangle != ~0u) + { + assert(output_triangle < face_count); + + unsigned int a = indices[current_triangle * 3 + 0]; + unsigned int b = indices[current_triangle * 3 + 1]; + unsigned int c = indices[current_triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update emitted flags + emitted_flags[current_triangle] = true; + triangle_scores[current_triangle] = 0; + + // new triangle + size_t cache_write = 0; + cache_new[cache_write++] = a; + cache_new[cache_write++] = b; + cache_new[cache_write++] = c; + + // old triangles + for (size_t i = 0; i < cache_count; ++i) + { + unsigned int index = cache[i]; + + if (index != a && index != b && index != c) + { + cache_new[cache_write++] = index; + } + } + + unsigned int* cache_temp = cache; + cache = cache_new, 
cache_new = cache_temp; + cache_count = cache_write > cache_size ? cache_size : cache_write; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // remove emitted triangle from adjacency data + // this makes sure that we spend less time traversing these lists on subsequent iterations + for (size_t k = 0; k < 3; ++k) + { + unsigned int index = indices[current_triangle * 3 + k]; + + unsigned int* neighbours = &adjacency.data[0] + adjacency.offsets[index]; + size_t neighbours_size = adjacency.counts[index]; + + for (size_t i = 0; i < neighbours_size; ++i) + { + unsigned int tri = neighbours[i]; + + if (tri == current_triangle) + { + neighbours[i] = neighbours[neighbours_size - 1]; + adjacency.counts[index]--; + break; + } + } + } + + unsigned int best_triangle = ~0u; + float best_score = 0; + + // update cache positions, vertex scores and triangle scores, and find next best triangle + for (size_t i = 0; i < cache_write; ++i) + { + unsigned int index = cache[i]; + + int cache_position = i >= cache_size ? 
-1 : int(i); + + // update vertex score + float score = vertexScore(table, cache_position, live_triangles[index]); + float score_diff = score - vertex_scores[index]; + + vertex_scores[index] = score; + + // update scores of vertex triangles + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[index]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[index]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int tri = *it; + assert(!emitted_flags[tri]); + + float tri_score = triangle_scores[tri] + score_diff; + assert(tri_score > 0); + + if (best_score < tri_score) + { + best_triangle = tri; + best_score = tri_score; + } + + triangle_scores[tri] = tri_score; + } + } + + // step through input triangles in order if we hit a dead-end + current_triangle = best_triangle; + + if (current_triangle == ~0u) + { + current_triangle = getNextTriangleDeadEnd(input_cursor, &emitted_flags[0], face_count); + } + } + + assert(input_cursor == face_count); + assert(output_triangle == face_count); +} + +void meshopt_optimizeVertexCache(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTable); +} + +void meshopt_optimizeVertexCacheStrip(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + meshopt_optimizeVertexCacheTable(destination, indices, index_count, vertex_count, &meshopt::kVertexScoreTableStrip); +} + +void meshopt_optimizeVertexCacheFifo(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, unsigned int cache_size) +{ + using namespace meshopt; + + assert(index_count % 3 == 0); + assert(cache_size >= 3); + + meshopt_Allocator allocator; + + // guard for empty meshes + if (index_count == 0 || vertex_count == 0) + return; + + // support 
in-place optimization + if (destination == indices) + { + unsigned int* indices_copy = allocator.allocate<unsigned int>(index_count); + memcpy(indices_copy, indices, index_count * sizeof(unsigned int)); + indices = indices_copy; + } + + size_t face_count = index_count / 3; + + // build adjacency information + TriangleAdjacency adjacency = {}; + buildTriangleAdjacency(adjacency, indices, index_count, vertex_count, allocator); + + // live triangle counts + unsigned int* live_triangles = allocator.allocate<unsigned int>(vertex_count); + memcpy(live_triangles, adjacency.counts, vertex_count * sizeof(unsigned int)); + + // cache time stamps + unsigned int* cache_timestamps = allocator.allocate<unsigned int>(vertex_count); + memset(cache_timestamps, 0, vertex_count * sizeof(unsigned int)); + + // dead-end stack + unsigned int* dead_end = allocator.allocate<unsigned int>(index_count); + unsigned int dead_end_top = 0; + + // emitted flags + unsigned char* emitted_flags = allocator.allocate<unsigned char>(face_count); + memset(emitted_flags, 0, face_count); + + unsigned int current_vertex = 0; + + unsigned int timestamp = cache_size + 1; + unsigned int input_cursor = 1; // vertex to restart from in case of dead-end + + unsigned int output_triangle = 0; + + while (current_vertex != ~0u) + { + const unsigned int* next_candidates_begin = &dead_end[0] + dead_end_top; + + // emit all vertex neighbours + const unsigned int* neighbours_begin = &adjacency.data[0] + adjacency.offsets[current_vertex]; + const unsigned int* neighbours_end = neighbours_begin + adjacency.counts[current_vertex]; + + for (const unsigned int* it = neighbours_begin; it != neighbours_end; ++it) + { + unsigned int triangle = *it; + + if (!emitted_flags[triangle]) + { + unsigned int a = indices[triangle * 3 + 0], b = indices[triangle * 3 + 1], c = indices[triangle * 3 + 2]; + + // output indices + destination[output_triangle * 3 + 0] = a; + destination[output_triangle * 3 + 1] = b; + 
destination[output_triangle * 3 + 2] = c; + output_triangle++; + + // update dead-end stack + dead_end[dead_end_top + 0] = a; + dead_end[dead_end_top + 1] = b; + dead_end[dead_end_top + 2] = c; + dead_end_top += 3; + + // update live triangle counts + live_triangles[a]--; + live_triangles[b]--; + live_triangles[c]--; + + // update cache info + // if vertex is not in cache, put it in cache + if (timestamp - cache_timestamps[a] > cache_size) + cache_timestamps[a] = timestamp++; + + if (timestamp - cache_timestamps[b] > cache_size) + cache_timestamps[b] = timestamp++; + + if (timestamp - cache_timestamps[c] > cache_size) + cache_timestamps[c] = timestamp++; + + // update emitted flags + emitted_flags[triangle] = true; + } + } + + // next candidates are the ones we pushed to dead-end stack just now + const unsigned int* next_candidates_end = &dead_end[0] + dead_end_top; + + // get next vertex + current_vertex = getNextVertexNeighbour(next_candidates_begin, next_candidates_end, &live_triangles[0], &cache_timestamps[0], timestamp, cache_size); + + if (current_vertex == ~0u) + { + current_vertex = getNextVertexDeadEnd(&dead_end[0], dead_end_top, input_cursor, &live_triangles[0], vertex_count); + } + } + + assert(output_triangle == face_count); +} diff --git a/thirdparty/meshoptimizer/vertexcodec.cpp b/thirdparty/meshoptimizer/vertexcodec.cpp new file mode 100644 index 0000000000..784c9a13db --- /dev/null +++ b/thirdparty/meshoptimizer/vertexcodec.cpp @@ -0,0 +1,1265 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD + +// The SIMD implementation requires SSSE3, which can be enabled unconditionally through compiler settings +#if defined(__AVX__) || defined(__SSSE3__) +#define SIMD_SSE +#endif + +// An experimental implementation using AVX512 
instructions; it's only enabled when AVX512 is enabled through compiler settings +#if defined(__AVX512VBMI2__) && defined(__AVX512VBMI__) && defined(__AVX512VL__) && defined(__POPCNT__) +#undef SIMD_SSE +#define SIMD_AVX +#endif + +// MSVC supports compiling SSSE3 code regardless of compile options; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +#define SIMD_SSE +#define SIMD_FALLBACK +#endif + +// GCC 4.9+ and clang 3.8+ support targeting SIMD ISA from individual functions; we use a cpuid-based scalar fallback +#if !defined(SIMD_SSE) && !defined(SIMD_AVX) && ((defined(__clang__) && __clang_major__ * 100 + __clang_minor__ >= 308) || (defined(__GNUC__) && __GNUC__ * 100 + __GNUC_MINOR__ >= 409)) && (defined(__i386__) || defined(__x86_64__)) +#define SIMD_SSE +#define SIMD_FALLBACK +#define SIMD_TARGET __attribute__((target("ssse3"))) +#endif + +// GCC/clang define these when NEON support is available +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define SIMD_NEON +#endif + +// On MSVC, we assume that ARM builds always target NEON-capable devices +#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +#define SIMD_NEON +#endif + +// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +#if defined(__wasm_simd128__) +#define SIMD_WASM +#endif + +#ifndef SIMD_TARGET +#define SIMD_TARGET +#endif + +#endif // !MESHOPTIMIZER_NO_SIMD + +#ifdef SIMD_SSE +#include <tmmintrin.h> +#endif + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) +#ifdef _MSC_VER +#include <intrin.h> // __cpuid +#else +#include <cpuid.h> // __cpuid +#endif +#endif + +#ifdef SIMD_AVX +#include <immintrin.h> +#endif + +#ifdef SIMD_NEON +#if defined(_MSC_VER) && defined(_M_ARM64) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif + +#ifdef SIMD_WASM +#include <wasm_simd128.h> +#endif + 
+#ifndef TRACE +#define TRACE 0 +#endif + +#if TRACE +#include <stdio.h> +#endif + +#ifdef SIMD_WASM +#define wasmx_splat_v32x4(v, i) wasm_v32x4_shuffle(v, v, i, i, i, i) +#define wasmx_unpacklo_v8x16(a, b) wasm_v8x16_shuffle(a, b, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23) +#define wasmx_unpackhi_v8x16(a, b) wasm_v8x16_shuffle(a, b, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31) +#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unpacklo_v64x2(a, b) wasm_v64x2_shuffle(a, b, 0, 2) +#define wasmx_unpackhi_v64x2(a, b) wasm_v64x2_shuffle(a, b, 1, 3) +#endif + +namespace meshopt +{ + +const unsigned char kVertexHeader = 0xa0; + +static int gEncodeVertexVersion = 0; + +const size_t kVertexBlockSizeBytes = 8192; +const size_t kVertexBlockMaxSize = 256; +const size_t kByteGroupSize = 16; +const size_t kByteGroupDecodeLimit = 24; +const size_t kTailMaxSize = 32; + +static size_t getVertexBlockSize(size_t vertex_size) +{ + // make sure the entire block fits into the scratch buffer + size_t result = kVertexBlockSizeBytes / vertex_size; + + // align to byte group size; we encode each byte as a byte group + // if vertex block is misaligned, it results in wasted bytes, so just truncate the block size + result &= ~(kByteGroupSize - 1); + + return (result < kVertexBlockMaxSize) ? 
// Zigzag-encodes a byte-sized delta: the value is interpreted as a signed 8-bit
// quantity and remapped so that small magnitudes of either sign become small
// unsigned values (0, -1, 1, -2, ... -> 0, 1, 2, 3, ...).
inline unsigned char zigzag8(unsigned char v)
{
	// all-ones when the sign bit of v is set, zero otherwise
	const int sign_fill = (v & 0x80) ? -1 : 0;

	// magnitude moves up one bit; the sign ends up in bit 0 after the xor
	const int doubled = v << 1;

	return static_cast<unsigned char>(sign_fill ^ doubled);
}
sentinel : buffer[i + k]; + + byte <<= bits; + byte |= enc; + } + + *data++ = byte; + } + + for (size_t i = 0; i < kByteGroupSize; ++i) + { + if (buffer[i] >= sentinel) + { + *data++ = buffer[i]; + } + } + + return data; +} + +static unsigned char* encodeBytes(unsigned char* data, unsigned char* data_end, const unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + memset(header, 0, header_size); + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + int best_bits = 8; + size_t best_size = encodeBytesGroupMeasure(buffer + i, 8); + + for (int bits = 1; bits < 8; bits *= 2) + { + size_t size = encodeBytesGroupMeasure(buffer + i, bits); + + if (size < best_size) + { + best_bits = bits; + best_size = size; + } + } + + int bitslog2 = (best_bits == 1) ? 0 : (best_bits == 2) ? 1 : (best_bits == 4) ? 
2 : 3; + assert((1 << bitslog2) == best_bits); + + size_t header_offset = i / kByteGroupSize; + + header[header_offset / 4] |= bitslog2 << ((header_offset % 4) * 2); + + unsigned char* next = encodeBytesGroup(data, buffer + i, best_bits); + + assert(data + best_size == next); + data = next; + +#if TRACE > 1 + bytestats->bitg[bitslog2]++; + bytestats->bitb[bitslog2] += best_size; +#endif + } + +#if TRACE > 1 + bytestats->header += header_size; +#endif + + return data; +} + +static unsigned char* encodeVertexBlock(unsigned char* data, unsigned char* data_end, const unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + assert(sizeof(buffer) % kByteGroupSize == 0); + + // we sometimes encode elements we didn't fill when rounding to kByteGroupSize + memset(buffer, 0, sizeof(buffer)); + + for (size_t k = 0; k < vertex_size; ++k) + { + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + buffer[i] = zigzag8(vertex_data[vertex_offset] - p); + + p = vertex_data[vertex_offset]; + + vertex_offset += vertex_size; + } + +#if TRACE + const unsigned char* olddata = data; + bytestats = &vertexstats[k]; +#endif + + data = encodeBytes(data, data_end, buffer, (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1)); + if (!data) + return 0; + +#if TRACE + bytestats = 0; + vertexstats[k].size += data - olddata; +#endif + } + + memcpy(last_vertex, &vertex_data[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} + +#if defined(SIMD_FALLBACK) || (!defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_AVX)) +static const unsigned char* decodeBytesGroup(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ +#define READ() byte = *data++ +#define NEXT(bits) enc = byte >> (8 - bits), byte <<= bits, encv = *data_var, *buffer++ = 
(enc == (1 << bits) - 1) ? encv : enc, data_var += (enc == (1 << bits) - 1) + + unsigned char byte, enc, encv; + const unsigned char* data_var; + + switch (bitslog2) + { + case 0: + memset(buffer, 0, kByteGroupSize); + return data; + case 1: + data_var = data + 4; + + // 4 groups with 4 2-bit values in each byte + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + READ(), NEXT(2), NEXT(2), NEXT(2), NEXT(2); + + return data_var; + case 2: + data_var = data + 8; + + // 8 groups with 2 4-bit values in each byte + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + READ(), NEXT(4), NEXT(4); + + return data_var; + case 3: + memcpy(buffer, data, kByteGroupSize); + return data + kByteGroupSize; + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } + +#undef READ +#undef NEXT +} + +static const unsigned char* decodeBytes(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + for (size_t i = 0; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroup(data, buffer + i, bitslog2); + } + + return data; +} + +static const unsigned char* decodeVertexBlock(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t 
vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; ++k) + { + data = decodeBytes(data, data_end, buffer, vertex_count_aligned); + if (!data) + return 0; + + size_t vertex_offset = k; + + unsigned char p = last_vertex[k]; + + for (size_t i = 0; i < vertex_count; ++i) + { + unsigned char v = unzigzag8(buffer[i]) + p; + + transposed[vertex_offset] = v; + p = v; + + vertex_offset += vertex_size; + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +static unsigned char kDecodeBytesGroupShuffle[256][8]; +static unsigned char kDecodeBytesGroupCount[256]; + +#ifdef __wasm__ +__attribute__((cold)) // this saves 500 bytes in the output binary - we don't need to vectorize this loop! +#endif +static bool +decodeBytesGroupBuildTables() +{ + for (int mask = 0; mask < 256; ++mask) + { + unsigned char shuffle[8]; + unsigned char count = 0; + + for (int i = 0; i < 8; ++i) + { + int maski = (mask >> i) & 1; + shuffle[i] = maski ? 
count : 0x80; + count += (unsigned char)(maski); + } + + memcpy(kDecodeBytesGroupShuffle[mask], shuffle, 8); + kDecodeBytesGroupCount[mask] = count; + } + + return true; +} + +static bool gDecodeBytesGroupInitialized = decodeBytesGroupBuildTables(); +#endif + +#ifdef SIMD_SSE +SIMD_TARGET +static __m128i decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + __m128i sm0 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask0])); + __m128i sm1 = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(&kDecodeBytesGroupShuffle[mask1])); + __m128i sm1off = _mm_set1_epi8(kDecodeBytesGroupCount[mask0]); + + __m128i sm1r = _mm_add_epi8(sm1, sm1off); + + return _mm_unpacklo_epi64(sm0, sm1r); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + __m128i result = _mm_setzero_si128(); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data; + } + + case 1: + { +#ifdef __GNUC__ + typedef int __attribute__((aligned(1))) unaligned_int; +#else + typedef int unaligned_int; +#endif + + __m128i sel2 = _mm_cvtsi32_si128(*reinterpret_cast<const unaligned_int*>(data)); + __m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data + 4)); + + __m128i sel22 = _mm_unpacklo_epi8(_mm_srli_epi16(sel2, 4), sel2); + __m128i sel2222 = _mm_unpacklo_epi8(_mm_srli_epi16(sel22, 2), sel22); + __m128i sel = _mm_and_si128(sel2222, _mm_set1_epi8(3)); + + __m128i mask = _mm_cmpeq_epi8(sel, _mm_set1_epi8(3)); + int mask16 = _mm_movemask_epi8(mask); + unsigned char mask0 = (unsigned char)(mask16 & 255); + unsigned char mask1 = (unsigned char)(mask16 >> 8); + + __m128i shuf = decodeShuffleMask(mask0, mask1); + + __m128i result = _mm_or_si128(_mm_shuffle_epi8(rest, shuf), _mm_andnot_si128(mask, sel)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + 
// AVX-512 variant: decodes one byte group (16 output bytes) from data into buffer.
//
// bitslog2 is the 2-bit selector from the block header:
//   0 - the group is all zeros and occupies no bytes in the stream
//   1 - 2 bits per value (4 selector bytes) followed by the out-of-range bytes
//   2 - 4 bits per value (8 selector bytes) followed by the out-of-range bytes
//   3 - 16 raw bytes
// Returns the read position after the group. The loads below always fetch full
// 8/16-byte vectors, so the caller must guarantee enough readable bytes after data.
static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2)
{
	switch (bitslog2)
	{
	case 0:
	{
		__m128i result = _mm_setzero_si128();

		_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);

		return data;
	}

	case 1:
	case 2:
	{
		// the variable-length part starts after 4 (2-bit) or 8 (4-bit) selector bytes
		const unsigned char* skip = data + (bitslog2 << 2);

		__m128i selb = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(data));
		__m128i rest = _mm_loadu_si128(reinterpret_cast<const __m128i*>(skip));

		// config[0..1] hold the sentinel (3 or 15) splatted across all bytes;
		// config[2..3] hold the per-byte bit offsets used by the multishift below
		__m128i sent = decodeBytesGroupConfig[bitslog2 - 1];
		__m128i ctrl = decodeBytesGroupConfig[bitslog2 + 1];

		// 0x44 duplicates the low 64 bits (the selector bytes) into both halves
		__m128i selw = _mm_shuffle_epi32(selb, 0x44);
		// extract one packed value per output byte and mask it down to its bit width
		__m128i sel = _mm_and_si128(sent, _mm_multishift_epi64_epi8(ctrl, selw));
		// lanes equal to the sentinel take their value from the variable-length part
		__mmask16 mask16 = _mm_cmp_epi8_mask(sel, sent, _MM_CMPINT_EQ);

		// masked lanes receive consecutive bytes of rest; the others keep sel
		__m128i result = _mm_mask_expand_epi8(sel, mask16, rest);

		_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);

		// one extra stream byte was consumed per sentinel lane
		return skip + _mm_popcnt_u32(mask16);
	}

	case 3:
	{
		__m128i result = _mm_loadu_si128(reinterpret_cast<const __m128i*>(data));

		_mm_storeu_si128(reinterpret_cast<__m128i*>(buffer), result);

		return data + 16;
	}

	default:
		assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value
		return data;
	}
}
decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + switch (bitslog2) + { + case 0: + { + uint8x16_t result = vdupq_n_u8(0); + + vst1q_u8(buffer, result); + + return data; + } + + case 1: + { + uint8x8_t sel2 = vld1_u8(data); + uint8x8_t sel22 = vzip_u8(vshr_n_u8(sel2, 4), sel2).val[0]; + uint8x8x2_t sel2222 = vzip_u8(vshr_n_u8(sel22, 2), sel22); + uint8x16_t sel = vandq_u8(vcombine_u8(sel2222.val[0], sel2222.val[1]), vdupq_n_u8(3)); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(3)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 4); + uint8x8_t rest1 = vld1_u8(data + 4 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + uint8x8_t sel4 = vld1_u8(data); + uint8x8x2_t sel44 = vzip_u8(vshr_n_u8(sel4, 4), vand_u8(sel4, vdup_n_u8(15))); + uint8x16_t sel = vcombine_u8(sel44.val[0], sel44.val[1]); + + uint8x16_t mask = vceqq_u8(sel, vdupq_n_u8(15)); + unsigned char mask0, mask1; + neonMoveMask(mask, mask0, mask1); + + uint8x8_t rest0 = vld1_u8(data + 8); + uint8x8_t rest1 = vld1_u8(data + 8 + kDecodeBytesGroupCount[mask0]); + + uint8x16_t result = vbslq_u8(mask, shuffleBytes(mask0, mask1, rest0, rest1), sel); + + vst1q_u8(buffer, result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + uint8x16_t result = vld1q_u8(data); + + vst1q_u8(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static v128_t decodeShuffleMask(unsigned char mask0, unsigned char mask1) +{ + v128_t sm0 = wasm_v128_load(&kDecodeBytesGroupShuffle[mask0]); + v128_t sm1 = 
wasm_v128_load(&kDecodeBytesGroupShuffle[mask1]); + + v128_t sm1off = wasm_v128_load(&kDecodeBytesGroupCount[mask0]); + sm1off = wasm_v8x16_shuffle(sm1off, sm1off, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + + v128_t sm1r = wasm_i8x16_add(sm1, sm1off); + + return wasmx_unpacklo_v64x2(sm0, sm1r); +} + +SIMD_TARGET +static void wasmMoveMask(v128_t mask, unsigned char& mask0, unsigned char& mask1) +{ + v128_t mask_0 = wasm_v32x4_shuffle(mask, mask, 0, 2, 1, 3); + + uint64_t mask_1a = wasm_i64x2_extract_lane(mask_0, 0) & 0x0804020108040201ull; + uint64_t mask_1b = wasm_i64x2_extract_lane(mask_0, 1) & 0x8040201080402010ull; + + // TODO: This can use v8x16_bitmask in the future + uint64_t mask_2 = mask_1a | mask_1b; + uint64_t mask_4 = mask_2 | (mask_2 >> 16); + uint64_t mask_8 = mask_4 | (mask_4 >> 8); + + mask0 = uint8_t(mask_8); + mask1 = uint8_t(mask_8 >> 32); +} + +SIMD_TARGET +static const unsigned char* decodeBytesGroupSimd(const unsigned char* data, unsigned char* buffer, int bitslog2) +{ + unsigned char byte, enc, encv; + const unsigned char* data_var; + + switch (bitslog2) + { + case 0: + { + v128_t result = wasm_i8x16_splat(0); + + wasm_v128_store(buffer, result); + + return data; + } + + case 1: + { + v128_t sel2 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 4); + + v128_t sel22 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel2, 4), sel2); + v128_t sel2222 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel22, 2), sel22); + v128_t sel = wasm_v128_and(sel2222, wasm_i8x16_splat(3)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(3)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 4 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 2: + { + v128_t sel4 = wasm_v128_load(data); + v128_t rest = wasm_v128_load(data + 
8); + + v128_t sel44 = wasmx_unpacklo_v8x16(wasm_i16x8_shr(sel4, 4), sel4); + v128_t sel = wasm_v128_and(sel44, wasm_i8x16_splat(15)); + + v128_t mask = wasm_i8x16_eq(sel, wasm_i8x16_splat(15)); + + unsigned char mask0, mask1; + wasmMoveMask(mask, mask0, mask1); + + v128_t shuf = decodeShuffleMask(mask0, mask1); + + v128_t result = wasm_v128_bitselect(wasm_v8x16_swizzle(rest, shuf), sel, mask); + + wasm_v128_store(buffer, result); + + return data + 8 + kDecodeBytesGroupCount[mask0] + kDecodeBytesGroupCount[mask1]; + } + + case 3: + { + v128_t result = wasm_v128_load(data); + + wasm_v128_store(buffer, result); + + return data + 16; + } + + default: + assert(!"Unexpected bit length"); // unreachable since bitslog2 is a 2-bit value + return data; + } +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +SIMD_TARGET +static void transpose8(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3) +{ + __m128i t0 = _mm_unpacklo_epi8(x0, x1); + __m128i t1 = _mm_unpackhi_epi8(x0, x1); + __m128i t2 = _mm_unpacklo_epi8(x2, x3); + __m128i t3 = _mm_unpackhi_epi8(x2, x3); + + x0 = _mm_unpacklo_epi16(t0, t2); + x1 = _mm_unpackhi_epi16(t0, t2); + x2 = _mm_unpacklo_epi16(t1, t3); + x3 = _mm_unpackhi_epi16(t1, t3); +} + +SIMD_TARGET +static __m128i unzigzag8(__m128i v) +{ + __m128i xl = _mm_sub_epi8(_mm_setzero_si128(), _mm_and_si128(v, _mm_set1_epi8(1))); + __m128i xr = _mm_and_si128(_mm_srli_epi16(v, 1), _mm_set1_epi8(127)); + + return _mm_xor_si128(xl, xr); +} +#endif + +#ifdef SIMD_NEON +static void transpose8(uint8x16_t& x0, uint8x16_t& x1, uint8x16_t& x2, uint8x16_t& x3) +{ + uint8x16x2_t t01 = vzipq_u8(x0, x1); + uint8x16x2_t t23 = vzipq_u8(x2, x3); + + uint16x8x2_t x01 = vzipq_u16(vreinterpretq_u16_u8(t01.val[0]), vreinterpretq_u16_u8(t23.val[0])); + uint16x8x2_t x23 = vzipq_u16(vreinterpretq_u16_u8(t01.val[1]), vreinterpretq_u16_u8(t23.val[1])); + + x0 = vreinterpretq_u8_u16(x01.val[0]); + x1 = vreinterpretq_u8_u16(x01.val[1]); + x2 = vreinterpretq_u8_u16(x23.val[0]); + x3 
= vreinterpretq_u8_u16(x23.val[1]); +} + +static uint8x16_t unzigzag8(uint8x16_t v) +{ + uint8x16_t xl = vreinterpretq_u8_s8(vnegq_s8(vreinterpretq_s8_u8(vandq_u8(v, vdupq_n_u8(1))))); + uint8x16_t xr = vshrq_n_u8(v, 1); + + return veorq_u8(xl, xr); +} +#endif + +#ifdef SIMD_WASM +SIMD_TARGET +static void transpose8(v128_t& x0, v128_t& x1, v128_t& x2, v128_t& x3) +{ + v128_t t0 = wasmx_unpacklo_v8x16(x0, x1); + v128_t t1 = wasmx_unpackhi_v8x16(x0, x1); + v128_t t2 = wasmx_unpacklo_v8x16(x2, x3); + v128_t t3 = wasmx_unpackhi_v8x16(x2, x3); + + x0 = wasmx_unpacklo_v16x8(t0, t2); + x1 = wasmx_unpackhi_v16x8(t0, t2); + x2 = wasmx_unpacklo_v16x8(t1, t3); + x3 = wasmx_unpackhi_v16x8(t1, t3); +} + +SIMD_TARGET +static v128_t unzigzag8(v128_t v) +{ + v128_t xl = wasm_i8x16_neg(wasm_v128_and(v, wasm_i8x16_splat(1))); + v128_t xr = wasm_u8x16_shr(v, 1); + + return wasm_v128_xor(xl, xr); +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) +SIMD_TARGET +static const unsigned char* decodeBytesSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* buffer, size_t buffer_size) +{ + assert(buffer_size % kByteGroupSize == 0); + assert(kByteGroupSize == 16); + + const unsigned char* header = data; + + // round number of groups to 4 to get number of header bytes + size_t header_size = (buffer_size / kByteGroupSize + 3) / 4; + + if (size_t(data_end - data) < header_size) + return 0; + + data += header_size; + + size_t i = 0; + + // fast-path: process 4 groups at a time, do a shared bounds check - each group reads <=24b + for (; i + kByteGroupSize * 4 <= buffer_size && size_t(data_end - data) >= kByteGroupDecodeLimit * 4; i += kByteGroupSize * 4) + { + size_t header_offset = i / kByteGroupSize; + unsigned char header_byte = header[header_offset / 4]; + + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 0, (header_byte >> 0) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 1, 
(header_byte >> 2) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 2, (header_byte >> 4) & 3); + data = decodeBytesGroupSimd(data, buffer + i + kByteGroupSize * 3, (header_byte >> 6) & 3); + } + + // slow-path: process remaining groups + for (; i < buffer_size; i += kByteGroupSize) + { + if (size_t(data_end - data) < kByteGroupDecodeLimit) + return 0; + + size_t header_offset = i / kByteGroupSize; + + int bitslog2 = (header[header_offset / 4] >> ((header_offset % 4) * 2)) & 3; + + data = decodeBytesGroupSimd(data, buffer + i, bitslog2); + } + + return data; +} + +SIMD_TARGET +static const unsigned char* decodeVertexBlockSimd(const unsigned char* data, const unsigned char* data_end, unsigned char* vertex_data, size_t vertex_count, size_t vertex_size, unsigned char last_vertex[256]) +{ + assert(vertex_count > 0 && vertex_count <= kVertexBlockMaxSize); + + unsigned char buffer[kVertexBlockMaxSize * 4]; + unsigned char transposed[kVertexBlockSizeBytes]; + + size_t vertex_count_aligned = (vertex_count + kByteGroupSize - 1) & ~(kByteGroupSize - 1); + + for (size_t k = 0; k < vertex_size; k += 4) + { + for (size_t j = 0; j < 4; ++j) + { + data = decodeBytesSimd(data, data_end, buffer + j * vertex_count_aligned, vertex_count_aligned); + if (!data) + return 0; + } + +#if defined(SIMD_SSE) || defined(SIMD_AVX) +#define TEMP __m128i +#define PREP() __m128i pi = _mm_cvtsi32_si128(*reinterpret_cast<const int*>(last_vertex + k)) +#define LOAD(i) __m128i r##i = _mm_loadu_si128(reinterpret_cast<const __m128i*>(buffer + j + i * vertex_count_aligned)) +#define GRP4(i) t0 = _mm_shuffle_epi32(r##i, 0), t1 = _mm_shuffle_epi32(r##i, 1), t2 = _mm_shuffle_epi32(r##i, 2), t3 = _mm_shuffle_epi32(r##i, 3) +#define FIXD(i) t##i = pi = _mm_add_epi8(pi, t##i) +#define SAVE(i) *reinterpret_cast<int*>(savep) = _mm_cvtsi128_si32(t##i), savep += vertex_size +#endif + +#ifdef SIMD_NEON +#define TEMP uint8x8_t +#define PREP() uint8x8_t pi = 
vreinterpret_u8_u32(vld1_lane_u32(reinterpret_cast<uint32_t*>(last_vertex + k), vdup_n_u32(0), 0)) +#define LOAD(i) uint8x16_t r##i = vld1q_u8(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = vget_low_u8(r##i), t1 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t0), 1)), t2 = vget_high_u8(r##i), t3 = vreinterpret_u8_u32(vdup_lane_u32(vreinterpret_u32_u8(t2), 1)) +#define FIXD(i) t##i = pi = vadd_u8(pi, t##i) +#define SAVE(i) vst1_lane_u32(reinterpret_cast<uint32_t*>(savep), vreinterpret_u32_u8(t##i), 0), savep += vertex_size +#endif + +#ifdef SIMD_WASM +#define TEMP v128_t +#define PREP() v128_t pi = wasm_v128_load(last_vertex + k) +#define LOAD(i) v128_t r##i = wasm_v128_load(buffer + j + i * vertex_count_aligned) +#define GRP4(i) t0 = wasmx_splat_v32x4(r##i, 0), t1 = wasmx_splat_v32x4(r##i, 1), t2 = wasmx_splat_v32x4(r##i, 2), t3 = wasmx_splat_v32x4(r##i, 3) +#define FIXD(i) t##i = pi = wasm_i8x16_add(pi, t##i) +#define SAVE(i) *reinterpret_cast<int*>(savep) = wasm_i32x4_extract_lane(t##i, 0), savep += vertex_size +#endif + + PREP(); + + unsigned char* savep = transposed + k; + + for (size_t j = 0; j < vertex_count_aligned; j += 16) + { + LOAD(0); + LOAD(1); + LOAD(2); + LOAD(3); + + r0 = unzigzag8(r0); + r1 = unzigzag8(r1); + r2 = unzigzag8(r2); + r3 = unzigzag8(r3); + + transpose8(r0, r1, r2, r3); + + TEMP t0, t1, t2, t3; + + GRP4(0); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(1); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(2); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + + GRP4(3); + FIXD(0), FIXD(1), FIXD(2), FIXD(3); + SAVE(0), SAVE(1), SAVE(2), SAVE(3); + +#undef TEMP +#undef PREP +#undef LOAD +#undef GRP4 +#undef FIXD +#undef SAVE + } + } + + memcpy(vertex_data, transposed, vertex_count * vertex_size); + + memcpy(last_vertex, &transposed[vertex_size * (vertex_count - 1)], vertex_size); + + return data; +} +#endif + +#if 
defined(SIMD_SSE) && defined(SIMD_FALLBACK) +static unsigned int getCpuFeatures() +{ + int cpuinfo[4] = {}; +#ifdef _MSC_VER + __cpuid(cpuinfo, 1); +#else + __cpuid(1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]); +#endif + return cpuinfo[2]; +} + +unsigned int cpuid = getCpuFeatures(); +#endif + +} // namespace meshopt + +size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + +#if TRACE + memset(vertexstats, 0, sizeof(vertexstats)); +#endif + + const unsigned char* vertex_data = static_cast<const unsigned char*>(vertices); + + unsigned char* data = buffer; + unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return 0; + + int version = gEncodeVertexVersion; + + *data++ = (unsigned char)(kVertexHeader | version); + + unsigned char first_vertex[256] = {}; + if (vertex_count > 0) + memcpy(first_vertex, vertex_data, vertex_size); + + unsigned char last_vertex[256] = {}; + memcpy(last_vertex, first_vertex, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? vertex_block_size : vertex_count - vertex_offset; + + data = encodeVertexBlock(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return 0; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? 
kTailMaxSize : vertex_size; + + if (size_t(data_end - data) < tail_size) + return 0; + + // write first vertex to the end of the stream and pad it to 32 bytes; this is important to simplify bounds checks in decoder + if (vertex_size < kTailMaxSize) + { + memset(data, 0, kTailMaxSize - vertex_size); + data += kTailMaxSize - vertex_size; + } + + memcpy(data, first_vertex, vertex_size); + data += vertex_size; + + assert(data >= buffer + tail_size); + assert(data <= buffer + buffer_size); + +#if TRACE + size_t total_size = data - buffer; + + for (size_t k = 0; k < vertex_size; ++k) + { + const Stats& vsk = vertexstats[k]; + + printf("%2d: %d bytes\t%.1f%%\t%.1f bpv", int(k), int(vsk.size), double(vsk.size) / double(total_size) * 100, double(vsk.size) / double(vertex_count) * 8); + +#if TRACE > 1 + printf("\t\thdr %d bytes\tbit0 %d (%d bytes)\tbit1 %d (%d bytes)\tbit2 %d (%d bytes)\tbit3 %d (%d bytes)", + int(vsk.header), + int(vsk.bitg[0]), int(vsk.bitb[0]), + int(vsk.bitg[1]), int(vsk.bitb[1]), + int(vsk.bitg[2]), int(vsk.bitb[2]), + int(vsk.bitg[3]), int(vsk.bitb[3])); +#endif + + printf("\n"); + } +#endif + + return data - buffer; +} + +size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + size_t vertex_block_count = (vertex_count + vertex_block_size - 1) / vertex_block_size; + + size_t vertex_block_header_size = (vertex_block_size / kByteGroupSize + 3) / 4; + size_t vertex_block_data_size = vertex_block_size; + + size_t tail_size = vertex_size < kTailMaxSize ? 
kTailMaxSize : vertex_size; + + return 1 + vertex_block_count * vertex_size * (vertex_block_header_size + vertex_block_data_size) + tail_size; +} + +void meshopt_encodeVertexVersion(int version) +{ + assert(unsigned(version) <= 0); + + meshopt::gEncodeVertexVersion = version; +} + +int meshopt_decodeVertexBuffer(void* destination, size_t vertex_count, size_t vertex_size, const unsigned char* buffer, size_t buffer_size) +{ + using namespace meshopt; + + assert(vertex_size > 0 && vertex_size <= 256); + assert(vertex_size % 4 == 0); + + const unsigned char* (*decode)(const unsigned char*, const unsigned char*, unsigned char*, size_t, size_t, unsigned char[256]) = 0; + +#if defined(SIMD_SSE) && defined(SIMD_FALLBACK) + decode = (cpuid & (1 << 9)) ? decodeVertexBlockSimd : decodeVertexBlock; +#elif defined(SIMD_SSE) || defined(SIMD_AVX) || defined(SIMD_NEON) || defined(SIMD_WASM) + decode = decodeVertexBlockSimd; +#else + decode = decodeVertexBlock; +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + assert(gDecodeBytesGroupInitialized); + (void)gDecodeBytesGroupInitialized; +#endif + + unsigned char* vertex_data = static_cast<unsigned char*>(destination); + + const unsigned char* data = buffer; + const unsigned char* data_end = buffer + buffer_size; + + if (size_t(data_end - data) < 1 + vertex_size) + return -2; + + unsigned char data_header = *data++; + + if ((data_header & 0xf0) != kVertexHeader) + return -1; + + int version = data_header & 0x0f; + if (version > 0) + return -1; + + unsigned char last_vertex[256]; + memcpy(last_vertex, data_end - vertex_size, vertex_size); + + size_t vertex_block_size = getVertexBlockSize(vertex_size); + + size_t vertex_offset = 0; + + while (vertex_offset < vertex_count) + { + size_t block_size = (vertex_offset + vertex_block_size < vertex_count) ? 
vertex_block_size : vertex_count - vertex_offset; + + data = decode(data, data_end, vertex_data + vertex_offset * vertex_size, block_size, vertex_size, last_vertex); + if (!data) + return -2; + + vertex_offset += block_size; + } + + size_t tail_size = vertex_size < kTailMaxSize ? kTailMaxSize : vertex_size; + + if (size_t(data_end - data) != tail_size) + return -3; + + return 0; +} + +#undef SIMD_NEON +#undef SIMD_SSE +#undef SIMD_AVX +#undef SIMD_WASM +#undef SIMD_FALLBACK +#undef SIMD_TARGET diff --git a/thirdparty/meshoptimizer/vertexfilter.cpp b/thirdparty/meshoptimizer/vertexfilter.cpp new file mode 100644 index 0000000000..e7ad2c9d39 --- /dev/null +++ b/thirdparty/meshoptimizer/vertexfilter.cpp @@ -0,0 +1,825 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <math.h> + +// The block below auto-detects SIMD ISA that can be used on the target platform +#ifndef MESHOPTIMIZER_NO_SIMD + +// The SIMD implementation requires SSE2, which can be enabled unconditionally through compiler settings +#if defined(__SSE2__) +#define SIMD_SSE +#endif + +// MSVC supports compiling SSE2 code regardless of compile options; we assume all 32-bit CPUs support SSE2 +#if !defined(SIMD_SSE) && defined(_MSC_VER) && !defined(__clang__) && (defined(_M_IX86) || defined(_M_X64)) +#define SIMD_SSE +#endif + +// GCC/clang define these when NEON support is available +#if defined(__ARM_NEON__) || defined(__ARM_NEON) +#define SIMD_NEON +#endif + +// On MSVC, we assume that ARM builds always target NEON-capable devices +#if !defined(SIMD_NEON) && defined(_MSC_VER) && (defined(_M_ARM) || defined(_M_ARM64)) +#define SIMD_NEON +#endif + +// When targeting Wasm SIMD we can't use runtime cpuid checks so we unconditionally enable SIMD +#if defined(__wasm_simd128__) +#define SIMD_WASM +#endif + +#endif // !MESHOPTIMIZER_NO_SIMD + +#ifdef SIMD_SSE +#include <emmintrin.h> +#include <stdint.h> +#endif + +#ifdef 
_MSC_VER +#include <intrin.h> +#endif + +#ifdef SIMD_NEON +#if defined(_MSC_VER) && defined(_M_ARM64) +#include <arm64_neon.h> +#else +#include <arm_neon.h> +#endif +#endif + +#ifdef SIMD_WASM +#include <wasm_simd128.h> +#endif + +#ifdef SIMD_WASM +#define wasmx_unpacklo_v16x8(a, b) wasm_v16x8_shuffle(a, b, 0, 8, 1, 9, 2, 10, 3, 11) +#define wasmx_unpackhi_v16x8(a, b) wasm_v16x8_shuffle(a, b, 4, 12, 5, 13, 6, 14, 7, 15) +#define wasmx_unziplo_v32x4(a, b) wasm_v32x4_shuffle(a, b, 0, 2, 4, 6) +#define wasmx_unziphi_v32x4(a, b) wasm_v32x4_shuffle(a, b, 1, 3, 5, 7) +#endif + +namespace meshopt +{ + +#if !defined(SIMD_SSE) && !defined(SIMD_NEON) && !defined(SIMD_WASM) +template <typename T> +static void decodeFilterOct(T* data, size_t count) +{ + const float max = float((1 << (sizeof(T) * 8 - 1)) - 1); + + for (size_t i = 0; i < count; ++i) + { + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float x = float(data[i * 4 + 0]); + float y = float(data[i * 4 + 1]); + float z = float(data[i * 4 + 2]) - fabsf(x) - fabsf(y); + + // fixup octahedral coordinates for z<0 + float t = (z >= 0.f) ? 0.f : z; + + x += (x >= 0.f) ? t : -t; + y += (y >= 0.f) ? t : -t; + + // compute normal length & scale + float l = sqrtf(x * x + y * y + z * z); + float s = max / l; + + // rounded signed float->int + int xf = int(x * s + (x >= 0.f ? 0.5f : -0.5f)); + int yf = int(y * s + (y >= 0.f ? 0.5f : -0.5f)); + int zf = int(z * s + (z >= 0.f ? 0.5f : -0.5f)); + + data[i * 4 + 0] = T(xf); + data[i * 4 + 1] = T(yf); + data[i * 4 + 2] = T(zf); + } +} + +static void decodeFilterQuat(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; ++i) + { + // recover scale from the high byte of the component + int sf = data[i * 4 + 3] | 3; + float ss = scale / float(sf); + + // convert x/y/z to [-1..1] (scaled...) 
+ float x = float(data[i * 4 + 0]) * ss; + float y = float(data[i * 4 + 1]) * ss; + float z = float(data[i * 4 + 2]) * ss; + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + float ww = 1.f - x * x - y * y - z * z; + float w = sqrtf(ww >= 0.f ? ww : 0.f); + + // rounded signed float->int + int xf = int(x * 32767.f + (x >= 0.f ? 0.5f : -0.5f)); + int yf = int(y * 32767.f + (y >= 0.f ? 0.5f : -0.5f)); + int zf = int(z * 32767.f + (z >= 0.f ? 0.5f : -0.5f)); + int wf = int(w * 32767.f + 0.5f); + + int qc = data[i * 4 + 3] & 3; + + // output order is dictated by input index + data[i * 4 + ((qc + 1) & 3)] = short(xf); + data[i * 4 + ((qc + 2) & 3)] = short(yf); + data[i * 4 + ((qc + 3) & 3)] = short(zf); + data[i * 4 + ((qc + 0) & 3)] = short(wf); + } +} + +static void decodeFilterExp(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; ++i) + { + unsigned int v = data[i]; + + // decode mantissa and exponent + int m = int(v << 8) >> 8; + int e = int(v) >> 24; + + union + { + float f; + unsigned int ui; + } u; + + // optimized version of ldexp(float(m), e) + u.ui = unsigned(e + 127) << 23; + u.f = u.f * float(m); + + data[i] = u.ui; + } +} +#endif + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) +inline uint64_t rotateleft64(uint64_t v, int x) +{ +#if defined(_MSC_VER) && !defined(__clang__) + return _rotl64(v, x); +// Apple's Clang 8 is actually vanilla Clang 3.9, there we need to look for +// version 11 instead: https://en.wikipedia.org/wiki/Xcode#Toolchain_versions +#elif defined(__clang__) && ((!defined(__apple_build_version__) && __clang_major__ >= 8) || __clang_major__ >= 11) + return __builtin_rotateleft64(v, x); +#else + return (v << (x & 63)) | (v >> ((64 - x) & 63)); +#endif +} +#endif + +#ifdef SIMD_SSE +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const __m128 sign = _mm_set1_ps(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128i n4 = 
_mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4])); + + // sign-extends each of x,y in [x y ? ?] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 24), 24); + __m128i yf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + __m128i zf = _mm_srai_epi32(_mm_slli_epi32(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + __m128 x = _mm_cvtepi32_ps(xf); + __m128 y = _mm_cvtepi32_ps(yf); + __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y))); + + // fixup octahedral coordinates for z<0 + __m128 t = _mm_min_ps(z, _mm_setzero_ps()); + + x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign))); + y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign))); + + // compute normal length & scale + __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))); + __m128 s = _mm_mul_ps(_mm_set1_ps(127.f), _mm_rsqrt_ps(ll)); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + + // combine xr/yr/zr into final value + __m128i res = _mm_and_si128(n4, _mm_set1_epi32(0xff000000)); + res = _mm_or_si128(res, _mm_and_si128(xr, _mm_set1_epi32(0xff))); + res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(yr, _mm_set1_epi32(0xff)), 8)); + res = _mm_or_si128(res, _mm_slli_epi32(_mm_and_si128(zr, _mm_set1_epi32(0xff)), 16)); + + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res); + } +} + +static void decodeFilterOctSimd(short* data, size_t count) +{ + const __m128 sign = _mm_set1_ps(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128 n4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4])); + __m128 n4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4])); + + // gather both x/y 
16-bit pairs in each 32-bit lane + __m128i n4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(2, 0, 2, 0))); + + // sign-extends each of x,y in [x y] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(n4, 16), 16); + __m128i yf = _mm_srai_epi32(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + __m128i z4 = _mm_castps_si128(_mm_shuffle_ps(n4_0, n4_1, _MM_SHUFFLE(3, 1, 3, 1))); + __m128i zf = _mm_and_si128(z4, _mm_set1_epi32(0x7fff)); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + __m128 x = _mm_cvtepi32_ps(xf); + __m128 y = _mm_cvtepi32_ps(yf); + __m128 z = _mm_sub_ps(_mm_cvtepi32_ps(zf), _mm_add_ps(_mm_andnot_ps(sign, x), _mm_andnot_ps(sign, y))); + + // fixup octahedral coordinates for z<0 + __m128 t = _mm_min_ps(z, _mm_setzero_ps()); + + x = _mm_add_ps(x, _mm_xor_ps(t, _mm_and_ps(x, sign))); + y = _mm_add_ps(y, _mm_xor_ps(t, _mm_and_ps(y, sign))); + + // compute normal length & scale + __m128 ll = _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z))); + __m128 s = _mm_div_ps(_mm_set1_ps(32767.f), _mm_sqrt_ps(ll)); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + + // mix x/z and y/0 to make 16-bit unpack easier + __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16)); + __m128i y0r = _mm_and_si128(yr, _mm_set1_epi32(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + __m128i res_0 = _mm_unpacklo_epi16(xzr, y0r); + __m128i res_1 = _mm_unpackhi_epi16(xzr, y0r); + + // patch in .w + res_0 = _mm_or_si128(res_0, _mm_and_si128(_mm_castps_si128(n4_0), _mm_set1_epi64x(0xffff000000000000))); + res_1 = _mm_or_si128(res_1, _mm_and_si128(_mm_castps_si128(n4_1), _mm_set1_epi64x(0xffff000000000000))); + + 
_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + __m128 q4_0 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 0) * 4])); + __m128 q4_1 = _mm_loadu_ps(reinterpret_cast<float*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + __m128i q4_xy = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(2, 0, 2, 0))); + __m128i q4_zc = _mm_castps_si128(_mm_shuffle_ps(q4_0, q4_1, _MM_SHUFFLE(3, 1, 3, 1))); + + // sign-extends each of x,y in [x y] with arithmetic shifts + __m128i xf = _mm_srai_epi32(_mm_slli_epi32(q4_xy, 16), 16); + __m128i yf = _mm_srai_epi32(q4_xy, 16); + __m128i zf = _mm_srai_epi32(_mm_slli_epi32(q4_zc, 16), 16); + __m128i cf = _mm_srai_epi32(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + __m128i sf = _mm_or_si128(cf, _mm_set1_epi32(3)); + __m128 ss = _mm_div_ps(_mm_set1_ps(scale), _mm_cvtepi32_ps(sf)); + + // convert x/y/z to [-1..1] (scaled...) 
+ __m128 x = _mm_mul_ps(_mm_cvtepi32_ps(xf), ss); + __m128 y = _mm_mul_ps(_mm_cvtepi32_ps(yf), ss); + __m128 z = _mm_mul_ps(_mm_cvtepi32_ps(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + __m128 ww = _mm_sub_ps(_mm_set1_ps(1.f), _mm_add_ps(_mm_mul_ps(x, x), _mm_add_ps(_mm_mul_ps(y, y), _mm_mul_ps(z, z)))); + __m128 w = _mm_sqrt_ps(_mm_max_ps(ww, _mm_setzero_ps())); + + __m128 s = _mm_set1_ps(32767.f); + + // rounded signed float->int + __m128i xr = _mm_cvtps_epi32(_mm_mul_ps(x, s)); + __m128i yr = _mm_cvtps_epi32(_mm_mul_ps(y, s)); + __m128i zr = _mm_cvtps_epi32(_mm_mul_ps(z, s)); + __m128i wr = _mm_cvtps_epi32(_mm_mul_ps(w, s)); + + // mix x/z and w/y to make 16-bit unpack easier + __m128i xzr = _mm_or_si128(_mm_and_si128(xr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(zr, 16)); + __m128i wyr = _mm_or_si128(_mm_and_si128(wr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(yr, 16)); + + // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) + __m128i res_0 = _mm_unpacklo_epi16(wyr, xzr); + __m128i res_1 = _mm_unpackhi_epi16(wyr, xzr); + + // store results to stack so that we can rotate using scalar instructions + uint64_t res[4]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[0]), res_0); + _mm_storeu_si128(reinterpret_cast<__m128i*>(&res[2]), res_1); + + // rotate and store + uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]); + + out[0] = rotateleft64(res[0], data[(i + 0) * 4 + 3] << 4); + out[1] = rotateleft64(res[1], data[(i + 1) * 4 + 3] << 4); + out[2] = rotateleft64(res[2], data[(i + 2) * 4 + 3] << 4); + out[3] = rotateleft64(res[3], data[(i + 3) * 4 + 3] << 4); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + __m128i v = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i])); + + // decode exponent into 2^x directly + __m128i ef = _mm_srai_epi32(v, 24); + __m128i es = _mm_slli_epi32(_mm_add_epi32(ef, 
_mm_set1_epi32(127)), 23); + + // decode 24-bit mantissa into floating-point value + __m128i mf = _mm_srai_epi32(_mm_slli_epi32(v, 8), 8); + __m128 m = _mm_cvtepi32_ps(mf); + + __m128 r = _mm_mul_ps(_mm_castsi128_ps(es), m); + + _mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r); + } +} +#endif + +#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64) +inline float32x4_t vsqrtq_f32(float32x4_t x) +{ + float32x4_t r = vrsqrteq_f32(x); + r = vmulq_f32(r, vrsqrtsq_f32(vmulq_f32(r, x), r)); // refine rsqrt estimate + return vmulq_f32(r, x); +} + +inline float32x4_t vdivq_f32(float32x4_t x, float32x4_t y) +{ + float32x4_t r = vrecpeq_f32(y); + r = vmulq_f32(r, vrecpsq_f32(y, r)); // refine rcp estimate + return vmulq_f32(x, r); +} +#endif + +#ifdef SIMD_NEON +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const int32x4_t sign = vdupq_n_s32(0x80000000); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t n4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4])); + + // sign-extends each of x,y in [x y ? ?] 
with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 24), 24); + int32x4_t yf = vshrq_n_s32(vshlq_n_s32(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + int32x4_t zf = vshrq_n_s32(vshlq_n_s32(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float32x4_t x = vcvtq_f32_s32(xf); + float32x4_t y = vcvtq_f32_s32(yf); + float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y))); + + // fixup octahedral coordinates for z<0 + float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f)); + + x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign)))); + y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); + + // compute normal length & scale + float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t rl = vrsqrteq_f32(ll); + float32x4_t s = vmulq_f32(vdupq_n_f32(127.f), rl); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + + // combine xr/yr/zr into final value + int32x4_t res = vandq_s32(n4, vdupq_n_s32(0xff000000)); + res = vorrq_s32(res, vandq_s32(xr, vdupq_n_s32(0xff))); + res = vorrq_s32(res, vshlq_n_s32(vandq_s32(yr, vdupq_n_s32(0xff)), 8)); + res = vorrq_s32(res, vshlq_n_s32(vandq_s32(zr, vdupq_n_s32(0xff)), 16)); + + vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res); + } +} + +static void 
decodeFilterOctSimd(short* data, size_t count) +{ + const int32x4_t sign = vdupq_n_s32(0x80000000); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t n4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4])); + int32x4_t n4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + int32x4_t n4 = vuzpq_s32(n4_0, n4_1).val[0]; + + // sign-extends each of x,y in [x y] with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(n4, 16), 16); + int32x4_t yf = vshrq_n_s32(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + int32x4_t z4 = vuzpq_s32(n4_0, n4_1).val[1]; + int32x4_t zf = vandq_s32(z4, vdupq_n_s32(0x7fff)); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + float32x4_t x = vcvtq_f32_s32(xf); + float32x4_t y = vcvtq_f32_s32(yf); + float32x4_t z = vsubq_f32(vcvtq_f32_s32(zf), vaddq_f32(vabsq_f32(x), vabsq_f32(y))); + + // fixup octahedral coordinates for z<0 + float32x4_t t = vminq_f32(z, vdupq_n_f32(0.f)); + + x = vaddq_f32(x, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(x), sign)))); + y = vaddq_f32(y, vreinterpretq_f32_s32(veorq_s32(vreinterpretq_s32_f32(t), vandq_s32(vreinterpretq_s32_f32(y), sign)))); + + // compute normal length & scale + float32x4_t ll = vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z))); + float32x4_t rl = vrsqrteq_f32(ll); + rl = vmulq_f32(rl, vrsqrtsq_f32(vmulq_f32(rl, ll), rl)); // refine rsqrt estimate + float32x4_t s = vmulq_f32(vdupq_n_f32(32767.f), rl); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), 
fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + + // mix x/z and y/0 to make 16-bit unpack easier + int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); + int32x4_t y0r = vandq_s32(yr, vdupq_n_s32(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[0]); + int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(xzr), vreinterpretq_s16_s32(y0r)).val[1]); + + // patch in .w + res_0 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_0, res_0); + res_1 = vbslq_s32(vreinterpretq_u32_u64(vdupq_n_u64(0xffff000000000000)), n4_1, res_1); + + vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0); + vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + int32x4_t q4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4])); + int32x4_t q4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4])); + + // gather both x/y 16-bit pairs in each 32-bit lane + int32x4_t q4_xy = vuzpq_s32(q4_0, q4_1).val[0]; + int32x4_t q4_zc = vuzpq_s32(q4_0, q4_1).val[1]; + + // sign-extends each of x,y in [x y] with arithmetic shifts + int32x4_t xf = vshrq_n_s32(vshlq_n_s32(q4_xy, 16), 16); + int32x4_t yf = vshrq_n_s32(q4_xy, 16); + int32x4_t zf = vshrq_n_s32(vshlq_n_s32(q4_zc, 16), 16); + int32x4_t cf = vshrq_n_s32(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + int32x4_t sf = vorrq_s32(cf, vdupq_n_s32(3)); + float32x4_t ss = vdivq_f32(vdupq_n_f32(scale), vcvtq_f32_s32(sf)); + + // convert x/y/z to [-1..1] (scaled...) 
+ float32x4_t x = vmulq_f32(vcvtq_f32_s32(xf), ss); + float32x4_t y = vmulq_f32(vcvtq_f32_s32(yf), ss); + float32x4_t z = vmulq_f32(vcvtq_f32_s32(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + float32x4_t ww = vsubq_f32(vdupq_n_f32(1.f), vaddq_f32(vmulq_f32(x, x), vaddq_f32(vmulq_f32(y, y), vmulq_f32(z, z)))); + float32x4_t w = vsqrtq_f32(vmaxq_f32(ww, vdupq_n_f32(0.f))); + + float32x4_t s = vdupq_n_f32(32767.f); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const float32x4_t fsnap = vdupq_n_f32(3 << 22); + + int32x4_t xr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(x, s), fsnap)); + int32x4_t yr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(y, s), fsnap)); + int32x4_t zr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(z, s), fsnap)); + int32x4_t wr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(w, s), fsnap)); + + // mix x/z and w/y to make 16-bit unpack easier + int32x4_t xzr = vorrq_s32(vandq_s32(xr, vdupq_n_s32(0xffff)), vshlq_n_s32(zr, 16)); + int32x4_t wyr = vorrq_s32(vandq_s32(wr, vdupq_n_s32(0xffff)), vshlq_n_s32(yr, 16)); + + // pack x/y/z/w using 16-bit unpacks; we pack wxyz by default (for qc=0) + int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[0]); + int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(wyr), vreinterpretq_s16_s32(xzr)).val[1]); + + // rotate and store + uint64_t* out = (uint64_t*)&data[i * 4]; + + out[0] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 0), vgetq_lane_s32(cf, 0) << 4); + out[1] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_0), 1), vgetq_lane_s32(cf, 1) << 4); + out[2] = rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 0), vgetq_lane_s32(cf, 2) << 4); + out[3] = 
rotateleft64(vgetq_lane_u64(vreinterpretq_u64_s32(res_1), 1), vgetq_lane_s32(cf, 3) << 4); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + int32x4_t v = vld1q_s32(reinterpret_cast<int32_t*>(&data[i])); + + // decode exponent into 2^x directly + int32x4_t ef = vshrq_n_s32(v, 24); + int32x4_t es = vshlq_n_s32(vaddq_s32(ef, vdupq_n_s32(127)), 23); + + // decode 24-bit mantissa into floating-point value + int32x4_t mf = vshrq_n_s32(vshlq_n_s32(v, 8), 8); + float32x4_t m = vcvtq_f32_s32(mf); + + float32x4_t r = vmulq_f32(vreinterpretq_f32_s32(es), m); + + vst1q_f32(reinterpret_cast<float*>(&data[i]), r); + } +} +#endif + +#ifdef SIMD_WASM +static void decodeFilterOctSimd(signed char* data, size_t count) +{ + const v128_t sign = wasm_f32x4_splat(-0.f); + + for (size_t i = 0; i < count; i += 4) + { + v128_t n4 = wasm_v128_load(&data[i * 4]); + + // sign-extends each of x,y in [x y ? ?] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 24), 24); + v128_t yf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 24); + + // unpack z; note that z is unsigned so we technically don't need to sign extend it + v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 8), 24); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + v128_t x = wasm_f32x4_convert_i32x4(xf); + v128_t y = wasm_f32x4_convert_i32x4(yf); + v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y))); + + // fixup octahedral coordinates for z<0 + // note: i32x4_min with 0 is equvalent to f32x4_min + v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0)); + + x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign))); + y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign))); + + // compute normal length & scale + v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))); + v128_t 
s = wasm_f32x4_div(wasm_f32x4_splat(127.f), wasm_f32x4_sqrt(ll)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + + // combine xr/yr/zr into final value + v128_t res = wasm_v128_and(n4, wasm_i32x4_splat(0xff000000)); + res = wasm_v128_or(res, wasm_v128_and(xr, wasm_i32x4_splat(0xff))); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(yr, wasm_i32x4_splat(0xff)), 8)); + res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(zr, wasm_i32x4_splat(0xff)), 16)); + + wasm_v128_store(&data[i * 4], res); + } +} + +static void decodeFilterOctSimd(short* data, size_t count) +{ + const v128_t sign = wasm_f32x4_splat(-0.f); + const v128_t zmask = wasm_i32x4_splat(0x7fff); + + for (size_t i = 0; i < count; i += 4) + { + v128_t n4_0 = wasm_v128_load(&data[(i + 0) * 4]); + v128_t n4_1 = wasm_v128_load(&data[(i + 2) * 4]); + + // gather both x/y 16-bit pairs in each 32-bit lane + v128_t n4 = wasmx_unziplo_v32x4(n4_0, n4_1); + + // sign-extends each of x,y in [x y] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(n4, 16), 16); + v128_t yf = wasm_i32x4_shr(n4, 16); + + // unpack z; note that z is unsigned so we don't need to sign extend it + v128_t z4 = wasmx_unziphi_v32x4(n4_0, n4_1); + v128_t zf = wasm_v128_and(z4, zmask); + + // convert x and y to floats and reconstruct z; this assumes zf encodes 1.f at the same bit count + v128_t x = wasm_f32x4_convert_i32x4(xf); + v128_t y = wasm_f32x4_convert_i32x4(yf); + v128_t z = wasm_f32x4_sub(wasm_f32x4_convert_i32x4(zf), wasm_f32x4_add(wasm_f32x4_abs(x), wasm_f32x4_abs(y))); + + // fixup octahedral coordinates 
for z<0 + // note: i32x4_min with 0 is equvalent to f32x4_min + v128_t t = wasm_i32x4_min(z, wasm_i32x4_splat(0)); + + x = wasm_f32x4_add(x, wasm_v128_xor(t, wasm_v128_and(x, sign))); + y = wasm_f32x4_add(y, wasm_v128_xor(t, wasm_v128_and(y, sign))); + + // compute normal length & scale + v128_t ll = wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z))); + v128_t s = wasm_f32x4_div(wasm_f32x4_splat(32767.f), wasm_f32x4_sqrt(ll)); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + + // mix x/z and y/0 to make 16-bit unpack easier + v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16)); + v128_t y0r = wasm_v128_and(yr, wasm_i32x4_splat(0xffff)); + + // pack x/y/z using 16-bit unpacks; note that this has 0 where we should have .w + v128_t res_0 = wasmx_unpacklo_v16x8(xzr, y0r); + v128_t res_1 = wasmx_unpackhi_v16x8(xzr, y0r); + + // patch in .w + res_0 = wasm_v128_or(res_0, wasm_v128_and(n4_0, wasm_i64x2_splat(0xffff000000000000))); + res_1 = wasm_v128_or(res_1, wasm_v128_and(n4_1, wasm_i64x2_splat(0xffff000000000000))); + + wasm_v128_store(&data[(i + 0) * 4], res_0); + wasm_v128_store(&data[(i + 2) * 4], res_1); + } +} + +static void decodeFilterQuatSimd(short* data, size_t count) +{ + const float scale = 1.f / sqrtf(2.f); + + for (size_t i = 0; i < count; i += 4) + { + v128_t q4_0 = wasm_v128_load(&data[(i + 0) * 4]); + v128_t q4_1 = wasm_v128_load(&data[(i + 2) * 4]); + + // gather both x/y 16-bit pairs in each 32-bit lane + v128_t q4_xy = wasmx_unziplo_v32x4(q4_0, q4_1); + 
v128_t q4_zc = wasmx_unziphi_v32x4(q4_0, q4_1); + + // sign-extends each of x,y in [x y] with arithmetic shifts + v128_t xf = wasm_i32x4_shr(wasm_i32x4_shl(q4_xy, 16), 16); + v128_t yf = wasm_i32x4_shr(q4_xy, 16); + v128_t zf = wasm_i32x4_shr(wasm_i32x4_shl(q4_zc, 16), 16); + v128_t cf = wasm_i32x4_shr(q4_zc, 16); + + // get a floating-point scaler using zc with bottom 2 bits set to 1 (which represents 1.f) + v128_t sf = wasm_v128_or(cf, wasm_i32x4_splat(3)); + v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(scale), wasm_f32x4_convert_i32x4(sf)); + + // convert x/y/z to [-1..1] (scaled...) + v128_t x = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(xf), ss); + v128_t y = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(yf), ss); + v128_t z = wasm_f32x4_mul(wasm_f32x4_convert_i32x4(zf), ss); + + // reconstruct w as a square root; we clamp to 0.f to avoid NaN due to precision errors + // note: i32x4_max with 0 is equivalent to f32x4_max + v128_t ww = wasm_f32x4_sub(wasm_f32x4_splat(1.f), wasm_f32x4_add(wasm_f32x4_mul(x, x), wasm_f32x4_add(wasm_f32x4_mul(y, y), wasm_f32x4_mul(z, z)))); + v128_t w = wasm_f32x4_sqrt(wasm_i32x4_max(ww, wasm_i32x4_splat(0))); + + v128_t s = wasm_f32x4_splat(32767.f); + + // fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value + // note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction + const v128_t fsnap = wasm_f32x4_splat(3 << 22); + + v128_t xr = wasm_f32x4_add(wasm_f32x4_mul(x, s), fsnap); + v128_t yr = wasm_f32x4_add(wasm_f32x4_mul(y, s), fsnap); + v128_t zr = wasm_f32x4_add(wasm_f32x4_mul(z, s), fsnap); + v128_t wr = wasm_f32x4_add(wasm_f32x4_mul(w, s), fsnap); + + // mix x/z and w/y to make 16-bit unpack easier + v128_t xzr = wasm_v128_or(wasm_v128_and(xr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(zr, 16)); + v128_t wyr = wasm_v128_or(wasm_v128_and(wr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(yr, 16)); + + // pack x/y/z/w using 16-bit 
unpacks; we pack wxyz by default (for qc=0) + v128_t res_0 = wasmx_unpacklo_v16x8(wyr, xzr); + v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr); + + // compute component index shifted left by 4 (and moved into i32x4 slot) + // TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449 + volatile v128_t cm = wasm_i32x4_shl(cf, 4); + + // rotate and store + uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]); + + out[0] = rotateleft64(wasm_i64x2_extract_lane(res_0, 0), wasm_i32x4_extract_lane(cm, 0)); + out[1] = rotateleft64(wasm_i64x2_extract_lane(res_0, 1), wasm_i32x4_extract_lane(cm, 1)); + out[2] = rotateleft64(wasm_i64x2_extract_lane(res_1, 0), wasm_i32x4_extract_lane(cm, 2)); + out[3] = rotateleft64(wasm_i64x2_extract_lane(res_1, 1), wasm_i32x4_extract_lane(cm, 3)); + } +} + +static void decodeFilterExpSimd(unsigned int* data, size_t count) +{ + for (size_t i = 0; i < count; i += 4) + { + v128_t v = wasm_v128_load(&data[i]); + + // decode exponent into 2^x directly + v128_t ef = wasm_i32x4_shr(v, 24); + v128_t es = wasm_i32x4_shl(wasm_i32x4_add(ef, wasm_i32x4_splat(127)), 23); + + // decode 24-bit mantissa into floating-point value + v128_t mf = wasm_i32x4_shr(wasm_i32x4_shl(v, 8), 8); + v128_t m = wasm_f32x4_convert_i32x4(mf); + + v128_t r = wasm_f32x4_mul(es, m); + + wasm_v128_store(&data[i], r); + } +} +#endif + +} // namespace meshopt + +void meshopt_decodeFilterOct(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size == 4 || vertex_size == 8); + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + if (vertex_size == 4) + decodeFilterOctSimd(static_cast<signed char*>(buffer), vertex_count); + else + decodeFilterOctSimd(static_cast<short*>(buffer), vertex_count); +#else + if (vertex_size == 4) + decodeFilterOct(static_cast<signed char*>(buffer), vertex_count); + else + 
decodeFilterOct(static_cast<short*>(buffer), vertex_count); +#endif +} + +void meshopt_decodeFilterQuat(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size == 8); + (void)vertex_size; + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + decodeFilterQuatSimd(static_cast<short*>(buffer), vertex_count); +#else + decodeFilterQuat(static_cast<short*>(buffer), vertex_count); +#endif +} + +void meshopt_decodeFilterExp(void* buffer, size_t vertex_count, size_t vertex_size) +{ + using namespace meshopt; + + assert(vertex_count % 4 == 0); + assert(vertex_size % 4 == 0); + +#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM) + decodeFilterExpSimd(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); +#else + decodeFilterExp(static_cast<unsigned int*>(buffer), vertex_count * (vertex_size / 4)); +#endif +} + +#undef SIMD_SSE +#undef SIMD_NEON +#undef SIMD_WASM diff --git a/thirdparty/meshoptimizer/vfetchanalyzer.cpp b/thirdparty/meshoptimizer/vfetchanalyzer.cpp new file mode 100644 index 0000000000..51dca873f8 --- /dev/null +++ b/thirdparty/meshoptimizer/vfetchanalyzer.cpp @@ -0,0 +1,58 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +meshopt_VertexFetchStatistics meshopt_analyzeVertexFetch(const unsigned int* indices, size_t index_count, size_t vertex_count, size_t vertex_size) +{ + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + meshopt_VertexFetchStatistics result = {}; + + unsigned char* vertex_visited = allocator.allocate<unsigned char>(vertex_count); + memset(vertex_visited, 0, vertex_count); + + const size_t kCacheLine = 64; + const size_t kCacheSize = 128 * 1024; + + // simple direct mapped cache; on typical mesh data this is close to 4-way 
cache, and this model is a gross approximation anyway + size_t cache[kCacheSize / kCacheLine] = {}; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + vertex_visited[index] = 1; + + size_t start_address = index * vertex_size; + size_t end_address = start_address + vertex_size; + + size_t start_tag = start_address / kCacheLine; + size_t end_tag = (end_address + kCacheLine - 1) / kCacheLine; + + assert(start_tag < end_tag); + + for (size_t tag = start_tag; tag < end_tag; ++tag) + { + size_t line = tag % (sizeof(cache) / sizeof(cache[0])); + + // we store +1 since cache is filled with 0 by default + result.bytes_fetched += (cache[line] != tag + 1) * kCacheLine; + cache[line] = tag + 1; + } + } + + size_t unique_vertex_count = 0; + + for (size_t i = 0; i < vertex_count; ++i) + unique_vertex_count += vertex_visited[i]; + + result.overfetch = unique_vertex_count == 0 ? 0 : float(result.bytes_fetched) / float(unique_vertex_count * vertex_size); + + return result; +} diff --git a/thirdparty/meshoptimizer/vfetchoptimizer.cpp b/thirdparty/meshoptimizer/vfetchoptimizer.cpp new file mode 100644 index 0000000000..465d6df5ca --- /dev/null +++ b/thirdparty/meshoptimizer/vfetchoptimizer.cpp @@ -0,0 +1,74 @@ +// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details +#include "meshoptimizer.h" + +#include <assert.h> +#include <string.h> + +size_t meshopt_optimizeVertexFetchRemap(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count) +{ + assert(index_count % 3 == 0); + + memset(destination, -1, vertex_count * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + if (destination[index] == ~0u) + { + destination[index] = next_vertex++; + } + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} + 
+size_t meshopt_optimizeVertexFetch(void* destination, unsigned int* indices, size_t index_count, const void* vertices, size_t vertex_count, size_t vertex_size) +{ + assert(index_count % 3 == 0); + assert(vertex_size > 0 && vertex_size <= 256); + + meshopt_Allocator allocator; + + // support in-place optimization + if (destination == vertices) + { + unsigned char* vertices_copy = allocator.allocate<unsigned char>(vertex_count * vertex_size); + memcpy(vertices_copy, vertices, vertex_count * vertex_size); + vertices = vertices_copy; + } + + // build vertex remap table + unsigned int* vertex_remap = allocator.allocate<unsigned int>(vertex_count); + memset(vertex_remap, -1, vertex_count * sizeof(unsigned int)); + + unsigned int next_vertex = 0; + + for (size_t i = 0; i < index_count; ++i) + { + unsigned int index = indices[i]; + assert(index < vertex_count); + + unsigned int& remap = vertex_remap[index]; + + if (remap == ~0u) // vertex was not added to destination VB + { + // add vertex + memcpy(static_cast<unsigned char*>(destination) + next_vertex * vertex_size, static_cast<const unsigned char*>(vertices) + index * vertex_size, vertex_size); + + remap = next_vertex++; + } + + // modify indices in place + indices[i] = remap; + } + + assert(next_vertex <= vertex_count); + + return next_vertex; +} diff --git a/thirdparty/rvo2/src/API.h b/thirdparty/rvo2/API.h index c64efb452c..c64efb452c 100644 --- a/thirdparty/rvo2/src/API.h +++ b/thirdparty/rvo2/API.h diff --git a/thirdparty/rvo2/src/Agent.cpp b/thirdparty/rvo2/Agent.cpp index 851d780758..851d780758 100644 --- a/thirdparty/rvo2/src/Agent.cpp +++ b/thirdparty/rvo2/Agent.cpp diff --git a/thirdparty/rvo2/src/Agent.h b/thirdparty/rvo2/Agent.h index 16f75a08f6..16f75a08f6 100644 --- a/thirdparty/rvo2/src/Agent.h +++ b/thirdparty/rvo2/Agent.h diff --git a/thirdparty/rvo2/src/Definitions.h b/thirdparty/rvo2/Definitions.h index a73aca9908..a73aca9908 100644 --- a/thirdparty/rvo2/src/Definitions.h +++ 
b/thirdparty/rvo2/Definitions.h diff --git a/thirdparty/rvo2/src/KdTree.cpp b/thirdparty/rvo2/KdTree.cpp index bc224614f0..bc224614f0 100644 --- a/thirdparty/rvo2/src/KdTree.cpp +++ b/thirdparty/rvo2/KdTree.cpp diff --git a/thirdparty/rvo2/src/KdTree.h b/thirdparty/rvo2/KdTree.h index 1dbad00ea4..1dbad00ea4 100644 --- a/thirdparty/rvo2/src/KdTree.h +++ b/thirdparty/rvo2/KdTree.h diff --git a/thirdparty/rvo2/src/Vector3.h b/thirdparty/rvo2/Vector3.h index 8c8835c865..8c8835c865 100644 --- a/thirdparty/rvo2/src/Vector3.h +++ b/thirdparty/rvo2/Vector3.h |